Merge branch 'master' into fix/aspect-ratio-enum-keys

PyOpenGL-accelerate is not necessary. (#12692 )
Native LongCat-Image implementation (#12597 )
2026-03-07 22:30:00 +00:00 · 2026-02-27 20:35:12 -08:00 · 2026-02-27 23:34:58 -05:00 · 2026-02-27 23:04:34 -05:00 · 2026-02-27 20:16:24 -05:00 · 2026-02-27 17:13:54 -08:00
172 changed files with 6701 additions and 450 deletions
--- a/.coderabbit.yaml
+++ b/.coderabbit.yaml
@@ -0,0 +1,127 @@
+# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json
+language: "en-US"
+early_access: false
+tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code."
+
+reviews:
+  profile: "chill"
+  request_changes_workflow: false
+  high_level_summary: false
+  poem: false
+  review_status: false
+  review_details: false
+  commit_status: true
+  collapse_walkthrough: true
+  changed_files_summary: false
+  sequence_diagrams: false
+  estimate_code_review_effort: false
+  assess_linked_issues: false
+  related_issues: false
+  related_prs: false
+  suggested_labels: false
+  auto_apply_labels: false
+  suggested_reviewers: false
+  auto_assign_reviewers: false
+  in_progress_fortune: false
+  enable_prompt_for_ai_agents: true
+
+  path_filters:  # every entry starts with "!", i.e. these globs are excluded from review
+    - "!comfy_api_nodes/apis/**"  # excluded although comfy_api_nodes/** has its own path_instructions entry
+    - "!**/generated/*.pyi"
+    - "!.ci/**"
+    - "!script_examples/**"
+    - "!**/__pycache__/**"
+    - "!**/*.ipynb"
+    - "!**/*.png"
+    - "!**/*.bat"
+
+  path_instructions:
+    - path: "**"
+      instructions: |
+        IMPORTANT: Only comment on issues directly introduced by this PR's code changes.
+        Do NOT flag pre-existing issues in code that was merely moved, re-indented,
+        de-indented, or reformatted without logic changes. If code appears in the diff
+        only due to whitespace or structural reformatting (e.g., removing a `with:` block),
+        treat it as unchanged. Contributors should not feel obligated to address
+        pre-existing issues outside the scope of their contribution.
+    - path: "comfy/**"
+      instructions: |
+        Core ML/diffusion engine. Focus on:
+        - Backward compatibility (breaking changes affect all custom nodes)
+        - Memory management and GPU resource handling
+        - Performance implications in hot paths
+        - Thread safety for concurrent execution
+    - path: "comfy_api_nodes/**"
+      instructions: |
+        Third-party API integration nodes. Focus on:
+        - No hardcoded API keys or secrets
+        - Proper error handling for API failures (timeouts, rate limits, auth errors)
+        - Correct Pydantic model usage
+        - Security of user data passed to external APIs
+    - path: "comfy_extras/**"
+      instructions: |
+        Community-contributed extra nodes. Focus on:
+        - Consistency with node patterns (INPUT_TYPES, RETURN_TYPES, FUNCTION, CATEGORY)
+        - No breaking changes to existing node interfaces
+    - path: "comfy_execution/**"
+      instructions: |
+        Execution engine (graph execution, caching, jobs). Focus on:
+        - Caching correctness
+        - Concurrent execution safety
+        - Graph validation edge cases
+    - path: "nodes.py"
+      instructions: |
+        Core node definitions (2500+ lines). Focus on:
+        - Backward compatibility of NODE_CLASS_MAPPINGS
+        - Consistency of INPUT_TYPES return format
+    - path: "alembic_db/**"
+      instructions: |
+        Database migrations. Focus on:
+        - Migration safety and rollback support
+        - Data preservation during schema changes
+
+  auto_review:
+    enabled: true
+    auto_incremental_review: true
+    drafts: false
+    ignore_title_keywords:
+      - "WIP"
+      - "DO NOT REVIEW"
+      - "DO NOT MERGE"
+
+  finishing_touches:
+    docstrings:
+      enabled: false
+    unit_tests:
+      enabled: false
+
+  tools:
+    ruff:
+      enabled: false
+    pylint:
+      enabled: false
+    flake8:
+      enabled: false
+    gitleaks:
+      enabled: true
+    shellcheck:
+      enabled: false
+    markdownlint:
+      enabled: false
+    yamllint:
+      enabled: false
+    languagetool:
+      enabled: false
+    github-checks:
+      enabled: true
+      timeout_ms: 90000
+    ast-grep:
+      essential_rules: true
+
+chat:
+  auto_reply: true
+
+knowledge_base:
+  opt_out: false
+  learnings:
+    scope: "auto"
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -16,7 +16,7 @@ body:

        ## Very Important

-        Please make sure that you post ALL your ComfyUI logs in the bug report. A bug report without logs will likely be ignored.
+        Please make sure that you post ALL your ComfyUI logs in the bug report **even if there is no crash**. Just paste everything. The startup log (everything before "To see the GUI go to: ...") contains critical information to developers trying to help. For a performance issue or crash, paste everything from "got prompt" to the end, including the crash. More is better - always. A bug report without logs will likely be ignored.
  - type: checkboxes
    id: custom-nodes-test
    attributes:
--- a/README.md
+++ b/README.md
@@ -189,8 +189,6 @@ The portable above currently comes with python 3.13 and pytorch cuda 13.0. Updat

 [Experimental portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z)

-[Portable with pytorch cuda 12.8 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu128.7z).
-
 [Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs).

 #### How do I share models between another UI and ComfyUI?
@@ -229,9 +227,9 @@ AMD users can install rocm and pytorch with pip if you don't have it already ins

 ```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1```

-This is the command to install the nightly with ROCm 7.1 which might have some performance improvements:
+This is the command to install the nightly with ROCm 7.2 which might have some performance improvements:

-```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.1```
+```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.2```


 ### AMD GPUs (Experimental: Windows and Linux), RDNA 3, 3.5 and 4 only.
--- a/app/node_replace_manager.py
+++ b/app/node_replace_manager.py
@@ -46,6 +46,8 @@ class NodeReplaceManager:
        connections: dict[str, list[tuple[str, str, int]]] = {}
        need_replacement: set[str] = set()
        for node_number, node_struct in prompt.items():
+            if "class_type" not in node_struct or "inputs" not in node_struct:
+                continue
            class_type = node_struct["class_type"]
            # need replacement if not in NODE_CLASS_MAPPINGS and has replacement
            if class_type not in nodes.NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type):
--- a/app/subgraph_manager.py
+++ b/app/subgraph_manager.py
@@ -53,7 +53,7 @@ class SubgraphManager:
        return entry_id, entry

    async def load_entry_data(self, entry: SubgraphEntry):
-        with open(entry['path'], 'r') as f:
+        with open(entry['path'], 'r', encoding='utf-8') as f:
            entry['data'] = f.read()
        return entry

--- a/blueprints/.glsl/Brightness_and_Contrast_1.frag
+++ b/blueprints/.glsl/Brightness_and_Contrast_1.frag
@@ -0,0 +1,44 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform float u_float0; // Brightness slider -100..100
+uniform float u_float1; // Contrast slider -100..100
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const float MID_GRAY = 0.18;  // 18% reflectance
+
+// Decode sRGB to linear light with a plain gamma 2.2 approximation; negative inputs are clamped to 0 before pow().
+vec3 srgbToLinear(vec3 c) {
+    return pow(max(c, 0.0), vec3(2.2));
+}
+
+vec3 linearToSrgb(vec3 c) {  // encode linear light back to sRGB with the inverse exponent 1/2.2
+    return pow(max(c, 0.0), vec3(1.0/2.2));  // negative inputs are clamped to 0 before pow()
+}
+
+float mapBrightness(float b) {  // slider -100..100 -> additive offset limited to -1..1
+    return min(max(b / 100.0, -1.0), 1.0);
+}
+
+float mapContrast(float c) {  // slider -100..100 -> gain limited to 0..2 (slider 0 gives gain 1)
+    return min(max(c / 100.0 + 1.0, 0.0), 2.0);
+}
+
+void main() {
+    vec4 orig = texture(u_image0, v_texCoord);
+    // Map the -100..100 sliders to an additive offset and a multiplicative gain.
+    float brightness = mapBrightness(u_float0);
+    float contrast   = mapContrast(u_float1);
+
+    vec3 lin = srgbToLinear(orig.rgb);
+    // Contrast scales the distance from MID_GRAY in linear light; brightness is added on top.
+    lin = (lin - MID_GRAY) * contrast + brightness + MID_GRAY;
+
+    // Convert back to sRGB
+    vec3 result = linearToSrgb(clamp(lin, 0.0, 1.0));
+    // Alpha is copied from the source pixel unchanged.
+    fragColor = vec4(result, orig.a);
+}
--- a/blueprints/.glsl/Chromatic_Aberration_16.frag
+++ b/blueprints/.glsl/Chromatic_Aberration_16.frag
@@ -0,0 +1,72 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform int u_int0;      // Mode
+uniform float u_float0;  // Amount (0 to 100)
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const int MODE_LINEAR   = 0;
+const int MODE_RADIAL   = 1;
+const int MODE_BARREL   = 2;
+const int MODE_SWIRL    = 3;
+const int MODE_DIAGONAL = 4;
+
+const float AMOUNT_SCALE = 0.0005;
+const float RADIAL_MULT = 4.0;
+const float BARREL_MULT = 8.0;
+const float INV_SQRT2 = 0.70710678118;
+
+void main() {
+    vec2 uv = v_texCoord;
+    vec4 original = texture(u_image0, uv);
+    // Amount slider scaled down to a UV-space offset magnitude.
+    float amount = u_float0 * AMOUNT_SCALE;
+    // A zero (or negative) amount passes the pixel through unchanged.
+    if (amount < 0.000001) {
+        fragColor = original;
+        return;
+    }
+
+    // Aspect-corrected coordinates for circular effects
+    float aspect = u_resolution.x / u_resolution.y;
+    vec2 centered = uv - 0.5;
+    vec2 corrected = vec2(centered.x * aspect, centered.y);
+    float r = length(corrected);
+    vec2 dir = r > 0.0001 ? corrected / r : vec2(0.0);  // unit direction from center; zero near the center to avoid dividing by ~0
+    vec2 offset = vec2(0.0);
+    // Any mode value not handled below leaves the offset at zero.
+    if (u_int0 == MODE_LINEAR) {
+        // Horizontal shift (no aspect correction needed)
+        offset = vec2(amount, 0.0);
+    }
+    else if (u_int0 == MODE_RADIAL) {
+        // Outward from center, stronger at edges
+        offset = dir * r * amount * RADIAL_MULT;
+        offset.x /= aspect;  // Convert back to UV space
+    }
+    else if (u_int0 == MODE_BARREL) {
+        // Lens distortion simulation (r² falloff)
+        offset = dir * r * r * amount * BARREL_MULT;
+        offset.x /= aspect;  // Convert back to UV space
+    }
+    else if (u_int0 == MODE_SWIRL) {
+        // Perpendicular to radial (rotational aberration)
+        vec2 perp = vec2(-dir.y, dir.x);
+        offset = perp * r * amount * RADIAL_MULT;
+        offset.x /= aspect;  // Convert back to UV space
+    }
+    else if (u_int0 == MODE_DIAGONAL) {
+        // 45° offset (no aspect correction needed)
+        offset = vec2(amount, amount) * INV_SQRT2;
+    }
+    // Red is sampled at +offset and blue at -offset; green and alpha come from the unshifted pixel.
+    float red = texture(u_image0, uv + offset).r;
+    float green = original.g;
+    float blue = texture(u_image0, uv - offset).b;
+    
+    fragColor = vec4(red, green, blue, original.a);
+}
--- a/blueprints/.glsl/Color_Adjustment_15.frag
+++ b/blueprints/.glsl/Color_Adjustment_15.frag
@@ -0,0 +1,78 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform float u_float0; // temperature (-100 to 100)
+uniform float u_float1; // tint (-100 to 100)
+uniform float u_float2; // vibrance (-100 to 100)
+uniform float u_float3; // saturation (-100 to 100)
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const float INPUT_SCALE = 0.01;
+const float TEMP_TINT_PRIMARY = 0.3;
+const float TEMP_TINT_SECONDARY = 0.15;
+const float VIBRANCE_BOOST = 2.0;
+const float SATURATION_BOOST = 2.0;
+const float SKIN_PROTECTION = 0.5;
+const float EPSILON = 0.001;
+const vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);
+
+void main() {
+    vec4 tex = texture(u_image0, v_texCoord);
+    vec3 color = tex.rgb;
+    
+    // Scale inputs: -100/100 → -1/1
+    float temperature = u_float0 * INPUT_SCALE;
+    float tint = u_float1 * INPUT_SCALE;
+    float vibrance = u_float2 * INPUT_SCALE;
+    float saturation = u_float3 * INPUT_SCALE;
+    
+    // Temperature (warm/cool): positive = warm, negative = cool
+    color.r += temperature * TEMP_TINT_PRIMARY;
+    color.b -= temperature * TEMP_TINT_PRIMARY;
+    
+    // Tint (green/magenta): positive = green, negative = magenta
+    color.g += tint * TEMP_TINT_PRIMARY;
+    color.r -= tint * TEMP_TINT_SECONDARY;
+    color.b -= tint * TEMP_TINT_SECONDARY;
+    
+    // Single clamp after temperature/tint
+    color = clamp(color, 0.0, 1.0);
+    
+    // Vibrance with skin protection
+    if (vibrance != 0.0) {
+        float maxC = max(color.r, max(color.g, color.b));
+        float minC = min(color.r, min(color.g, color.b));
+        float sat = maxC - minC;
+        float gray = dot(color, LUMA_WEIGHTS);
+        // sat is the max-min channel spread; gray is the luma the color is mixed against.
+        if (vibrance < 0.0) {
+            // Desaturate: -100 → gray
+            color = mix(vec3(gray), color, 1.0 + vibrance);
+        } else {
+            // Boost less saturated colors more
+            float vibranceAmt = vibrance * (1.0 - sat);
+            
+            // Branchless skin tone protection
+            float isWarmTone = step(color.b, color.g) * step(color.g, color.r);  // 1.0 only when r >= g >= b
+            float warmth = (color.r - color.b) / max(maxC, EPSILON);  // EPSILON guards the division for black pixels
+            float skinTone = isWarmTone * warmth * sat * (1.0 - sat);
+            vibranceAmt *= (1.0 - skinTone * SKIN_PROTECTION);
+            // A mix() factor above 1.0 extrapolates away from gray, i.e. increases saturation.
+            color = mix(vec3(gray), color, 1.0 + vibranceAmt * VIBRANCE_BOOST);
+        }
+    }
+    
+    // Saturation
+    if (saturation != 0.0) {
+        float gray = dot(color, LUMA_WEIGHTS);
+        float satMix = saturation < 0.0
+            ? 1.0 + saturation                      // -100 → gray
+            : 1.0 + saturation * SATURATION_BOOST;  // +100 → 3x boost
+        color = mix(vec3(gray), color, satMix);
+    }
+    // Final clamp to the 0..1 range; alpha is passed through.
+    fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);
+}
--- a/blueprints/.glsl/Edge-Preserving_Blur_128.frag
+++ b/blueprints/.glsl/Edge-Preserving_Blur_128.frag
@@ -0,0 +1,94 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform float u_float0;   // Blur radius (0–20, default ~5)
+uniform float u_float1;   // Edge threshold (0–100, default ~30)
+uniform int u_int0;       // Step size (0/1 = every pixel, 2+ = skip pixels)
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const int MAX_RADIUS = 20;
+const float EPSILON = 0.0001;
+
+// Perceptual luminance (0.299/0.587/0.114 channel weights). NOTE(review): no caller is visible in this shader.
+float getLuminance(vec3 rgb) {
+    return dot(rgb, vec3(0.299, 0.587, 0.114));
+}
+
+vec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius,
+                     float sigmaSpatial, float sigmaColor)
+{   // Weighted average of the neighbors within `radius`; weights fall off with distance and with color difference from the center.
+    vec4 center = texture(u_image0, uv);
+    vec3 centerRGB = center.rgb;
+    // Precomputed exponent factors, so that exp(x * inv) == exp(-x / (2 * sigma^2)).
+    float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial);
+    float invColor2   = -0.5 / (sigmaColor * sigmaColor + EPSILON);
+
+    vec3 sumRGB = vec3(0.0);
+    float sumWeight = 0.0;
+    // NOTE(review): the local `step` hides the GLSL built-in step() inside this function.
+    int step = max(u_int0, 1);
+    float radius2 = float(radius * radius);
+    // Loops run over the constant MAX_RADIUS range; the runtime radius and step are applied by the `continue` guards.
+    for (int dy = -MAX_RADIUS; dy <= MAX_RADIUS; dy++) {
+        if (dy < -radius || dy > radius) continue;
+        if (abs(dy) % step != 0) continue;
+
+        for (int dx = -MAX_RADIUS; dx <= MAX_RADIUS; dx++) {
+            if (dx < -radius || dx > radius) continue;
+            if (abs(dx) % step != 0) continue;
+
+            vec2 offset = vec2(float(dx), float(dy));
+            float dist2 = dot(offset, offset);
+            if (dist2 > radius2) continue;  // keep the kernel circular
+
+            vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb;
+
+            // Spatial Gaussian
+            float spatialWeight = exp(dist2 * invSpatial2);
+
+            // Perceptual color distance (weighted RGB)
+            vec3 diff = sampleRGB - centerRGB;
+            float colorDist = dot(diff * diff, vec3(0.299, 0.587, 0.114));
+            float colorWeight = exp(colorDist * invColor2);
+
+            float w = spatialWeight * colorWeight;
+            sumRGB += sampleRGB * w;
+            sumWeight += w;
+        }
+    }
+    // Normalize; EPSILON guards against a zero total weight.
+    vec3 resultRGB = sumRGB / max(sumWeight, EPSILON);
+    return vec4(resultRGB, center.a); // preserve center alpha
+}
+
+void main() {
+    vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));  // size of one texel in UV units
+    // Clamp the requested radius to the kernel limit and round it to whole pixels.
+    float radiusF = clamp(u_float0, 0.0, float(MAX_RADIUS));
+    int radius = int(radiusF + 0.5);
+    // A zero radius is a pass-through.
+    if (radius == 0) {
+        fragColor = texture(u_image0, v_texCoord);
+        return;
+    }
+
+    // Edge threshold → color sigma
+    // Squared curve for better low-end control
+    float t = clamp(u_float1, 0.0, 100.0) / 100.0;
+    t *= t;
+    float sigmaColor = mix(0.01, 0.5, t);
+
+    // Spatial sigma tied to radius
+    float sigmaSpatial = max(radiusF * 0.75, 0.5);
+
+    fragColor = bilateralFilter(
+        v_texCoord,
+        texelSize,
+        radius,
+        sigmaSpatial,
+        sigmaColor
+    );
+}
--- a/blueprints/.glsl/Film_Grain_15.frag
+++ b/blueprints/.glsl/Film_Grain_15.frag
@@ -0,0 +1,124 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform float u_float0; // grain amount      [0.0 – 1.0]   typical: 0.2–0.8
+uniform float u_float1; // grain size        [0.3 – 3.0]   lower = finer grain
+uniform float u_float2; // color amount      [0.0 – 1.0]   0 = monochrome, 1 = RGB grain
+uniform float u_float3; // luminance bias    [0.0 – 1.0]   0 = uniform, 1 = shadows only
+uniform int   u_int0;   // noise mode        [0 or 1]      0 = smooth, 1 = grainy
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+// High-quality integer hash (pcg-like): a multiply-add step followed by a state-dependent shift/xor/multiply mix.
+uint pcg(uint v) {
+    uint state = v * 747796405u + 2891336453u;
+    uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
+    return (word >> 22u) ^ word;
+}
+
+// 2D -> 1D hash input: the hashed y is added to x and the sum is hashed again.
+uint hash2d(uvec2 p) {
+    return pcg(p.x + pcg(p.y));
+}
+
+// Hash to float [0, 1]: the 32-bit hash of p divided by the largest uint value.
+float hashf(uvec2 p) {
+    return float(hash2d(p)) / float(0xffffffffu);
+}
+
+// Hash to float with offset (for RGB channels): re-hashes hash2d(p) + offset to derive a separate value per offset.
+float hashf(uvec2 p, uint offset) {
+    return float(pcg(hash2d(p) + offset)) / float(0xffffffffu);
+}
+
+// Convert uniform [0,1] to roughly Gaussian distribution
+// Using simple approximation: sum of four hash samples (offsets 0..3) for the same cell
+float toGaussian(uvec2 p) {
+    float sum = hashf(p, 0u) + hashf(p, 1u) + hashf(p, 2u) + hashf(p, 3u);
+    return (sum - 2.0) * 0.7;  // Centered (four [0,1] samples average to a sum of 2), scaled
+}
+
+float toGaussian(uvec2 p, uint offset) {  // as the one-argument overload, but sampling hash offsets offset..offset+3
+    float sum = hashf(p, offset) + hashf(p, offset + 1u) 
+              + hashf(p, offset + 2u) + hashf(p, offset + 3u);
+    return (sum - 2.0) * 0.7;  // centered around 0 and scaled
+}
+
+// Smooth noise with better interpolation
+float smoothNoise(vec2 p) {
+    vec2 i = floor(p);
+    vec2 f = fract(p);
+    
+    // Quintic interpolation (less banding than cubic)
+    f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);
+    // Noise values at the four corners of the lattice cell containing p.
+    uvec2 ui = uvec2(i);
+    float a = toGaussian(ui);
+    float b = toGaussian(ui + uvec2(1u, 0u));
+    float c = toGaussian(ui + uvec2(0u, 1u));
+    float d = toGaussian(ui + uvec2(1u, 1u));
+    // Bilinear blend of the corner values using the smoothed fractions.
+    return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);
+}
+
+float smoothNoise(vec2 p, uint offset) {  // interpolated lattice noise whose corner values use the given hash offset
+    vec2 i = floor(p);
+    vec2 f = fract(p);
+    // Quintic fade 6f^5 - 15f^4 + 10f^3, applied per component.
+    f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);
+    
+    uvec2 ui = uvec2(i);
+    float a = toGaussian(ui, offset);
+    float b = toGaussian(ui + uvec2(1u, 0u), offset);
+    float c = toGaussian(ui + uvec2(0u, 1u), offset);
+    float d = toGaussian(ui + uvec2(1u, 1u), offset);
+    // Bilinear blend of the four corner values.
+    return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);
+}
+
+void main() {
+    vec4 color = texture(u_image0, v_texCoord);
+    
+    // Luminance (Rec.709)
+    float luma = dot(color.rgb, vec3(0.2126, 0.7152, 0.0722));
+    
+    // Grain UV (resolution-independent)
+    vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01);  // max() guards against a zero grain size
+    uvec2 grainPixel = uvec2(grainUV);
+    // g is the shared (monochrome) grain value; grainRGB holds one value per channel (hash offsets 100/200/300).
+    float g;
+    vec3 grainRGB;
+    
+    if (u_int0 == 1) {
+        // Grainy mode: pure hash noise (no interpolation = no banding)
+        g = toGaussian(grainPixel);
+        grainRGB = vec3(
+            toGaussian(grainPixel, 100u),
+            toGaussian(grainPixel, 200u),
+            toGaussian(grainPixel, 300u)
+        );
+    } else {
+        // Smooth mode: interpolated with quintic curve
+        g = smoothNoise(grainUV);
+        grainRGB = vec3(
+            smoothNoise(grainUV, 100u),
+            smoothNoise(grainUV, 200u),
+            smoothNoise(grainUV, 300u)
+        );
+    }
+    
+    // Luminance weighting (less grain in highlights)
+    float lumWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0));
+    
+    // Strength
+    float strength = u_float0 * 0.15;
+    
+    // Color vs monochrome grain
+    vec3 grainColor = mix(vec3(g), grainRGB, clamp(u_float2, 0.0, 1.0));
+    // Add the grain, clamp to 0..1, and keep the source alpha.
+    color.rgb += grainColor * strength * lumWeight;
+    fragColor0 = vec4(clamp(color.rgb, 0.0, 1.0), color.a);
+}
--- a/blueprints/.glsl/Glow_30.frag
+++ b/blueprints/.glsl/Glow_30.frag
@@ -0,0 +1,133 @@
+#version 300 es
+precision mediump float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform int u_int0;      // Blend mode
+uniform int u_int1;      // Color tint
+uniform float u_float0;  // Intensity
+uniform float u_float1;  // Radius
+uniform float u_float2;  // Threshold
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+const int BLEND_ADD      = 0;
+const int BLEND_SCREEN   = 1;
+const int BLEND_SOFT     = 2;
+const int BLEND_OVERLAY  = 3;
+const int BLEND_LIGHTEN  = 4;
+
+const float GOLDEN_ANGLE = 2.39996323;
+const int MAX_SAMPLES = 48;
+const vec3 LUMA = vec3(0.299, 0.587, 0.114);
+
+float hash(vec2 p) {  // cheap fract()-based pseudo-random value in [0, 1) derived from a 2D position
+    p = fract(p * vec2(123.34, 456.21));
+    p += dot(p, p + 45.32);
+    return fract(p.x * p.y);  // NOTE(review): this file declares mediump float precision; confirm the result is stable for large inputs
+}
+
+vec3 hexToRgb(int h) {  // unpack a packed 0xRRGGBB integer into RGB components scaled to 0..1
+    return vec3(
+        float((h >> 16) & 255),
+        float((h >> 8) & 255),
+        float(h & 255)
+    ) * (1.0 / 255.0);
+}
+
+vec3 blend(vec3 base, vec3 glow, int mode) {  // composite glow over base using the selected blend mode
+    if (mode == BLEND_SCREEN) {
+        return 1.0 - (1.0 - base) * (1.0 - glow);
+    }
+    if (mode == BLEND_SOFT) {
+        return mix(  // step() selects the first formula where glow < 0.5 and the second where glow >= 0.5
+            base - (1.0 - 2.0 * glow) * base * (1.0 - base),
+            base + (2.0 * glow - 1.0) * (sqrt(base) - base),
+            step(0.5, glow)
+        );
+    }
+    if (mode == BLEND_OVERLAY) {
+        return mix(  // multiply-style where base < 0.5, screen-style where base >= 0.5
+            2.0 * base * glow,
+            1.0 - 2.0 * (1.0 - base) * (1.0 - glow),
+            step(0.5, base)
+        );
+    }
+    if (mode == BLEND_LIGHTEN) {
+        return max(base, glow);
+    }
+    return base + glow;  // BLEND_ADD, and the fallback for any mode value not handled above
+}
+
+void main() {
+    vec4 original = texture(u_image0, v_texCoord);
+    
+    float intensity = u_float0 * 0.05;
+    float radius = u_float1 * u_float1 * 0.012;
+    // Negligible intensity or radius: output the source pixel unchanged.
+    if (intensity < 0.001 || radius < 0.1) {
+        fragColor = original;
+        return;
+    }
+    // Soft luminance threshold: the bright-pass mask ramps from t0 to t1 via smoothstep.
+    float threshold = 1.0 - u_float2 * 0.01;
+    float t0 = threshold - 0.15;
+    float t1 = threshold + 0.15;
+    
+    vec2 texelSize = 1.0 / u_resolution;
+    float radius2 = radius * radius;
+    // Smaller radii use fewer taps (between 35% and 100% of MAX_SAMPLES).
+    float sampleScale = clamp(radius * 0.75, 0.35, 1.0);
+    int samples = int(float(MAX_SAMPLES) * sampleScale);
+    // Per-pixel noise randomizes the start angle and the radius of the tap pattern.
+    float noise = hash(gl_FragCoord.xy);
+    float angleOffset = noise * GOLDEN_ANGLE;
+    float radiusJitter = 0.85 + noise * 0.3;
+    // Rotation by GOLDEN_ANGLE, applied to dir after every tap.
+    float ca = cos(GOLDEN_ANGLE);
+    float sa = sin(GOLDEN_ANGLE);
+    vec2 dir = vec2(cos(angleOffset), sin(angleOffset));
+    
+    vec3 glow = vec3(0.0);
+    float totalWeight = 0.0;
+    
+    // Center tap
+    float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));
+    glow += original.rgb * centerMask * 2.0;
+    totalWeight += 2.0;
+    // Constant loop bound with an early break at the runtime tap count.
+    for (int i = 1; i < MAX_SAMPLES; i++) {
+        if (i >= samples) break;
+        
+        float fi = float(i);
+        float dist = sqrt(fi / float(samples)) * radius * radiusJitter;
+        
+        vec2 offset = dir * dist * texelSize;
+        vec3 c = texture(u_image0, v_texCoord + offset).rgb;
+        float mask = smoothstep(t0, t1, dot(c, LUMA));
+        // Squared falloff weight that reaches zero at dist^2 = 1.5 * radius^2.
+        float w = 1.0 - (dist * dist) / (radius2 * 1.5);
+        w = max(w, 0.0);
+        w *= w;
+        
+        glow += c * mask * w;
+        totalWeight += w;
+        
+        dir = vec2(
+            dir.x * ca - dir.y * sa,
+            dir.x * sa + dir.y * ca
+        );
+    }
+    
+    glow *= intensity / max(totalWeight, 0.001);
+    // A tint value of 0 (or a negative one) leaves the glow untinted.
+    if (u_int1 > 0) {
+        glow *= hexToRgb(u_int1);
+    }
+    
+    vec3 result = blend(original.rgb, glow, u_int0);
+    result += (noise - 0.5) * (1.0 / 255.0);  // at most +/- half an 8-bit step of noise; presumably dithering against banding
+    
+    fragColor = vec4(clamp(result, 0.0, 1.0), original.a);
+}
--- a/blueprints/.glsl/Hue_and_Saturation_1.frag
+++ b/blueprints/.glsl/Hue_and_Saturation_1.frag
@@ -0,0 +1,222 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform int u_int0;      // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize
+uniform int u_int1;      // Color Space: 0=HSL, 1=HSB/HSV
+uniform float u_float0;  // Hue (-180 to 180)
+uniform float u_float1;  // Saturation (-100 to 100)
+uniform float u_float2;  // Lightness/Brightness (-100 to 100)
+uniform float u_float3;  // Overlap (0 to 100) - feathering between adjacent color ranges
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+// Color range modes
+const int MODE_MASTER   = 0;
+const int MODE_RED      = 1;
+const int MODE_YELLOW   = 2;
+const int MODE_GREEN    = 3;
+const int MODE_CYAN     = 4;
+const int MODE_BLUE     = 5;
+const int MODE_MAGENTA  = 6;
+const int MODE_COLORIZE = 7;
+
+// Color space modes
+const int COLORSPACE_HSL = 0;
+const int COLORSPACE_HSB = 1;
+
+const float EPSILON = 0.0001;
+
+//=============================================================================
+// RGB <-> HSL Conversions
+//=============================================================================
+
+vec3 rgb2hsl(vec3 c) {  // RGB -> (hue, saturation, lightness); hue is scaled to 0..1
+    float maxC = max(max(c.r, c.g), c.b);
+    float minC = min(min(c.r, c.g), c.b);
+    float delta = maxC - minC;
+
+    float h = 0.0;
+    float s = 0.0;
+    float l = (maxC + minC) * 0.5;
+    // Near-gray input (delta <= EPSILON) keeps h = s = 0.
+    if (delta > EPSILON) {
+        s = l < 0.5
+            ? delta / (maxC + minC)
+            : delta / (2.0 - maxC - minC);
+        // Hue sector in 0..6 chosen by the dominant channel, then scaled to 0..1.
+        if (maxC == c.r) {
+            h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);
+        } else if (maxC == c.g) {
+            h = (c.b - c.r) / delta + 2.0;
+        } else {
+            h = (c.r - c.g) / delta + 4.0;
+        }
+        h /= 6.0;
+    }
+
+    return vec3(h, s, l);
+}
+
+float hue2rgb(float p, float q, float t) {  // piecewise-linear channel value between p and q for hue position t
+    t = fract(t);  // wrap the hue position into 0..1
+    if (t < 1.0/6.0) return p + (q - p) * 6.0 * t;
+    if (t < 0.5)       return q;
+    if (t < 2.0/3.0)   return p + (q - p) * (2.0/3.0 - t) * 6.0;
+    return p;
+}
+
+vec3 hsl2rgb(vec3 hsl) {  // (hue, saturation, lightness) -> RGB
+    if (hsl.y < EPSILON) return vec3(hsl.z);  // no saturation: gray at the given lightness
+    // q and p are the two levels passed to hue2rgb for each channel.
+    float q = hsl.z < 0.5
+        ? hsl.z * (1.0 + hsl.y)
+        : hsl.z + hsl.y - hsl.z * hsl.y;
+    float p = 2.0 * hsl.z - q;
+    // R, G and B are evaluated at hue offsets of +1/3, 0 and -1/3.
+    return vec3(
+        hue2rgb(p, q, hsl.x + 1.0/3.0),
+        hue2rgb(p, q, hsl.x),
+        hue2rgb(p, q, hsl.x - 1.0/3.0)
+    );
+}
+
+vec3 rgb2hsb(vec3 c) {  // RGB -> (hue, saturation, brightness); hue is scaled to 0..1
+    float maxC = max(max(c.r, c.g), c.b);
+    float minC = min(min(c.r, c.g), c.b);
+    float delta = maxC - minC;
+
+    float h = 0.0;
+    float s = (maxC > EPSILON) ? delta / maxC : 0.0;  // saturation is 0 for (near-)black to avoid dividing by ~0
+    float b = maxC;
+    // Near-gray input (delta <= EPSILON) keeps h = 0.
+    if (delta > EPSILON) {
+        if (maxC == c.r) {
+            h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);
+        } else if (maxC == c.g) {
+            h = (c.b - c.r) / delta + 2.0;
+        } else {
+            h = (c.r - c.g) / delta + 4.0;
+        }
+        h /= 6.0;
+    }
+
+    return vec3(h, s, b);
+}
+
+vec3 hsb2rgb(vec3 hsb) {  // (hue, saturation, brightness) -> RGB, without branches
+    vec3 rgb = clamp(abs(mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0);  // fully saturated color for the hue
+    return hsb.z * mix(vec3(1.0), rgb, hsb.y);  // blend toward white by (1 - saturation), then scale by brightness
+}
+
+//=============================================================================
+// Color Range Weight Calculation
+//=============================================================================
+
+float hueDistance(float a, float b) {
+    float direct = abs(a - b);             // separation without wrapping
+    return min(direct, 1.0 - direct);      // or the shorter way around the hue circle
+}
+
+float getHueWeight(float hue, float center, float overlap) {
+    // Weight of `hue` for the 1/6-wide band around `center`: 1 inside the band, fading to 0 across the feather.
+    float baseWidth = 1.0 / 6.0;
+    float d = hueDistance(hue, center);
+    float inner = baseWidth * 0.5;
+
+    // smoothstep() is undefined when edge0 >= edge1, so the feather is kept strictly
+    // positive; overlap == 0 then gives a (near) hard edge at `inner`.
+    float feather = max(baseWidth * overlap, EPSILON);
+    return 1.0 - smoothstep(inner, inner + feather, d);
+}
+
+float getModeWeight(float hue, int mode, float overlap) {  // how strongly the selected color range applies to `hue`
+    if (mode == MODE_MASTER || mode == MODE_COLORIZE) return 1.0;  // these modes affect every hue equally
+    // Red sits at the hue wrap-around, so it is tested against both 0.0 and 1.0.
+    if (mode == MODE_RED) {
+        return max(
+            getHueWeight(hue, 0.0, overlap),
+            getHueWeight(hue, 1.0, overlap)
+        );
+    }
+    // Remaining modes are spaced 1/6 apart: Yellow = 1/6, Green = 2/6, ... Magenta = 5/6.
+    float center = float(mode - 1) / 6.0;
+    return getHueWeight(hue, center, overlap);
+}
+
+//=============================================================================
+// Adjustment Functions
+//=============================================================================
+
+float adjustLightness(float l, float amount) {
+    // Positive amounts move l toward 1; zero or negative amounts scale it toward 0.
+    if (amount > 0.0) return l + (1.0 - l) * amount;
+    return l + l * amount;
+}
+
+float adjustBrightness(float b, float amount) {  // additive brightness change, limited to 0..1
+    return min(max(b + amount, 0.0), 1.0);
+}
+
+float adjustSaturation(float s, float amount) {
+    // Positive amounts move s toward 1; zero or negative amounts scale it toward 0.
+    if (amount > 0.0) return s + (1.0 - s) * amount;
+    return s + s * amount;
+}
+
+vec3 colorize(vec3 rgb, float hue, float sat, float light) {  // recolor: only the luma of `rgb` is kept
+    float lum = dot(rgb, vec3(0.299, 0.587, 0.114));
+    float l = adjustLightness(lum, light);
+    // Build an HSL color from the requested hue/saturation and the adjusted source luma.
+    vec3 hsl = vec3(fract(hue), clamp(sat, 0.0, 1.0), clamp(l, 0.0, 1.0));
+    return hsl2rgb(hsl);
+}
+
+//=============================================================================
+// Main
+//=============================================================================
+
+void main() {
+    vec4 original = texture(u_image0, v_texCoord);
+
+    float hueShift   = u_float0 / 360.0;   // -180..180 -> -0.5..0.5
+    float satAmount  = u_float1 / 100.0;   // -100..100 -> -1..1
+    float lightAmount= u_float2 / 100.0;   // -100..100 -> -1..1
+    float overlap    = u_float3 / 100.0;   // 0..100 -> 0..1
+
+    vec3 result;
+    // Colorize does not use the source hue/saturation and returns early.
+    if (u_int0 == MODE_COLORIZE) {
+        result = colorize(original.rgb, hueShift, satAmount, lightAmount);
+        fragColor = vec4(result, original.a);
+        return;
+    }
+    // hsx = (hue, saturation, lightness-or-brightness) in the selected color space.
+    vec3 hsx = (u_int1 == COLORSPACE_HSL)
+        ? rgb2hsl(original.rgb)
+        : rgb2hsb(original.rgb);
+
+    float weight = getModeWeight(hsx.x, u_int0, overlap);
+    // In per-color modes, pixels with (near-)zero saturation are left untouched.
+    if (u_int0 != MODE_MASTER && hsx.y < EPSILON) {
+        weight = 0.0;
+    }
+    // Every adjustment below is scaled by the range weight.
+    if (weight > EPSILON) {
+        float h = fract(hsx.x + hueShift * weight);
+        float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0);
+        float v = (u_int1 == COLORSPACE_HSL)
+            ? clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0)
+            : clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0);
+
+        vec3 adjusted = vec3(h, s, v);
+        result = (u_int1 == COLORSPACE_HSL)
+            ? hsl2rgb(adjusted)
+            : hsb2rgb(adjusted);
+    } else {
+        result = original.rgb;
+    }
+
+    fragColor = vec4(result, original.a);
+}
--- a/blueprints/.glsl/Image_Blur_1.frag
+++ b/blueprints/.glsl/Image_Blur_1.frag
@@ -0,0 +1,111 @@
+#version 300 es
+#pragma passes 2
+precision highp float;
+
+// Blur type constants
+const int BLUR_GAUSSIAN = 0;
+const int BLUR_BOX = 1;
+const int BLUR_RADIAL = 2;
+
+// Radial blur config
+const int RADIAL_SAMPLES = 12;
+const float RADIAL_STRENGTH = 0.0003;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform int u_int0;      // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)
+uniform float u_float0;  // Blur radius/amount
+uniform int u_pass;      // Pass index (0 = horizontal, 1 = vertical)
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+float gaussian(float x, float sigma) {
+    // Unnormalised Gaussian exp(-x^2 / (2 * sigma^2)); the caller divides
+    // by the accumulated weight afterwards.
+    float denom = 2.0 * sigma * sigma;
+    return exp(-(x * x) / denom);
+}
+
+// Angular blur around the image centre (0.5, 0.5): averages samples taken
+// at the fragment's distance from the centre, rotated in angleStep
+// increments from -RADIAL_SAMPLES to +RADIAL_SAMPLES steps.
+vec4 radialBlur(float radius) {
+    vec2 pivot = vec2(0.5);
+    vec2 toPixel = v_texCoord - pivot;
+    float dist = length(toPixel);
+
+    // Too close to the pivot to normalise the direction; pass through.
+    if (dist < 1e-4) {
+        return texture(u_image0, v_texCoord);
+    }
+
+    float angleStep = radius * RADIAL_STRENGTH;
+    vec2 unitDir = toPixel / dist;
+
+    // Per-iteration rotation.
+    float cosStep = cos(angleStep);
+    float sinStep = sin(angleStep);
+
+    // Start at the most negative angle, then rotate forward incrementally.
+    float startAngle = -float(RADIAL_SAMPLES) * angleStep;
+    float cosStart = cos(startAngle);
+    float sinStart = sin(startAngle);
+    vec2 rotated = vec2(
+        unitDir.x * cosStart - unitDir.y * sinStart,
+        unitDir.x * sinStart + unitDir.y * cosStart
+    );
+
+    vec4 accum = vec4(0.0);
+    float weightSum = 0.0;
+
+    for (int k = -RADIAL_SAMPLES; k <= RADIAL_SAMPLES; k++) {
+        // Linear falloff: 1 at k == 0, 0 at k == +/-RADIAL_SAMPLES.
+        float w = 1.0 - abs(float(k)) / float(RADIAL_SAMPLES);
+        accum += texture(u_image0, pivot + rotated * dist) * w;
+        weightSum += w;
+
+        rotated = vec2(
+            rotated.x * cosStep - rotated.y * sinStep,
+            rotated.x * sinStep + rotated.y * cosStep
+        );
+    }
+
+    return accum / max(weightSum, 0.001);
+}
+
+// One axis of the separable Gaussian / box blur. The axis is chosen by
+// u_pass (0 = horizontal, otherwise vertical); `taps` is the number of
+// texels sampled on each side of the centre.
+vec4 separableBlur(float radius, int taps) {
+    vec2 texelSize = 1.0 / u_resolution;
+    vec2 axis = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);
+    float sigma = radius / 2.0;
+    bool isGaussian = (u_int0 == BLUR_GAUSSIAN);
+
+    vec4 accum = vec4(0.0);
+    float weightSum = 0.0;
+
+    for (int k = -taps; k <= taps; k++) {
+        vec2 offset = axis * float(k) * texelSize;
+
+        // Any type other than Gaussian is treated as box (uniform weights).
+        float w;
+        if (isGaussian) {
+            w = gaussian(float(k), sigma);
+        } else {
+            w = 1.0;
+        }
+
+        accum += texture(u_image0, v_texCoord + offset) * w;
+        weightSum += w;
+    }
+
+    return accum / weightSum;
+}
+
+void main() {
+    // Negative radii are treated as zero.
+    float radius = max(u_float0, 0.0);
+
+    // Radial blur is single-pass: pass 0 does the work, later passes copy.
+    if (u_int0 == BLUR_RADIAL) {
+        if (u_pass > 0) {
+            fragColor0 = texture(u_image0, v_texCoord);
+        } else {
+            fragColor0 = radialBlur(radius);
+        }
+        return;
+    }
+
+    // Separable Gaussian / box blur; a zero radius is a straight copy.
+    int taps = int(ceil(radius));
+    if (taps == 0) {
+        fragColor0 = texture(u_image0, v_texCoord);
+        return;
+    }
+
+    fragColor0 = separableBlur(radius, taps);
+}
--- a/blueprints/.glsl/Image_Channels_23.frag
+++ b/blueprints/.glsl/Image_Channels_23.frag
@@ -0,0 +1,19 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+layout(location = 1) out vec4 fragColor1;
+layout(location = 2) out vec4 fragColor2;
+layout(location = 3) out vec4 fragColor3;
+
+void main() {
+  // Fetch the texel once; every output below is derived from it.
+  vec4 px = texture(u_image0, v_texCoord);
+
+  // Replicate one channel across RGB per render target, with opaque alpha.
+  fragColor0 = vec4(px.rrr, 1.0);  // red   -> output 0
+  fragColor1 = vec4(px.ggg, 1.0);  // green -> output 1
+  fragColor2 = vec4(px.bbb, 1.0);  // blue  -> output 2
+  fragColor3 = vec4(px.aaa, 1.0);  // alpha -> output 3
+}
--- a/blueprints/.glsl/Image_Levels_1.frag
+++ b/blueprints/.glsl/Image_Levels_1.frag
@@ -0,0 +1,71 @@
+#version 300 es
+precision highp float;
+
+// Levels Adjustment
+// u_int0:   channel      (0=RGB, 1=R, 2=G, 3=B)         default: 0
+// u_float0: input black  (0-255)                        default: 0
+// u_float1: input white  (0-255)                        default: 255
+// u_float2: gamma        (0.01-9.99)                    default: 1.0
+// u_float3: output black (0-255)                        default: 0
+// u_float4: output white (0-255)                        default: 255
+
+uniform sampler2D u_image0;
+uniform int u_int0;
+uniform float u_float0;
+uniform float u_float1;
+uniform float u_float2;
+uniform float u_float3;
+uniform float u_float4;
+
+in vec2 v_texCoord;
+out vec4 fragColor;
+
+vec3 applyLevels(vec3 color, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
+    // Keep the input span strictly positive so the division is safe even
+    // when inWhite <= inBlack.
+    float span = max(inWhite - inBlack, 0.0001);
+
+    // 1) remap [inBlack, inWhite] -> [0, 1], 2) gamma curve,
+    // 3) remap [0, 1] -> [outBlack, outWhite].
+    vec3 normalized = clamp((color - inBlack) / span, 0.0, 1.0);
+    vec3 curved = pow(normalized, vec3(1.0 / gamma));
+    return mix(vec3(outBlack), vec3(outWhite), curved);
+}
+
+float applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {
+    // Scalar version of the levels curve, applied to one channel value.
+    // The input span is kept strictly positive so the division is safe.
+    float span = max(inWhite - inBlack, 0.0001);
+    float normalized = clamp((value - inBlack) / span, 0.0, 1.0);
+    float curved = pow(normalized, 1.0 / gamma);
+    return mix(outBlack, outWhite, curved);
+}
+
+void main() {
+    vec4 src = texture(u_image0, v_texCoord);
+
+    // Black/white points are divided by 255 to match the sampled colour
+    // range; gamma is used as given.
+    float inBlack  = u_float0 / 255.0;
+    float inWhite  = u_float1 / 255.0;
+    float gamma    = u_float2;
+    float outBlack = u_float3 / 255.0;
+    float outWhite = u_float4 / 255.0;
+
+    // Start from the untouched colour, so an unknown channel selector
+    // leaves the pixel as it is.
+    vec3 result = src.rgb;
+
+    switch (u_int0) {
+        case 0:  // all of RGB
+            result = applyLevels(src.rgb, inBlack, inWhite, gamma, outBlack, outWhite);
+            break;
+        case 1:  // red only
+            result.r = applySingleChannel(src.r, inBlack, inWhite, gamma, outBlack, outWhite);
+            break;
+        case 2:  // green only
+            result.g = applySingleChannel(src.g, inBlack, inWhite, gamma, outBlack, outWhite);
+            break;
+        case 3:  // blue only
+            result.b = applySingleChannel(src.b, inBlack, inWhite, gamma, outBlack, outWhite);
+            break;
+        default:
+            break;
+    }
+
+    // Alpha is passed through unchanged.
+    fragColor = vec4(result, src.a);
+}
--- a/blueprints/.glsl/README.md
+++ b/blueprints/.glsl/README.md
@@ -0,0 +1,28 @@
+# GLSL Shader Sources
+
+This folder contains the GLSL fragment shaders extracted from blueprint JSON files for easier editing and version control.
+
+## File Naming Convention
+
+`{Blueprint_Name}_{node_id}.frag`
+
+- **Blueprint_Name**: The JSON filename with spaces/special chars replaced by underscores
+- **node_id**: The GLSLShader node ID within the subgraph
+
+## Usage
+
+```bash
+# Extract shaders from blueprint JSONs to this folder
+python update_blueprints.py extract
+
+# Patch edited shaders back into blueprint JSONs
+python update_blueprints.py patch
+```
+
+## Workflow
+
+1. Run `extract` to pull current shaders from JSONs
+2. Edit `.frag` files
+3. Run `patch` to update the blueprint JSONs
+4. Test
+5. Commit both `.frag` files and updated JSONs
--- a/blueprints/.glsl/Sharpen_23.frag
+++ b/blueprints/.glsl/Sharpen_23.frag
@@ -0,0 +1,28 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+void main() {
+    // Size of one pixel in texture coordinates.
+    vec2 px = 1.0 / u_resolution;
+
+    // Current texel plus its four axis-aligned neighbours.
+    vec4 mid = texture(u_image0, v_texCoord);
+    vec4 up  = texture(u_image0, v_texCoord + vec2( 0.0, -px.y));
+    vec4 dn  = texture(u_image0, v_texCoord + vec2( 0.0,  px.y));
+    vec4 lft = texture(u_image0, v_texCoord + vec2(-px.x,  0.0));
+    vec4 rgt = texture(u_image0, v_texCoord + vec2( px.x,  0.0));
+
+    // 4-neighbour Laplacian: high where the centre differs from its
+    // surroundings.
+    vec4 laplacian = mid * 4.0 - up - dn - lft - rgt;
+
+    // Add the edge signal back, scaled by the strength uniform.
+    vec3 boosted = (mid + laplacian * u_float0).rgb;
+
+    // Clamp the colour; alpha comes straight from the centre texel.
+    fragColor0 = vec4(clamp(boosted, 0.0, 1.0), mid.a);
+}
--- a/blueprints/.glsl/Unsharp_Mask_26.frag
+++ b/blueprints/.glsl/Unsharp_Mask_26.frag
@@ -0,0 +1,61 @@
+#version 300 es
+precision highp float;
+
+uniform sampler2D u_image0;
+uniform vec2 u_resolution;
+uniform float u_float0;  // amount    [0.0 - 3.0]  typical: 0.5-1.5
+uniform float u_float1;  // radius    [0.5 - 10.0] blur radius in pixels
+uniform float u_float2;  // threshold [0.0 - 0.1]  min difference to sharpen
+
+in vec2 v_texCoord;
+layout(location = 0) out vec4 fragColor0;
+
+float gaussian(float x, float sigma) {
+    // Unnormalised Gaussian exp(-x^2 / (2 * sigma^2)); the caller divides
+    // by the accumulated weight afterwards.
+    float denom = 2.0 * sigma * sigma;
+    return exp(-(x * x) / denom);
+}
+
+float getLuminance(vec3 color) {
+    // Weighted sum of the RGB channels (0.2126 / 0.7152 / 0.0722).
+    const vec3 lumaWeights = vec3(0.2126, 0.7152, 0.0722);
+    return dot(color, lumaWeights);
+}
+
+// Unsharp mask: blur the image with a 2-D Gaussian, take the difference
+// between the original and the blur, and add that difference back scaled
+// by `amount`. Differences whose luminance delta is below `threshold` are
+// faded out.
+void main() {
+    vec2 texel = 1.0 / u_resolution;
+    float radius = max(u_float1, 0.5);   // never below half a pixel
+    float amount = u_float0;
+    float threshold = u_float2;
+
+    vec4 original = texture(u_image0, v_texCoord);
+
+    // Gaussian blur for the "unsharp" mask. The kernel covers
+    // ceil(radius) texels on each side, with sigma = radius / 2.
+    int samples = int(ceil(radius));
+    float sigma = radius / 2.0;
+
+    vec4 blurred = vec4(0.0);
+    float totalWeight = 0.0;
+
+    for (int x = -samples; x <= samples; x++) {
+        for (int y = -samples; y <= samples; y++) {
+            vec2 offset = vec2(float(x), float(y)) * texel;
+            vec4 sample_color = texture(u_image0, v_texCoord + offset);
+
+            // Weight by the Euclidean distance from the centre tap.
+            float dist = length(vec2(float(x), float(y)));
+            float weight = gaussian(dist, sigma);
+            blurred += sample_color * weight;
+            totalWeight += weight;
+        }
+    }
+    blurred /= totalWeight;
+
+    // Unsharp mask = original - blurred
+    vec3 mask = original.rgb - blurred.rgb;
+
+    // Luminance-based threshold with smooth falloff.
+    // smoothstep() is undefined when edge0 >= edge1, so a threshold of 0
+    // (or below) must not reach it; treat that case as "no threshold" and
+    // keep the full mask.
+    float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));
+    float thresholdScale = (threshold > 0.0)
+        ? smoothstep(0.0, threshold, lumaDelta)
+        : 1.0;
+    mask *= thresholdScale;
+
+    // Sharpen: original + mask * amount
+    vec3 sharpened = original.rgb + mask * amount;
+
+    // Clamp the colour; alpha is passed through unchanged.
+    fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);
+}
--- a/blueprints/.glsl/update_blueprints.py
+++ b/blueprints/.glsl/update_blueprints.py
@@ -0,0 +1,159 @@
+#!/usr/bin/env python3
+"""
+Shader Blueprint Updater
+
+Syncs GLSL shader files between this folder and blueprint JSON files.
+
+File naming convention:
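+    {Blueprint_Name}_{node_id}.frag
+    (Blueprint_Name is the JSON file stem with every character other than
+    word characters and "-" replaced by "_")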
+
+Usage:
+    python update_blueprints.py extract   # Extract shaders from JSONs to here
+    python update_blueprints.py patch     # Patch shaders back into JSONs
+    python update_blueprints.py           # Same as patch (default)
+"""
+
+import json
+import logging
+import sys
+import re
+from pathlib import Path
+
+logging.basicConfig(level=logging.INFO, format='%(message)s')
+logger = logging.getLogger(__name__)
+
+GLSL_DIR = Path(__file__).parent
+BLUEPRINTS_DIR = GLSL_DIR.parent
+
+
+def get_blueprint_files():
+    """Get all blueprint JSON files."""
+    return sorted(BLUEPRINTS_DIR.glob("*.json"))
+
+
+def sanitize_filename(name):
+    """Convert blueprint name to safe filename."""
+    return re.sub(r'[^\w\-]', '_', name)
+
+
+def extract_shaders():
+    """Extract all shaders from blueprint JSONs to this folder."""
+    extracted = 0
+    for json_path in get_blueprint_files():
+        blueprint_name = json_path.stem
+
+        try:
+            with open(json_path, 'r') as f:
+                data = json.load(f)
+        except (json.JSONDecodeError, IOError) as e:
+            logger.warning("Skipping %s: %s", json_path.name, e)
+            continue
+
+        # Find GLSLShader nodes in subgraphs
+        for subgraph in data.get('definitions', {}).get('subgraphs', []):
+            for node in subgraph.get('nodes', []):
+                if node.get('type') == 'GLSLShader':
+                    node_id = node.get('id')
+                    widgets = node.get('widgets_values', [])
+
+                    # Find shader code (first string that looks like GLSL)
+                    for widget in widgets:
+                        if isinstance(widget, str) and widget.startswith('#version'):
+                            safe_name = sanitize_filename(blueprint_name)
+                            frag_name = f"{safe_name}_{node_id}.frag"
+                            frag_path = GLSL_DIR / frag_name
+
+                            with open(frag_path, 'w') as f:
+                                f.write(widget)
+
+                            logger.info("  Extracted: %s", frag_name)
+                            extracted += 1
+                            break
+
+    logger.info("\nExtracted %d shader(s)", extracted)
+
+
+def patch_shaders():
+    """Patch shaders from this folder back into blueprint JSONs."""
+    # Build lookup: blueprint_name -> [(node_id, shader_code), ...]
+    shader_updates = {}
+
+    for frag_path in sorted(GLSL_DIR.glob("*.frag")):
+        # Parse filename: {blueprint_name}_{node_id}.frag
+        parts = frag_path.stem.rsplit('_', 1)
+        if len(parts) != 2:
+            logger.warning("Skipping %s: invalid filename format", frag_path.name)
+            continue
+
+        blueprint_name, node_id_str = parts
+
+        try:
+            node_id = int(node_id_str)
+        except ValueError:
+            logger.warning("Skipping %s: invalid node_id", frag_path.name)
+            continue
+
+        with open(frag_path, 'r') as f:
+            shader_code = f.read()
+
+        if blueprint_name not in shader_updates:
+            shader_updates[blueprint_name] = []
+        shader_updates[blueprint_name].append((node_id, shader_code))
+
+    # Apply updates to JSON files
+    patched = 0
+    for json_path in get_blueprint_files():
+        blueprint_name = sanitize_filename(json_path.stem)
+
+        if blueprint_name not in shader_updates:
+            continue
+
+        try:
+            with open(json_path, 'r') as f:
+                data = json.load(f)
+        except (json.JSONDecodeError, IOError) as e:
+            logger.error("Error reading %s: %s", json_path.name, e)
+            continue
+
+        modified = False
+        for node_id, shader_code in shader_updates[blueprint_name]:
+            # Find the node and update
+            for subgraph in data.get('definitions', {}).get('subgraphs', []):
+                for node in subgraph.get('nodes', []):
+                    if node.get('id') == node_id and node.get('type') == 'GLSLShader':
+                        widgets = node.get('widgets_values', [])
+                        if len(widgets) > 0 and widgets[0] != shader_code:
+                            widgets[0] = shader_code
+                            modified = True
+                            logger.info("  Patched: %s (node %d)", json_path.name, node_id)
+                            patched += 1
+
+        if modified:
+            with open(json_path, 'w') as f:
+                json.dump(data, f)
+
+    if patched == 0:
+        logger.info("No changes to apply.")
+    else:
+        logger.info("\nPatched %d shader(s)", patched)
+
+
+def main():
+    if len(sys.argv) < 2:
+        command = "patch"
+    else:
+        command = sys.argv[1].lower()
+
+    if command == "extract":
+        logger.info("Extracting shaders from blueprints...")
+        extract_shaders()
+    elif command in ("patch", "update", "apply"):
+        logger.info("Patching shaders into blueprints...")
+        patch_shaders()
+    else:
+        logger.info(__doc__)
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+    main()
--- a/blueprints/Brightness
+++ b/blueprints/Brightness
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Canny
+++ b/blueprints/Canny
--- a/blueprints/Chromatic
+++ b/blueprints/Chromatic
--- a/blueprints/Color
+++ b/blueprints/Color
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Depth
+++ b/blueprints/Depth
--- a/blueprints/Edge-Preserving
+++ b/blueprints/Edge-Preserving
--- a/blueprints/Film
+++ b/blueprints/Film
--- a/blueprints/Glow.json
+++ b/blueprints/Glow.json
--- a/Saturation.json
+++ b/Saturation.json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
@@ -0,0 +1 @@
+{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n  vec4 color = texture(u_image0, v_texCoord);\n  // Output each channel as grayscale to separate render targets\n  fragColor0 = vec4(vec3(color.r), 1.0);  // Red channel\n  fragColor1 = vec4(vec3(color.g), 1.0);  // Green channel\n  fragColor2 = vec4(vec3(color.b), 1.0);  // Blue channel\n  fragColor3 = vec4(vec3(color.a), 1.0);  // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, 
"target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}}
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Qwen-image).json
+++ b/(Qwen-image).json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Qwen-Image).json
+++ b/(Qwen-Image).json
--- a/Upscale(Z-image-Turbo).json
+++ b/Upscale(Z-image-Turbo).json
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/blueprints/Image
+++ b/blueprints/Image
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Pose
+++ b/blueprints/Pose
--- a/blueprints/Prompt
+++ b/blueprints/Prompt
@@ -0,0 +1 @@
+{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": 
"STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}}
--- a/blueprints/Sharpen.json
+++ b/blueprints/Sharpen.json
@@ -0,0 +1 @@
+{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", 
"link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0;  // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n    vec2 texel = 1.0 / u_resolution;\n    \n    // Sample center and neighbors\n    vec4 center = texture(u_image0, v_texCoord);\n    vec4 top    = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n    vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0,  texel.y));\n    vec4 left   = texture(u_image0, v_texCoord + vec2(-texel.x,  0.0));\n    vec4 right  = texture(u_image0, v_texCoord + vec2( texel.x,  0.0));\n    \n    // Edge enhancement (Laplacian)\n    vec4 edges = center * 4.0 - top - bottom - left - right;\n    \n    // Add edges back 
scaled by strength\n    vec4 sharpened = center + edges * u_float0;\n    \n    fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}}
--- a/blueprints/Text
+++ b/blueprints/Text
--- a/(Z-Image-Turbo).json
+++ b/(Z-Image-Turbo).json
--- a/blueprints/Text
+++ b/blueprints/Text
--- a/blueprints/Unsharp
+++ b/blueprints/Unsharp
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
--- a/blueprints/Video
+++ b/blueprints/Video
@@ -0,0 +1 @@
+{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": 
"ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": 
"UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}}
--- a/comfy/comfy_types/node_typing.py
+++ b/comfy/comfy_types/node_typing.py
@@ -176,6 +176,8 @@ class InputTypeOptions(TypedDict):
    """COMBO type only. Specifies the configuration for a multi-select widget.
    Available after ComfyUI frontend v1.13.4
    https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987"""
+    gradient_stops: NotRequired[list[list[float]]]
+    """Gradient color stops for gradientslider display mode. Each stop is [offset, r, g, b] (``FLOAT``)."""


 class HiddenInputTypeDict(TypedDict):
--- a/comfy/conds.py
+++ b/comfy/conds.py
@@ -4,6 +4,25 @@ import comfy.utils
 import logging


+def is_equal(x, y):
+    """Recursively compare two COND payloads for equality.
+
+    Tensors are compared with ``torch.equal``; dicts must have the same keys
+    and equal values; lists/tuples must have the same type, the same length
+    and equal elements. Anything else falls back to ``==``.
+
+    Always returns a plain ``bool``. If the fallback ``==`` raises, or yields
+    a result whose truth value is ambiguous (e.g. the elementwise result of
+    comparing a multi-element tensor with a non-tensor), a warning is logged
+    and the values are treated as not equal.
+    """
+    if torch.is_tensor(x) and torch.is_tensor(y):
+        return torch.equal(x, y)
+    elif isinstance(x, dict) and isinstance(y, dict):
+        if x.keys() != y.keys():
+            return False
+        return all(is_equal(x[k], y[k]) for k in x)
+    elif isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)):
+        if type(x) is not type(y) or len(x) != len(y):
+            return False
+        return all(is_equal(a, b) for a, b in zip(x, y))
+    else:
+        try:
+            # Coerce inside the try: an elementwise result would otherwise
+            # only fail later, when the caller evaluates it as a boolean.
+            return bool(x == y)
+        except Exception:
+            logging.warning("comparison issue with COND")
+            return False
+
+
 class CONDRegular:
    def __init__(self, cond):
        self.cond = cond
@@ -84,7 +103,7 @@ class CONDConstant(CONDRegular):
        return self._copy_with(self.cond)

    def can_concat(self, other):
-        if self.cond != other.cond:
+        if not is_equal(self.cond, other.cond):
            return False
        return True

--- a/comfy/ldm/lightricks/av_model.py
+++ b/comfy/ldm/lightricks/av_model.py
@@ -9,6 +9,7 @@ from comfy.ldm.lightricks.model import (
    LTXVModel,
 )
 from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier
+from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import comfy.ldm.common_dit

 class CompressedTimestep:
@@ -217,7 +218,7 @@ class BasicAVTransformerBlock(nn.Module):
    def forward(
        self, x: Tuple[torch.Tensor, torch.Tensor], v_context=None, a_context=None, attention_mask=None, v_timestep=None, a_timestep=None,
        v_pe=None, a_pe=None, v_cross_pe=None, a_cross_pe=None, v_cross_scale_shift_timestep=None, a_cross_scale_shift_timestep=None,
-        v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None,
+        v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None, self_attention_mask=None,
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        run_vx = transformer_options.get("run_vx", True)
        run_ax = transformer_options.get("run_ax", True)
@@ -233,7 +234,7 @@ class BasicAVTransformerBlock(nn.Module):
            vshift_msa, vscale_msa = (self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(0, 2)))
            norm_vx = comfy.ldm.common_dit.rms_norm(vx) * (1 + vscale_msa) + vshift_msa
            del vshift_msa, vscale_msa
-            attn1_out = self.attn1(norm_vx, pe=v_pe, transformer_options=transformer_options)
+            attn1_out = self.attn1(norm_vx, pe=v_pe, mask=self_attention_mask, transformer_options=transformer_options)
            del norm_vx
            # video cross-attention
            vgate_msa = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(2, 3))[0]
@@ -450,6 +451,29 @@ class LTXAVModel(LTXVModel):
            operations=self.operations,
        )

+        self.audio_embeddings_connector = Embeddings1DConnector(
+            split_rope=True,
+            double_precision_rope=True,
+            dtype=dtype,
+            device=device,
+            operations=self.operations,
+        )
+
+        self.video_embeddings_connector = Embeddings1DConnector(
+            split_rope=True,
+            double_precision_rope=True,
+            dtype=dtype,
+            device=device,
+            operations=self.operations,
+        )
+
+    def preprocess_text_embeds(self, context):
+        """Run text embeddings through the video and audio connectors.
+
+        If the last dimension of ``context`` already equals
+        ``self.caption_channels * 2`` it is returned unchanged. Otherwise the
+        first output of ``video_embeddings_connector`` and the first output of
+        ``audio_embeddings_connector`` are concatenated along the last
+        dimension (video first, audio second).
+        """
+        expected_width = self.caption_channels * 2
+        if context.shape[-1] == expected_width:
+            return context
+        video_part = self.video_embeddings_connector(context)[0]
+        audio_part = self.audio_embeddings_connector(context)[0]
+        return torch.concat((video_part, audio_part), dim=-1)
+
    def _init_transformer_blocks(self, device, dtype, **kwargs):
        """Initialize transformer blocks for LTXAV."""
        self.transformer_blocks = nn.ModuleList(
@@ -702,7 +726,7 @@ class LTXAVModel(LTXVModel):
        return [(v_pe, av_cross_video_freq_cis), (a_pe, av_cross_audio_freq_cis)]

    def _process_transformer_blocks(
-        self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs
+        self, x, context, attention_mask, timestep, pe, transformer_options={}, self_attention_mask=None, **kwargs
    ):
        vx = x[0]
        ax = x[1]
@@ -746,6 +770,7 @@ class LTXAVModel(LTXVModel):
                        v_cross_gate_timestep=args["v_cross_gate_timestep"],
                        a_cross_gate_timestep=args["a_cross_gate_timestep"],
                        transformer_options=args["transformer_options"],
+                        self_attention_mask=args.get("self_attention_mask"),
                    )
                    return out

@@ -766,6 +791,7 @@ class LTXAVModel(LTXVModel):
                        "v_cross_gate_timestep": av_ca_a2v_gate_noise_timestep,
                        "a_cross_gate_timestep": av_ca_v2a_gate_noise_timestep,
                        "transformer_options": transformer_options,
+                        "self_attention_mask": self_attention_mask,
                    },
                    {"original_block": block_wrap},
                )
@@ -787,6 +813,7 @@ class LTXAVModel(LTXVModel):
                    v_cross_gate_timestep=av_ca_a2v_gate_noise_timestep,
                    a_cross_gate_timestep=av_ca_v2a_gate_noise_timestep,
                    transformer_options=transformer_options,
+                    self_attention_mask=self_attention_mask,
                )

        return [vx, ax]
--- a/comfy/ldm/lightricks/embeddings_connector.py
+++ b/comfy/ldm/lightricks/embeddings_connector.py
@@ -157,11 +157,9 @@ class Embeddings1DConnector(nn.Module):
        self.num_learnable_registers = num_learnable_registers
        if self.num_learnable_registers:
            self.learnable_registers = nn.Parameter(
-                torch.rand(
+                torch.empty(
                    self.num_learnable_registers, inner_dim, dtype=dtype, device=device
                )
-                * 2.0
-                - 1.0
            )

    def get_fractional_positions(self, indices_grid):
@@ -234,7 +232,7 @@ class Embeddings1DConnector(nn.Module):

        return indices

-    def precompute_freqs_cis(self, indices_grid, spacing="exp"):
+    def precompute_freqs_cis(self, indices_grid, spacing="exp", out_dtype=None):
        dim = self.inner_dim
        n_elem = 2  # 2 because of cos and sin
        freqs = self.precompute_freqs(indices_grid, spacing)
@@ -247,7 +245,7 @@ class Embeddings1DConnector(nn.Module):
            )
        else:
            cos_freq, sin_freq = interleaved_freqs_cis(freqs, dim % n_elem)
-        return cos_freq.to(self.dtype), sin_freq.to(self.dtype), self.split_rope
+        return cos_freq.to(dtype=out_dtype), sin_freq.to(dtype=out_dtype), self.split_rope

    def forward(
        self,
@@ -288,7 +286,7 @@ class Embeddings1DConnector(nn.Module):
            hidden_states.shape[1], dtype=torch.float32, device=hidden_states.device
        )
        indices_grid = indices_grid[None, None, :]
-        freqs_cis = self.precompute_freqs_cis(indices_grid)
+        freqs_cis = self.precompute_freqs_cis(indices_grid, out_dtype=hidden_states.dtype)

        # 2. Blocks
        for block_idx, block in enumerate(self.transformer_1d_blocks):
--- a/comfy/ldm/lightricks/model.py
+++ b/comfy/ldm/lightricks/model.py
@@ -1,6 +1,7 @@
 from abc import ABC, abstractmethod
 from enum import Enum
 import functools
+import logging
 import math
 from typing import Dict, Optional, Tuple

@@ -14,6 +15,8 @@ import comfy.ldm.common_dit

 from .symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords

+logger = logging.getLogger(__name__)
+
 def _log_base(x, base):
    return np.log(x) / np.log(base)

@@ -415,12 +418,12 @@ class BasicTransformerBlock(nn.Module):

        self.scale_shift_table = nn.Parameter(torch.empty(6, dim, device=device, dtype=dtype))

-    def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None, transformer_options={}):
+    def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None, transformer_options={}, self_attention_mask=None):
        shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + timestep.reshape(x.shape[0], timestep.shape[1], self.scale_shift_table.shape[0], -1)).unbind(dim=2)

        attn1_input = comfy.ldm.common_dit.rms_norm(x)
        attn1_input = torch.addcmul(attn1_input, attn1_input, scale_msa).add_(shift_msa)
-        attn1_input = self.attn1(attn1_input, pe=pe, transformer_options=transformer_options)
+        attn1_input = self.attn1(attn1_input, pe=pe, mask=self_attention_mask, transformer_options=transformer_options)
        x.addcmul_(attn1_input, gate_msa)
        del attn1_input

@@ -638,8 +641,16 @@ class LTXBaseModel(torch.nn.Module, ABC):
        """Process input data. Must be implemented by subclasses."""
        pass

+    def _build_guide_self_attention_mask(self, x, transformer_options, merged_args):
+        """Hook for building a per-guide self-attention mask.
+
+        This default implementation ignores all of its arguments and returns
+        None, i.e. no self-attention mask is applied. Subclasses that support
+        guide-based attention attenuation are expected to override it.
+        """
+        return None
+
    @abstractmethod
-    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, **kwargs):
+    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, self_attention_mask=None, **kwargs):
        """Process transformer blocks. Must be implemented by subclasses."""
        pass

@@ -788,9 +799,17 @@ class LTXBaseModel(torch.nn.Module, ABC):
        attention_mask = self._prepare_attention_mask(attention_mask, input_dtype)
        pe = self._prepare_positional_embeddings(pixel_coords, frame_rate, input_dtype)

+        # Build self-attention mask for per-guide attenuation
+        self_attention_mask = self._build_guide_self_attention_mask(
+            x, transformer_options, merged_args
+        )
+
        # Process transformer blocks
        x = self._process_transformer_blocks(
-            x, context, attention_mask, timestep, pe, transformer_options=transformer_options, **merged_args
+            x, context, attention_mask, timestep, pe,
+            transformer_options=transformer_options,
+            self_attention_mask=self_attention_mask,
+            **merged_args,
        )

        # Process output
@@ -890,13 +909,243 @@ class LTXVModel(LTXBaseModel):
            pixel_coords = pixel_coords[:, :, grid_mask, ...]

            kf_grid_mask = grid_mask[-keyframe_idxs.shape[2]:]
+
+            # Compute per-guide surviving token counts from guide_attention_entries.
+            # Each entry tracks one guide reference; they are appended in order and
+            # their pre_filter_counts partition the kf_grid_mask.
+            guide_entries = kwargs.get("guide_attention_entries", None)
+            if guide_entries:
+                total_pfc = sum(e["pre_filter_count"] for e in guide_entries)
+                if total_pfc != len(kf_grid_mask):
+                    raise ValueError(
+                        f"guide pre_filter_counts ({total_pfc}) != "
+                        f"keyframe grid mask length ({len(kf_grid_mask)})"
+                    )
+                resolved_entries = []
+                offset = 0
+                for entry in guide_entries:
+                    pfc = entry["pre_filter_count"]
+                    entry_mask = kf_grid_mask[offset:offset + pfc]
+                    surviving = int(entry_mask.sum().item())
+                    resolved_entries.append({
+                        **entry,
+                        "surviving_count": surviving,
+                    })
+                    offset += pfc
+                additional_args["resolved_guide_entries"] = resolved_entries
+
            keyframe_idxs = keyframe_idxs[..., kf_grid_mask, :]
            pixel_coords[:, :, -keyframe_idxs.shape[2]:, :] = keyframe_idxs

+            # Total surviving guide tokens (all guides)
+            additional_args["num_guide_tokens"] = keyframe_idxs.shape[2]
+
        x = self.patchify_proj(x)
        return x, pixel_coords, additional_args

-    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs):
+    def _build_guide_self_attention_mask(self, x, transformer_options, merged_args):
+        """Build self-attention mask for per-guide attention attenuation.
+
+        Reads resolved_guide_entries from merged_args (computed in _process_input)
+        to build a log-space additive bias mask that attenuates noisy ↔ guide
+        attention for each guide reference independently.
+
+        Args:
+            x: Token tensor, or a list whose first element is the video token
+                tensor; dim 1 is used as the total token count.
+            transformer_options: Not read by this implementation.
+            merged_args: Dict providing "num_guide_tokens" and
+                "resolved_guide_entries". Each entry is read for "strength",
+                "surviving_count" and, optionally, "pixel_mask" and
+                "latent_shape".
+
+        Returns:
+            None when there are no guide tokens or no resolved entries, when no
+            entry has strength < 1.0 or a pixel_mask, or when every computed
+            per-token weight is >= 1.0. Otherwise the mask produced by
+            _build_self_attention_mask.
+        """
+        if isinstance(x, list):
+            # AV model: x = [vx, ax]; use vx for token count and device
+            total_tokens = x[0].shape[1]
+            device = x[0].device
+            dtype = x[0].dtype
+        else:
+            total_tokens = x.shape[1]
+            device = x.device
+            dtype = x.dtype
+
+        num_guide_tokens = merged_args.get("num_guide_tokens", 0)
+        if num_guide_tokens == 0:
+            return None
+
+        resolved_entries = merged_args.get("resolved_guide_entries", None)
+        if not resolved_entries:
+            return None
+
+        # Check if any attenuation is actually needed
+        needs_attenuation = any(
+            e["strength"] < 1.0 or e.get("pixel_mask") is not None
+            for e in resolved_entries
+        )
+        if not needs_attenuation:
+            return None
+
+        # Build per-guide-token weights for all tracked guide tokens.
+        # Guides are appended in order at the end of the sequence.
+        guide_start = total_tokens - num_guide_tokens
+        all_weights = []
+        total_tracked = 0
+
+        for entry in resolved_entries:
+            surviving = entry["surviving_count"]
+            if surviving == 0:
+                # Guide contributes no tokens to the sequence; nothing to weight.
+                continue
+
+            strength = entry["strength"]
+            pixel_mask = entry.get("pixel_mask")
+            latent_shape = entry.get("latent_shape")
+
+            # A spatial mask is only used when latent_shape is also present;
+            # otherwise the guide gets a uniform weight equal to its strength.
+            if pixel_mask is not None and latent_shape is not None:
+                f_lat, h_lat, w_lat = latent_shape
+                per_token = self._downsample_mask_to_latent(
+                    pixel_mask.to(device=device, dtype=dtype),
+                    f_lat, h_lat, w_lat,
+                )
+                # per_token shape: (B, f_lat*h_lat*w_lat).
+                # Collapse batch dim — the mask is assumed identical across the
+                # batch; validate and take the first element to get (1, tokens).
+                if per_token.shape[0] > 1:
+                    ref = per_token[0]
+                    for bi in range(1, per_token.shape[0]):
+                        if not torch.equal(ref, per_token[bi]):
+                            logger.warning(
+                                "pixel_mask differs across batch elements; "
+                                "using first element only."
+                            )
+                            break
+                    per_token = per_token[:1]
+                # `surviving` is the post-grid_mask token count.
+                # Clamp to surviving to handle any mismatch safely.
+                # NOTE(review): this keeps the first n_weights columns of
+                # per_token. If per_token has fewer columns than `surviving`,
+                # total_tracked ends up smaller than the surviving guide count
+                # and the weights of later guides are placed at earlier token
+                # positions — confirm this cannot happen upstream.
+                n_weights = min(per_token.shape[1], surviving)
+                weights = per_token[:, :n_weights] * strength  # (1, n_weights)
+            else:
+                weights = torch.full(
+                    (1, surviving), strength, device=device, dtype=dtype
+                )
+
+            all_weights.append(weights)
+            total_tracked += weights.shape[1]
+
+        if not all_weights:
+            return None
+
+        # Concatenate per-token weights for all tracked guides
+        tracked_weights = torch.cat(all_weights, dim=1)  # (1, total_tracked)
+
+        # Check if any weight is actually < 1.0 (otherwise no attenuation needed)
+        if (tracked_weights >= 1.0).all():
+            return None
+
+        # Build the mask: guide tokens are at the end of the sequence.
+        # Tracked guides come first (in order), untracked follow.
+        return self._build_self_attention_mask(
+            total_tokens, num_guide_tokens, total_tracked,
+            tracked_weights, guide_start, device, dtype,
+        )
+
+    @staticmethod
+    def _downsample_mask_to_latent(mask, f_lat, h_lat, w_lat):
+        """Downsample a pixel-space mask to per-token latent weights.
+
+        Each frame is resized to (h_lat, w_lat) with area interpolation. The
+        first pixel frame always becomes the first latent frame; the remaining
+        pixel frames are averaged in equal-sized groups (or repeated via
+        nearest interpolation when there are fewer of them than remaining
+        latent frames) to produce the other f_lat - 1 latent frames.
+
+        Args:
+            mask: (B, 1, F_pix, H_pix, W_pix) pixel-space mask with values in [0, 1].
+            f_lat: Number of latent frames (pre-dilation original count).
+            h_lat: Latent height (pre-dilation original height).
+            w_lat: Latent width (pre-dilation original width).
+
+        Returns:
+            (B, F_lat * H_lat * W_lat) flattened per-token weights.
+        """
+        # NOTE(review): `rearrange` is not defined in this block; presumably
+        # einops.rearrange imported at module level — confirm.
+        b = mask.shape[0]
+        f_pix = mask.shape[2]
+
+        # Spatial downsampling: area interpolation per frame
+        spatial_down = torch.nn.functional.interpolate(
+            rearrange(mask, "b 1 f h w -> (b f) 1 h w"),
+            size=(h_lat, w_lat),
+            mode="area",
+        )
+        spatial_down = rearrange(spatial_down, "(b f) 1 h w -> b 1 f h w", b=b)
+
+        # Temporal downsampling: first pixel frame maps to first latent frame,
+        # remaining pixel frames are averaged in groups for causal temporal structure.
+        first_frame = spatial_down[:, :, :1, :, :]
+        if f_pix > 1 and f_lat > 1:
+            remaining_pix = f_pix - 1
+            remaining_lat = f_lat - 1
+            # t: number of whole pixel frames averaged into each latent frame.
+            t = remaining_pix // remaining_lat
+            if t < 1:
+                # Fewer pixel frames than latent frames — upsample by repeating
+                # the available pixel frames via nearest interpolation.
+                rest_flat = rearrange(
+                    spatial_down[:, :, 1:, :, :],
+                    "b 1 f h w -> (b h w) 1 f",
+                )
+                rest_up = torch.nn.functional.interpolate(
+                    rest_flat, size=remaining_lat, mode="nearest",
+                )
+                rest = rearrange(
+                    rest_up, "(b h w) 1 f -> b 1 f h w",
+                    b=b, h=h_lat, w=w_lat,
+                )
+            else:
+                # Trim trailing pixel frames that don't fill a complete group
+                usable = remaining_lat * t
+                rest = rearrange(
+                    spatial_down[:, :, 1:1 + usable, :, :],
+                    "b 1 (f t) h w -> b 1 f t h w",
+                    t=t,
+                )
+                # Average over the group axis -> (b, 1, remaining_lat, h, w).
+                rest = rest.mean(dim=3)
+            latent_mask = torch.cat([first_frame, rest], dim=2)
+        elif f_lat > 1:
+            # Single pixel frame but multiple latent frames — repeat the
+            # single frame across all latent frames.
+            latent_mask = first_frame.expand(-1, -1, f_lat, -1, -1)
+        else:
+            # f_lat <= 1: only the first pixel frame is used; any further
+            # pixel frames are ignored.
+            latent_mask = first_frame
+
+        return rearrange(latent_mask, "b 1 f h w -> b (f h w)")
+
+    @staticmethod
+    def _build_self_attention_mask(total_tokens, num_guide_tokens, tracked_count,
+                                    tracked_weights, guide_start, device, dtype):
+        """Create an additive (log-space) bias for self-attention.
+
+        Only the noisy ↔ tracked-guide blocks of the mask are written; every
+        other position stays 0.0, so guide tokens after the tracked range keep
+        full attention.
+
+        Args:
+            total_tokens: Sequence length; the mask is total_tokens x total_tokens.
+            num_guide_tokens: Total guide tokens at the end of the sequence
+                (not read by this function).
+            tracked_count: Number of guide tokens, starting at guide_start,
+                that receive a weight.
+            tracked_weights: (1, tracked_count) tensor of per-token weights.
+            guide_start: Index of the first guide token in the sequence.
+            device: Device for the returned mask.
+            dtype: Dtype for the returned mask.
+
+        Returns:
+            (1, 1, total_tokens, total_tokens) tensor. Entries are log(weight)
+            for weights > 0 and finfo(dtype).min for weights <= 0.
+        """
+        limits = torch.finfo(dtype)
+        weights = tracked_weights.to(device=device, dtype=dtype)  # (1, tracked_count)
+
+        # log(w) where w > 0, finfo.min (effectively fully masked) elsewhere.
+        bias = torch.where(
+            weights > 0,
+            torch.log(weights.clamp(min=limits.tiny)),
+            torch.full_like(weights, limits.min),
+        )
+
+        noisy = slice(None, guide_start)
+        tracked = slice(guide_start, guide_start + tracked_count)
+
+        attn_bias = torch.zeros(
+            (1, 1, total_tokens, total_tokens), device=device, dtype=dtype
+        )
+        # Rows = noisy queries, columns = tracked guide keys.
+        attn_bias[:, :, noisy, tracked] = bias.view(1, 1, 1, -1)
+        # Rows = tracked guide queries, columns = noisy keys.
+        attn_bias[:, :, tracked, noisy] = bias.view(1, 1, -1, 1)
+        return attn_bias
+
+    def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, self_attention_mask=None, **kwargs):
        """Process transformer blocks for LTXV."""
        patches_replace = transformer_options.get("patches_replace", {})
        blocks_replace = patches_replace.get("dit", {})
@@ -906,10 +1155,10 @@ class LTXVModel(LTXBaseModel):

                def block_wrap(args):
                    out = {}
-                    out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"], transformer_options=args["transformer_options"])
+                    out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"], transformer_options=args["transformer_options"], self_attention_mask=args.get("self_attention_mask"))
                    return out

-                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe, "transformer_options": transformer_options}, {"original_block": block_wrap})
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe, "transformer_options": transformer_options, "self_attention_mask": self_attention_mask}, {"original_block": block_wrap})
                x = out["img"]
            else:
                x = block(
@@ -919,6 +1168,7 @@ class LTXVModel(LTXBaseModel):
                    timestep=timestep,
                    pe=pe,
                    transformer_options=transformer_options,
+                    self_attention_mask=self_attention_mask,
                )

        return x
--- a/comfy/ldm/modules/diffusionmodules/openaimodel.py
+++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py
@@ -18,6 +18,8 @@ import comfy.patcher_extension
 import comfy.ops
 ops = comfy.ops.disable_weight_init

+from ..sdpose import HeatmapHead
+
 class TimestepBlock(nn.Module):
    """
    Any module where forward() takes timestep embeddings as a second argument.
@@ -441,6 +443,7 @@ class UNetModel(nn.Module):
        disable_temporal_crossattention=False,
        max_ddpm_temb_period=10000,
        attn_precision=None,
+        heatmap_head=False,
        device=None,
        operations=ops,
    ):
@@ -827,6 +830,9 @@ class UNetModel(nn.Module):
            #nn.LogSoftmax(dim=1)  # change to cross_entropy and produce non-normalized logits
        )

+        if heatmap_head:
+            self.heatmap_head = HeatmapHead(device=device, dtype=self.dtype, operations=operations)
+
    def forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs):
        return comfy.patcher_extension.WrapperExecutor.new_class_executor(
            self._forward,
--- a/comfy/ldm/modules/sdpose.py
+++ b/comfy/ldm/modules/sdpose.py
@@ -0,0 +1,130 @@
+import torch
+import numpy as np
+from scipy.ndimage import gaussian_filter
+
+class HeatmapHead(torch.nn.Module):
+    """Convolutional head that predicts per-keypoint heatmaps and decodes them.
+
+    The layer stack is: optional stride-2 transposed-conv stages, optional
+    conv stages, then a final conv that outputs ``out_channels`` heatmaps.
+    ``forward`` runs the stack and converts the heatmaps into keypoint
+    coordinates and scores with NumPy/SciPy on the CPU.
+    """
+    def __init__(
+            self,
+            in_channels=640,
+            out_channels=133,
+            input_size=(768, 1024),
+            heatmap_scale=4,
+            deconv_out_channels=(640,),
+            deconv_kernel_sizes=(4,),
+            conv_out_channels=(640,),
+            conv_kernel_sizes=(1,),
+            final_layer_kernel_size=1,
+            device=None, dtype=None, operations=None
+        ):
+        """Build the deconv / conv / final layers.
+
+        Args:
+            in_channels: channel count of the feature map passed to ``forward``.
+            out_channels: number of heatmaps produced by the final conv.
+            input_size: size of the input image space; only used to derive
+                ``heatmap_size`` and ``scale_factor``.
+            heatmap_scale: integer divisor applied to ``input_size`` to get
+                ``heatmap_size``.
+            deconv_out_channels / deconv_kernel_sizes: one entry per
+                transposed-conv stage (paired with ``zip``); an empty value
+                replaces the stage with ``Identity``.
+            conv_out_channels / conv_kernel_sizes: one entry per conv stage
+                (paired with ``zip``); an empty value replaces the stage with
+                ``Identity``.
+            final_layer_kernel_size: kernel size of the output conv.
+            device, dtype: forwarded to every layer that is created.
+            operations: namespace providing ``ConvTranspose2d`` and ``Conv2d``.
+                The default ``None`` is not usable as-is: the attribute lookups
+                below would fail, so callers must pass a real namespace.
+
+        Raises:
+            ValueError: if a deconv kernel size is not 4, 3 or 2.
+        """
+        super().__init__()
+
+        # heatmap_size is only used to compute scale_factor; forward() reads the
+        # real heatmap height/width from the tensor it produces.
+        self.heatmap_size = (input_size[0] // heatmap_scale, input_size[1] // heatmap_scale)
+        # Per-axis factor mapping heatmap coordinates back to input space.
+        # NOTE(review): forward() multiplies (x, y) keypoints by this, so
+        # input_size appears to be ordered (width, height) - confirm with callers.
+        self.scale_factor = ((np.array(input_size) - 1) / (np.array(self.heatmap_size) - 1)).astype(np.float32)
+
+        # Deconv layers
+        if deconv_out_channels:
+            deconv_layers = []
+            for out_ch, kernel_size in zip(deconv_out_channels, deconv_kernel_sizes):
+                # With stride 2, each padding/output_padding pair below makes the
+                # transposed conv exactly double the spatial size.
+                if kernel_size == 4:
+                    padding, output_padding = 1, 0
+                elif kernel_size == 3:
+                    padding, output_padding = 1, 1
+                elif kernel_size == 2:
+                    padding, output_padding = 0, 0
+                else:
+                    raise ValueError(f'Unsupported kernel size {kernel_size}')
+
+                # Stage = transposed conv (no bias) -> InstanceNorm2d -> SiLU.
+                # The norm comes from torch.nn directly, not from `operations`.
+                deconv_layers.extend([
+                    operations.ConvTranspose2d(in_channels, out_ch, kernel_size,
+                                     stride=2, padding=padding, output_padding=output_padding, bias=False, device=device, dtype=dtype),
+                    torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype),
+                    torch.nn.SiLU(inplace=True)
+                ])
+                # The next stage consumes this stage's output channels.
+                in_channels = out_ch
+            self.deconv_layers = torch.nn.Sequential(*deconv_layers)
+        else:
+            self.deconv_layers = torch.nn.Identity()
+
+        # Conv layers
+        if conv_out_channels:
+            conv_layers = []
+            for out_ch, kernel_size in zip(conv_out_channels, conv_kernel_sizes):
+                # Stride-1 conv with (kernel_size - 1) // 2 padding, which keeps
+                # the spatial size for odd kernel sizes.
+                padding = (kernel_size - 1) // 2
+                conv_layers.extend([
+                    operations.Conv2d(in_channels, out_ch, kernel_size,
+                            stride=1, padding=padding, device=device, dtype=dtype),
+                    torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype),
+                    torch.nn.SiLU(inplace=True)
+                ])
+                in_channels = out_ch
+            self.conv_layers = torch.nn.Sequential(*conv_layers)
+        else:
+            self.conv_layers = torch.nn.Identity()
+
+        # Output projection: one channel per heatmap.
+        self.final_layer = operations.Conv2d(in_channels, out_channels, kernel_size=final_layer_kernel_size, padding=final_layer_kernel_size // 2, device=device, dtype=dtype)
+
+    def forward(self, x): # Decode heatmaps to keypoints
+        """Run the head on ``x`` and decode the resulting heatmaps.
+
+        Returns:
+            ``(batch_keypoints, batch_scores)``: two lists with one entry per
+            batch item. Each keypoints entry is a ``(K, 2)`` float32 NumPy array
+            of ``(x, y)`` scaled by ``self.scale_factor``; each scores entry is a
+            ``(K,)`` array holding the maximum of the un-blurred heatmap.
+            Keypoints whose score is ``<= 0`` are reported as ``-1``.
+        """
+        heatmaps = self.final_layer(self.conv_layers(self.deconv_layers(x)))
+        # Everything below runs in NumPy on a float32 CPU copy.
+        heatmaps_np = heatmaps.float().cpu().numpy()  # (B, K, H, W)
+        B, K, H, W = heatmaps_np.shape
+
+        batch_keypoints = []
+        batch_scores = []
+
+        for b in range(B):
+            # Work on a copy: hm is blurred, clipped and log-transformed in place.
+            hm = heatmaps_np[b].copy()  # (K, H, W)
+
+            # --- vectorised argmax ---
+            flat = hm.reshape(K, -1)
+            idx = np.argmax(flat, axis=1)
+            # Copied so the scores keep the pre-blur maxima.
+            scores = flat[np.arange(K), idx].copy()
+            y_locs, x_locs = np.unravel_index(idx, (H, W))
+            keypoints = np.stack([x_locs, y_locs], axis=-1).astype(np.float32)  # (K, 2) in heatmap space
+            invalid = scores <= 0.
+            keypoints[invalid] = -1
+
+            # --- DARK sub-pixel refinement (UDP) ---
+            # 1. Gaussian blur with max-preserving normalisation
+            # NOTE(review): scipy's gaussian_filter derives its kernel radius
+            # from sigma/truncate, not from `border`; the zero padding here only
+            # changes edge handling - confirm this matches the reference decoder.
+            border = 5  # (kernel-1)//2 for kernel=11
+            for k in range(K):
+                origin_max = np.max(hm[k])
+                dr = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32)
+                dr[border:-border, border:-border] = hm[k].copy()
+                dr = gaussian_filter(dr, sigma=2.0)
+                hm[k] = dr[border:-border, border:-border].copy()
+                cur_max = np.max(hm[k])
+                # Rescale so the blurred map keeps the original peak value.
+                if cur_max > 0:
+                    hm[k] *= origin_max / cur_max
+            # 2. Log-space for Taylor expansion
+            # Clip to [1e-3, 50] first so the in-place log stays finite.
+            np.clip(hm, 1e-3, 50., hm)
+            np.log(hm, hm)
+            # 3. Hessian-based Newton step
+            # Pad each map by one edge pixel and flatten all K maps into one
+            # vector, so neighbours can be fetched with flat-index offsets.
+            hm_pad = np.pad(hm, ((0, 0), (1, 1), (1, 1)), mode='edge').flatten()
+            # Flat index of each keypoint in its padded map (+1 for the padding),
+            # then offset by k * (padded map size) to select map k.
+            # Invalid keypoints (-1, -1) resolve to the padded corner; their
+            # result is overwritten with -1 at the end of the loop body.
+            index = keypoints[:, 0] + 1 + (keypoints[:, 1] + 1) * (W + 2)
+            index += (W + 2) * (H + 2) * np.arange(0, K)
+            index = index.astype(int).reshape(-1, 1)
+            # Samples: centre, x+1, y+1, (x+1, y+1), (x-1, y-1), x-1, y-1.
+            i_       = hm_pad[index]
+            ix1      = hm_pad[index + 1]
+            iy1      = hm_pad[index + W + 2]
+            ix1y1    = hm_pad[index + W + 3]
+            ix1_y1_  = hm_pad[index - W - 3]
+            ix1_     = hm_pad[index - 1]
+            iy1_     = hm_pad[index - 2 - W]
+            # Central-difference gradient of the log-heatmap.
+            dx = 0.5 * (ix1 - ix1_)
+            dy = 0.5 * (iy1 - iy1_)
+            derivative = np.concatenate([dx, dy], axis=1).reshape(K, 2, 1)
+            # Finite-difference second derivatives (Hessian entries).
+            dxx = ix1  - 2 * i_ + ix1_
+            dyy = iy1  - 2 * i_ + iy1_
+            dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_)
+            hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1).reshape(K, 2, 2)
+            # eps * I is added before inversion; `hessian` then holds the inverse.
+            hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2))
+            # Newton update: keypoint -= H^-1 @ gradient, per keypoint.
+            keypoints -= np.einsum('imn,ink->imk', hessian, derivative).squeeze(axis=-1)
+
+            # --- restore to input image space ---
+            keypoints = keypoints * self.scale_factor
+            # Re-apply the sentinel: refinement and scaling modified these rows.
+            keypoints[invalid] = -1
+
+            batch_keypoints.append(keypoints)
+            batch_scores.append(scores)
+
+        return batch_keypoints, batch_scores
--- a/comfy/ldm/wan/vae.py
+++ b/comfy/ldm/wan/vae.py
@@ -459,6 +459,7 @@ class WanVAE(nn.Module):
                 attn_scales=[],
                 temperal_downsample=[True, True, False],
                 image_channels=3,
+                 conv_out_channels=3,
                 dropout=0.0):
        super().__init__()
        self.dim = dim
@@ -474,7 +475,7 @@ class WanVAE(nn.Module):
                                 attn_scales, self.temperal_downsample, dropout)
        self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1)
        self.conv2 = CausalConv3d(z_dim, z_dim, 1)
-        self.decoder = Decoder3d(dim, z_dim, image_channels, dim_mult, num_res_blocks,
+        self.decoder = Decoder3d(dim, z_dim, conv_out_channels, dim_mult, num_res_blocks,
                                 attn_scales, self.temperal_upsample, dropout)

    def encode(self, x):
@@ -484,7 +485,7 @@ class WanVAE(nn.Module):
        iter_ = 1 + (t - 1) // 4
        feat_map = None
        if iter_ > 1:
-            feat_map = [None] * count_conv3d(self.decoder)
+            feat_map = [None] * count_conv3d(self.encoder)
        ## 对encode输入的x，按时间拆分为1、4、4、4....
        for i in range(iter_):
            conv_idx = [0]
--- a/comfy/lora.py
+++ b/comfy/lora.py
@@ -337,6 +337,7 @@ def model_lora_keys_unet(model, key_map={}):
            if k.startswith("diffusion_model.decoder.") and k.endswith(".weight"):
                key_lora = k[len("diffusion_model.decoder."):-len(".weight")]
                key_map["base_model.model.{}".format(key_lora)] = k  # Official base model loras
+                key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k  # LyCORIS/LoKR format

    return key_map

--- a/comfy/memory_management.py
+++ b/comfy/memory_management.py
@@ -78,4 +78,4 @@ def interpret_gathered_like(tensors, gathered):

    return dest_views

-aimdo_allocator = None
+aimdo_enabled = False
--- a/comfy/model_base.py
+++ b/comfy/model_base.py
@@ -76,6 +76,7 @@ class ModelType(Enum):
    FLUX = 8
    IMG_TO_IMG = 9
    FLOW_COSMOS = 10
+    IMG_TO_IMG_FLOW = 11


 def model_sampling(model_config, model_type):
@@ -108,6 +109,8 @@ def model_sampling(model_config, model_type):
    elif model_type == ModelType.FLOW_COSMOS:
        c = comfy.model_sampling.COSMOS_RFLOW
        s = comfy.model_sampling.ModelSamplingCosmosRFlow
+    elif model_type == ModelType.IMG_TO_IMG_FLOW:
+        c = comfy.model_sampling.IMG_TO_IMG_FLOW

    class ModelSampling(s, c):
        pass
@@ -922,6 +925,25 @@ class Flux(BaseModel):
            out['ref_latents'] = list([1, 16, sum(map(lambda a: math.prod(a.size()[2:]), ref_latents))])
        return out

+class LongCatImage(Flux):
+    """Flux variant that supplies default RoPE shifts and drops the guidance cond."""
+
+    def _apply_model(self, x, t, c_concat=None, c_crossattn=None, control=None, transformer_options={}, **kwargs):
+        """Call the parent ``_apply_model`` with default ``rope_options`` filled in.
+
+        Works on copies of ``transformer_options`` and its ``"rope_options"``
+        entry, so the caller's dictionaries are not modified. Shift values the
+        caller already provided are kept; only missing keys get a default.
+        """
+        patched_options = transformer_options.copy()
+        rope_options = dict(patched_options.get("rope_options", {}))
+        for shift_key, shift_default in (("shift_t", 1.0), ("shift_y", 512.0), ("shift_x", 512.0)):
+            if shift_key not in rope_options:
+                rope_options[shift_key] = shift_default
+        patched_options["rope_options"] = rope_options
+        return super()._apply_model(x, t, c_concat, c_crossattn, control, patched_options, **kwargs)
+
+    def encode_adm(self, **kwargs):
+        """Always return ``None``; the keyword arguments are ignored."""
+        return None
+
+    def extra_conds(self, **kwargs):
+        """Return the parent's extra conds with any ``'guidance'`` entry removed."""
+        conds = super().extra_conds(**kwargs)
+        if 'guidance' in conds:
+            del conds['guidance']
+        return conds
+
 class Flux2(Flux):
    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
@@ -971,6 +993,10 @@ class LTXV(BaseModel):
        if keyframe_idxs is not None:
            out['keyframe_idxs'] = comfy.conds.CONDRegular(keyframe_idxs)

+        guide_attention_entries = kwargs.get("guide_attention_entries", None)
+        if guide_attention_entries is not None:
+            out['guide_attention_entries'] = comfy.conds.CONDConstant(guide_attention_entries)
+
        return out

    def process_timestep(self, timestep, x, denoise_mask=None, **kwargs):
@@ -988,10 +1014,14 @@ class LTXAV(BaseModel):
    def extra_conds(self, **kwargs):
        out = super().extra_conds(**kwargs)
        attention_mask = kwargs.get("attention_mask", None)
+        device = kwargs["device"]
+
        if attention_mask is not None:
            out['attention_mask'] = comfy.conds.CONDRegular(attention_mask)
        cross_attn = kwargs.get("cross_attn", None)
        if cross_attn is not None:
+            if hasattr(self.diffusion_model, "preprocess_text_embeds"):
+                cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference()))
            out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn)

        out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25))
@@ -1019,6 +1049,10 @@ class LTXAV(BaseModel):
        if latent_shapes is not None:
            out['latent_shapes'] = comfy.conds.CONDConstant(latent_shapes)

+        guide_attention_entries = kwargs.get("guide_attention_entries", None)
+        if guide_attention_entries is not None:
+            out['guide_attention_entries'] = comfy.conds.CONDConstant(guide_attention_entries)
+
        return out

    def process_timestep(self, timestep, x, denoise_mask=None, audio_denoise_mask=None, **kwargs):
@@ -1462,6 +1496,12 @@ class WAN22(WAN21):
    def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs):
        return latent_image

+class WAN21_FlowRVS(WAN21):
+    """WAN21 subclass whose default ``model_type`` is ``ModelType.IMG_TO_IMG_FLOW``."""
+    def __init__(self, model_config, model_type=ModelType.IMG_TO_IMG_FLOW, image_to_video=False, device=None):
+        # Overwrites the caller's config in place: "model_type" is always "t2v".
+        model_config.unet_config["model_type"] = "t2v"
+        # super(WAN21, self) starts the lookup *after* WAN21 in the MRO, so
+        # WAN21.__init__ is not run - presumably why image_to_video is
+        # assigned manually below (confirm against WAN21.__init__).
+        super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel)
+        self.image_to_video = image_to_video
+
 class Hunyuan3Dv2(BaseModel):
    def __init__(self, model_config, model_type=ModelType.FLOW, device=None):
        super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2)
--- a/comfy/model_detection.py
+++ b/comfy/model_detection.py
@@ -279,6 +279,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
            dit_config["txt_norm"] = any_suffix_in(state_dict_keys, key_prefix, 'txt_norm.', ["weight", "scale"])
            if dit_config["yak_mlp"] and dit_config["txt_norm"]:  # Ovis model
                dit_config["txt_ids_dims"] = [1, 2]
+            if dit_config.get("context_in_dim") == 3584 and dit_config["vec_in_dim"] is None:  # LongCat-Image
+                dit_config["txt_ids_dims"] = [1, 2]

        return dit_config

@@ -509,6 +511,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        if ref_conv_weight is not None:
            dit_config["in_dim_ref_conv"] = ref_conv_weight.shape[1]

+        if metadata is not None and "config" in metadata:
+            dit_config.update(json.loads(metadata["config"]).get("transformer", {}))
+
        return dit_config

    if '{}latent_in.weight'.format(key_prefix) in state_dict_keys:  # Hunyuan 3D
@@ -792,6 +797,10 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
        unet_config["use_temporal_resblock"] = False
        unet_config["use_temporal_attention"] = False

+    heatmap_key = '{}heatmap_head.conv_layers.0.weight'.format(key_prefix)
+    if heatmap_key in state_dict_keys:
+        unet_config["heatmap_head"] = True
+
    return unet_config

 def model_config_from_unet_config(unet_config, state_dict=None):
@@ -1012,7 +1021,7 @@ def unet_config_from_diffusers_unet(state_dict, dtype=None):

    LotusD = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'adm_in_channels': 4,
            'dtype': dtype, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0],
-            'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_heads': 8,
+            'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_head_channels': 64,
            'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0],
            'use_temporal_attention': False, 'use_temporal_resblock': False}

--- a/comfy/model_management.py
+++ b/comfy/model_management.py
@@ -350,7 +350,7 @@ AMD_ENABLE_MIOPEN_ENV = 'COMFYUI_ENABLE_MIOPEN'

 try:
    if is_amd():
-        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName
+        arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName.split(':')[0]
        if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)):
            if os.getenv(AMD_ENABLE_MIOPEN_ENV) != '1':
                torch.backends.cudnn.enabled = False  # Seems to improve things a lot on AMD
@@ -378,7 +378,7 @@ try:
        if args.use_split_cross_attention == False and args.use_quad_cross_attention == False:
            if aotriton_supported(arch):  # AMD efficient attention implementation depends on aotriton.
                if torch_version_numeric >= (2, 7):  # works on 2.6 but doesn't actually seem to improve much
-                    if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
+                    if any((a in arch) for a in ["gfx90a", "gfx942", "gfx950", "gfx1100", "gfx1101", "gfx1151"]):  # TODO: more arches, TODO: gfx950
                        ENABLE_PYTORCH_ATTENTION = True
                if rocm_version >= (7, 0):
                   if any((a in arch) for a in ["gfx1200", "gfx1201"]):
@@ -836,7 +836,7 @@ def unet_inital_load_device(parameters, dtype):

    mem_dev = get_free_memory(torch_dev)
    mem_cpu = get_free_memory(cpu_dev)
-    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None:
+    if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_enabled:
        return torch_dev
    else:
        return cpu_dev
@@ -1121,7 +1121,6 @@ def get_cast_buffer(offload_stream, device, size, ref):
            synchronize()
            del STREAM_CAST_BUFFERS[offload_stream]
            del cast_buffer
-            #FIXME: This doesn't work in Aimdo because mempool cant clear cache
            soft_empty_cache()
        with wf_context:
            cast_buffer = torch.empty((size), dtype=torch.int8, device=device)
--- a/comfy/model_patcher.py
+++ b/comfy/model_patcher.py
@@ -271,6 +271,7 @@ class ModelPatcher:
        self.is_clip = False
        self.hook_mode = comfy.hooks.EnumHookMode.MaxSpeed

+        self.cached_patcher_init: tuple[Callable, tuple] | None = None
        if not hasattr(self.model, 'model_loaded_weight_memory'):
            self.model.model_loaded_weight_memory = 0

@@ -307,8 +308,15 @@ class ModelPatcher:
    def get_free_memory(self, device):
        return comfy.model_management.get_free_memory(device)

-    def clone(self):
-        n = self.__class__(self.model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
+    def clone(self, disable_dynamic=False):
+        class_ = self.__class__
+        model = self.model
+        if self.is_dynamic() and disable_dynamic:
+            class_ = ModelPatcher
+            temp_model_patcher = self.cached_patcher_init[0](*self.cached_patcher_init[1], disable_dynamic=True)
+            model = temp_model_patcher.model
+
+        n = class_(model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update)
        n.patches = {}
        for k in self.patches:
            n.patches[k] = self.patches[k][:]
@@ -362,6 +370,8 @@ class ModelPatcher:
        n.is_clip = self.is_clip
        n.hook_mode = self.hook_mode

+        n.cached_patcher_init = self.cached_patcher_init
+
        for callback in self.get_all_callbacks(CallbacksMP.ON_CLONE):
            callback(self, n)
        return n
--- a/comfy/model_sampling.py
+++ b/comfy/model_sampling.py
@@ -83,6 +83,16 @@ class IMG_TO_IMG(X0):
    def calculate_input(self, sigma, noise):
        return noise

+class IMG_TO_IMG_FLOW(CONST):
+    """CONST variant with three overridden hooks; each one ignores ``sigma``.
+
+    NOTE(review): presumably paired with ModelType.IMG_TO_IMG_FLOW, where
+    sampling starts from the source latent instead of noise - confirm.
+    """
+    def calculate_denoised(self, sigma, model_output, model_input):
+        # The model output is returned unchanged as the denoised prediction.
+        return model_output
+
+    def noise_scaling(self, sigma, noise, latent_image, max_denoise=False):
+        # No noise is mixed in: sigma, noise and max_denoise are unused.
+        return latent_image
+
+    def inverse_noise_scaling(self, sigma, latent):
+        # NOTE(review): returns 1.0 - latent, not latent itself; presumably
+        # intentional for this flow formulation - confirm.
+        return 1.0 - latent
+
 class COSMOS_RFLOW:
    def calculate_input(self, sigma, noise):
        sigma = (sigma / (sigma + 1))
--- a/comfy/ops.py
+++ b/comfy/ops.py
@@ -19,7 +19,7 @@
 import torch
 import logging
 import comfy.model_management
-from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram
+from comfy.cli_args import args, PerformanceFeature
 import comfy.float
 import json
 import comfy.memory_management
@@ -167,17 +167,15 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu
            x = to_dequant(x, dtype)
        if not resident and lowvram_fn is not None:
            x = to_dequant(x, dtype if compute_dtype is None else compute_dtype)
-            #FIXME: this is not accurate, we need to be sensitive to the compute dtype
            x = lowvram_fn(x)
-            if (isinstance(orig, QuantizedTensor) and
-                (want_requant and len(fns) == 0 or update_weight)):
+            if (want_requant and len(fns) == 0 or update_weight):
                seed = comfy.utils.string_to_seed(s.seed_key)
-                y = QuantizedTensor.from_float(x, s.layout_type, scale="recalculate", stochastic_rounding=seed)
-                if want_requant and len(fns) == 0:
-                    #The layer actually wants our freshly saved QT
-                    x = y
-            elif update_weight:
-                y = comfy.float.stochastic_rounding(x, orig.dtype, seed = comfy.utils.string_to_seed(s.seed_key))
+                if isinstance(orig, QuantizedTensor):
+                    y = QuantizedTensor.from_float(x, s.layout_type, scale="recalculate", stochastic_rounding=seed)
+                else:
+                    y = comfy.float.stochastic_rounding(x, orig.dtype, seed=seed)
+            if want_requant and len(fns) == 0:
+                x = y
            if update_weight:
                orig.copy_(y)
        for f in fns:
@@ -296,7 +294,7 @@ class disable_weight_init:
    class Linear(torch.nn.Linear, CastWeightBiasOp):

        def __init__(self, in_features, out_features, bias=True, device=None, dtype=None):
-            if not comfy.model_management.WINDOWS or not enables_dynamic_vram():
+            if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
                super().__init__(in_features, out_features, bias, device, dtype)
                return

@@ -317,7 +315,7 @@ class disable_weight_init:
        def _load_from_state_dict(self, state_dict, prefix, local_metadata,
                                strict, missing_keys, unexpected_keys, error_msgs):

-            if not comfy.model_management.WINDOWS or not enables_dynamic_vram():
+            if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled:
                return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict,
                                                     missing_keys, unexpected_keys, error_msgs)
            assign_to_params_buffers = local_metadata.get("assign_to_params_buffers", False)
@@ -617,7 +615,8 @@ def fp8_linear(self, input):

    if input.ndim != 2:
        return None
-    w, bias, offload_stream = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype, offloadable=True)
+    lora_compute_dtype=comfy.model_management.lora_compute_dtype(input.device)
+    w, bias, offload_stream = cast_bias_weight(self, input, dtype=dtype, bias_dtype=input_dtype, offloadable=True, compute_dtype=lora_compute_dtype, want_requant=True)
    scale_weight = torch.ones((), device=input.device, dtype=torch.float32)

    scale_input = torch.ones((), device=input.device, dtype=torch.float32)
@@ -827,6 +826,10 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec
                else:
                    sd = {}

+                if not hasattr(self, 'weight'):
+                    logging.warning("Warning: state dict on uninitialized op {}".format(prefix))
+                    return sd
+
                if self.bias is not None:
                    sd["{}bias".format(prefix)] = self.bias

--- a/comfy/sd.py
+++ b/comfy/sd.py
@@ -60,6 +60,7 @@ import comfy.text_encoders.jina_clip_2
 import comfy.text_encoders.newbie
 import comfy.text_encoders.anima
 import comfy.text_encoders.ace15
+import comfy.text_encoders.longcat_image

 import comfy.model_patcher
 import comfy.lora
@@ -423,6 +424,17 @@ class CLIP:
    def get_key_patches(self):
        return self.patcher.get_key_patches()

+    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None):
+        """Run ``cond_stage_model.generate`` on ``tokens`` and return its result.
+
+        Clip options are reset, the model is loaded, then the ``"layer"`` option
+        is set to ``None`` and ``"execution_device"`` to the patcher's load
+        device before generation. All sampling arguments are passed through
+        unchanged as keyword arguments.
+        """
+        self.cond_stage_model.reset_clip_options()
+        self.load_model()
+
+        text_model = self.cond_stage_model
+        text_model.set_clip_options({"layer": None})
+        text_model.set_clip_options({"execution_device": self.patcher.load_device})
+
+        sampling_kwargs = dict(
+            do_sample=do_sample,
+            max_length=max_length,
+            temperature=temperature,
+            top_k=top_k,
+            top_p=top_p,
+            min_p=min_p,
+            repetition_penalty=repetition_penalty,
+            seed=seed,
+        )
+        return text_model.generate(tokens, **sampling_kwargs)
+
+    def decode(self, token_ids, skip_special_tokens=True):
+        """Forward to ``self.tokenizer.decode`` and return its result unchanged."""
+        return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
+
 class VAE:
    def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None):
        if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format
@@ -683,8 +695,9 @@ class VAE:
                    self.latent_dim = 3
                    self.latent_channels = 16
                    self.output_channels = sd["encoder.conv1.weight"].shape[1]
+                    self.conv_out_channels = sd["decoder.head.2.weight"].shape[0]
                    self.pad_channel_value = 1.0
-                    ddconfig = {"dim": dim, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "image_channels": self.output_channels, "dropout": 0.0}
+                    ddconfig = {"dim": dim, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "image_channels": self.output_channels, "conv_out_channels": self.conv_out_channels, "dropout": 0.0}
                    self.first_stage_model = comfy.ldm.wan.vae.WanVAE(**ddconfig)
                    self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32]
                    self.memory_used_encode = lambda shape, dtype: (1500 if shape[2]<=4 else 6000) * shape[3] * shape[4] * model_management.dtype_size(dtype)
@@ -1148,6 +1161,7 @@ class CLIPType(Enum):
    KANDINSKY5_IMAGE = 23
    NEWBIE = 24
    FLUX2 = 25
+    LONGCAT_IMAGE = 26


 def load_clip(ckpt_paths, embedding_directory=None, clip_type=CLIPType.STABLE_DIFFUSION, model_options={}):
@@ -1182,6 +1196,7 @@ class TEModel(Enum):
    JINA_CLIP_2 = 19
    QWEN3_8B = 20
    QWEN3_06B = 21
+    GEMMA_3_4B_VISION = 22


 def detect_te_model(sd):
@@ -1210,7 +1225,10 @@ def detect_te_model(sd):
        if 'model.layers.47.self_attn.q_norm.weight' in sd:
            return TEModel.GEMMA_3_12B
        if 'model.layers.0.self_attn.q_norm.weight' in sd:
-            return TEModel.GEMMA_3_4B
+            if 'vision_model.embeddings.patch_embedding.weight' in sd:
+                return TEModel.GEMMA_3_4B_VISION
+            else:
+                return TEModel.GEMMA_3_4B
        return TEModel.GEMMA_2_2B
    if 'model.layers.0.self_attn.k_proj.bias' in sd:
        weight = sd['model.layers.0.self_attn.k_proj.bias']
@@ -1270,6 +1288,8 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
        else:
            if "text_projection" in clip_data[i]:
                clip_data[i]["text_projection.weight"] = clip_data[i]["text_projection"].transpose(0, 1) #old models saved with the CLIPSave node
+        if "lm_head.weight" in clip_data[i]:
+            clip_data[i]["model.lm_head.weight"] = clip_data[i].pop("lm_head.weight") # prefix missing in some models

    tokenizer_data = {}
    clip_target = EmptyClass()
@@ -1335,6 +1355,14 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b")
            clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer
            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
+        elif te_model == TEModel.GEMMA_3_4B_VISION:
+            clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b_vision")
+            clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer
+            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
+        elif te_model == TEModel.GEMMA_3_12B:
+            clip_target.clip = comfy.text_encoders.lt.gemma3_te(**llama_detect(clip_data))
+            clip_target.tokenizer = comfy.text_encoders.lt.Gemma3_12BTokenizer
+            tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None)
        elif te_model == TEModel.LLAMA3_8:
            clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data),
                                                                        clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None)
@@ -1346,6 +1374,9 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip
            if clip_type == CLIPType.HUNYUAN_IMAGE:
                clip_target.clip = comfy.text_encoders.hunyuan_image.te(byt5=False, **llama_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.hunyuan_image.HunyuanImageTokenizer
+            elif clip_type == CLIPType.LONGCAT_IMAGE:
+                clip_target.clip = comfy.text_encoders.longcat_image.te(**llama_detect(clip_data))
+                clip_target.tokenizer = comfy.text_encoders.longcat_image.LongCatImageTokenizer
            else:
                clip_target.clip = comfy.text_encoders.qwen_image.te(**llama_detect(clip_data))
                clip_target.tokenizer = comfy.text_encoders.qwen_image.QwenImageTokenizer
@@ -1505,14 +1536,24 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl

    return (model, clip, vae)

-def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}):
+def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, disable_dynamic=False):
    sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True)
-    out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata)
+    out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata, disable_dynamic=disable_dynamic)
    if out is None:
        raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, model_detection_error_hint(ckpt_path, sd)))
+    if output_model:
+        out[0].cached_patcher_init = (load_checkpoint_guess_config_model_only, (ckpt_path, embedding_directory, model_options, te_model_options))
    return out

-def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None):
+def load_checkpoint_guess_config_model_only(ckpt_path, embedding_directory=None, model_options={}, te_model_options={}, disable_dynamic=False):
+    """Load a checkpoint via ``load_checkpoint_guess_config`` and return only the model.
+
+    VAE, CLIP and CLIP-vision outputs are all switched off; the first element
+    of the loader's result is returned and the rest is discarded.
+    """
+    loaded = load_checkpoint_guess_config(
+        ckpt_path,
+        output_vae=False,
+        output_clip=False,
+        output_clipvision=False,
+        embedding_directory=embedding_directory,
+        model_options=model_options,
+        te_model_options=te_model_options,
+        disable_dynamic=disable_dynamic,
+    )
+    return loaded[0]
+
+def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None, disable_dynamic=False):
    clip = None
    clipvision = None
    vae = None
@@ -1561,7 +1602,8 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
    if output_model:
        inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype)
        model = model_config.get_model(sd, diffusion_model_prefix, device=inital_load_device)
-        model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device())
+        ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher
+        model_patcher = ModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device())
        model.load_model_weights(sd, diffusion_model_prefix, assign=model_patcher.is_dynamic())

    if output_vae:
@@ -1612,7 +1654,7 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c
    return (model_patcher, clip, vae, clipvision)


-def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
+def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable_dynamic=False):
    """
    Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats.

@@ -1696,7 +1738,8 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
        model_config.optimizations["fp8"] = True

    model = model_config.get_model(new_sd, "")
-    model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=offload_device)
+    ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher
+    model_patcher = ModelPatcher(model, load_device=load_device, offload_device=offload_device)
    if not model_management.is_device_cpu(offload_device):
        model.to(offload_device)
    model.load_model_weights(new_sd, "", assign=model_patcher.is_dynamic())
@@ -1705,12 +1748,13 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None):
        logging.info("left over keys in diffusion model: {}".format(left_over))
    return model_patcher

-def load_diffusion_model(unet_path, model_options={}):
+def load_diffusion_model(unet_path, model_options={}, disable_dynamic=False):
    sd, metadata = comfy.utils.load_torch_file(unet_path, return_metadata=True)
-    model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata)
+    model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata, disable_dynamic=disable_dynamic)
    if model is None:
        logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path))
        raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd)))
+    model.cached_patcher_init = (load_diffusion_model, (unet_path, model_options))
    return model

 def load_unet(unet_path, dtype=None):
--- a/comfy/sd1_clip.py
+++ b/comfy/sd1_clip.py
@@ -308,6 +308,15 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder):
    def load_sd(self, sd):
        return self.transformer.load_state_dict(sd, strict=False, assign=getattr(self, "can_assign_sd", False))

+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
+        """Run self.transformer.generate on embeddings built from `tokens`.
+
+        `tokens` is either a dict, in which case only its first value is used,
+        or the batch itself. All sampling arguments are passed through to
+        self.transformer.generate unchanged, positionally.
+        """
+        if isinstance(tokens, dict):
+            tokens_only = next(iter(tokens.values())) # todo: get this better?
+        else:
+            tokens_only = tokens
+        # Keep only element 0 of every item (presumably dropping the weight - confirm).
+        tokens_only = [[t[0] for t in b] for b in tokens_only]
+        # Only the first element of process_tokens' return value is used.
+        embeds = self.process_tokens(tokens_only, device=self.execution_device)[0]
+        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed)
+
 def parse_parentheses(string):
    result = []
    current_item = ""
@@ -564,6 +573,8 @@ class SDTokenizer:
        min_length = tokenizer_options.get("{}_min_length".format(self.embedding_key), self.min_length)
        min_padding = tokenizer_options.get("{}_min_padding".format(self.embedding_key), self.min_padding)

+        min_length = kwargs.get("min_length", min_length)
+
        text = escape_important(text)
        if kwargs.get("disable_weights", self.disable_weights):
            parsed_weights = [(text, 1.0)]
@@ -663,6 +674,9 @@ class SDTokenizer:
    def state_dict(self):
        return {}

+    def decode(self, token_ids, skip_special_tokens=True):
+        """Return self.tokenizer.decode(token_ids), forwarding skip_special_tokens."""
+        return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens)
+
 class SD1Tokenizer:
    def __init__(self, embedding_directory=None, tokenizer_data={}, clip_name="l", tokenizer=SDTokenizer, name=None):
        if name is not None:
@@ -686,6 +700,9 @@ class SD1Tokenizer:
    def state_dict(self):
        return getattr(self, self.clip).state_dict()

+    def decode(self, token_ids, skip_special_tokens=True):
+        """Delegate decoding to the tokenizer stored under the attribute named by self.clip."""
+        return getattr(self, self.clip).decode(token_ids, skip_special_tokens=skip_special_tokens)
+
 class SD1CheckpointClipModel(SDClipModel):
    def __init__(self, device="cpu", dtype=None, model_options={}):
        super().__init__(device=device, return_projected_pooled=False, dtype=dtype, model_options=model_options)
@@ -722,3 +739,6 @@ class SD1ClipModel(torch.nn.Module):

    def load_sd(self, sd):
        return getattr(self, self.clip).load_sd(sd)
+
+    def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None):
+        """Delegate generation to the model stored under the attribute named by self.clip.
+
+        Every argument is forwarded by keyword without modification.
+        """
+        # NOTE(review): the default seed=None is forwarded as-is; confirm the
+        # delegate accepts None when do_sample is True.
+        return getattr(self, self.clip).generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed)
--- a/comfy/supported_models.py
+++ b/comfy/supported_models.py
@@ -25,6 +25,7 @@ import comfy.text_encoders.kandinsky5
 import comfy.text_encoders.z_image
 import comfy.text_encoders.anima
 import comfy.text_encoders.ace15
+import comfy.text_encoders.longcat_image

 from . import supported_models_base
 from . import latent_formats
@@ -525,7 +526,8 @@ class LotusD(SD20):
    }

    unet_extra_config = {
-        "num_classes": 'sequential'
+        "num_classes": 'sequential',
+        "num_head_channels": 64,
    }

    def get_model(self, state_dict, prefix="", device=None):
@@ -1256,6 +1258,16 @@ class WAN22_T2V(WAN21_T2V):
        out = model_base.WAN22(self, image_to_video=True, device=device)
        return out

+class WAN21_FlowRVS(WAN21_T2V):
+    """Supported-model entry for the "flow_rvs" variant of "wan2.1".
+
+    Everything not overridden here is inherited from WAN21_T2V.
+    """
+    # Presumably matched against the detected UNet config to select this
+    # class - confirm against the model detection code.
+    unet_config = {
+        "image_model": "wan2.1",
+        "model_type": "flow_rvs",
+    }
+
+    def get_model(self, state_dict, prefix="", device=None):
+        """Build model_base.WAN21_FlowRVS with image_to_video=True.
+
+        `state_dict` and `prefix` are accepted but not used here.
+        """
+        out = model_base.WAN21_FlowRVS(self, image_to_video=True, device=device)
+        return out
+
 class Hunyuan3Dv2(supported_models_base.BASE):
    unet_config = {
        "image_model": "hunyuan3d2",
@@ -1667,6 +1679,37 @@ class ACEStep15(supported_models_base.BASE):
        return supported_models_base.ClipTarget(comfy.text_encoders.ace15.ACE15Tokenizer, comfy.text_encoders.ace15.te(**detect))


-models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima]
+class LongCatImage(supported_models_base.BASE):
+    """Supported-model entry for LongCat-Image.
+
+    Declares a "flux" image_model config, uses the Flux latent format and pairs
+    the diffusion model with the LongCat-Image tokenizer / text encoder.
+    """
+    # Presumably matched against the detected UNet config to select this
+    # class - confirm against the model detection code.
+    unet_config = {
+        "image_model": "flux",
+        "guidance_embed": False,
+        "vec_in_dim": None,
+        "context_in_dim": 3584,
+        "txt_ids_dims": [1, 2],
+    }
+
+    # No sampling overrides are declared.
+    sampling_settings = {
+    }
+
+    unet_extra_config = {}
+    latent_format = latent_formats.Flux
+
+    memory_usage_factor = 2.5
+
+    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
+
+    # State-dict key prefixes for the VAE and text encoder weights.
+    vae_key_prefix = ["vae."]
+    text_encoder_key_prefix = ["text_encoders."]
+
+    def get_model(self, state_dict, prefix="", device=None):
+        """Build model_base.LongCatImage; `state_dict` and `prefix` are not used here."""
+        out = model_base.LongCatImage(self, device=device)
+        return out
+
+    def clip_target(self, state_dict={}):
+        """Return the ClipTarget pairing LongCatImageTokenizer with the text encoder class.
+
+        llama_detect is run on the keys under "<prefix>qwen25_7b.transformer." and
+        its result is expanded as keyword arguments of longcat_image.te().
+        """
+        pref = self.text_encoder_key_prefix[0]
+        hunyuan_detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen25_7b.transformer.".format(pref))
+        return supported_models_base.ClipTarget(comfy.text_encoders.longcat_image.LongCatImageTokenizer, comfy.text_encoders.longcat_image.te(**hunyuan_detect))
+
+models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, LongCatImage, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima]

 models += [SVD_img2vid]
--- a/comfy/text_encoders/anima.py
+++ b/comfy/text_encoders/anima.py
@@ -33,6 +33,8 @@ class AnimaTokenizer:
    def state_dict(self):
        return {}

+    def decode(self, token_ids, **kwargs):
+        """Delegate decoding to self.qwen3_06b, forwarding any keyword arguments."""
+        return self.qwen3_06b.decode(token_ids, **kwargs)

 class Qwen3_06BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
--- a/comfy/text_encoders/llama.py
+++ b/comfy/text_encoders/llama.py
@@ -3,6 +3,8 @@ import torch.nn as nn
 from dataclasses import dataclass
 from typing import Optional, Any, Tuple
 import math
+from tqdm import tqdm
+import comfy.utils

 from comfy.ldm.modules.attention import optimized_attention_for_device
 import comfy.model_management
@@ -103,6 +105,7 @@ class Qwen3_06BConfig:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_06B_ACE15_Config:
@@ -126,6 +129,7 @@ class Qwen3_06B_ACE15_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_2B_ACE15_lm_Config:
@@ -149,6 +153,7 @@ class Qwen3_2B_ACE15_lm_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_4B_ACE15_lm_Config:
@@ -172,6 +177,7 @@ class Qwen3_4B_ACE15_lm_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_4BConfig:
@@ -195,6 +201,7 @@ class Qwen3_4BConfig:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [151643, 151645]

@dataclass
 class Qwen3_8BConfig:
@@ -218,6 +225,7 @@ class Qwen3_8BConfig:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [151643, 151645]

@dataclass
 class Ovis25_2BConfig:
@@ -288,6 +296,7 @@ class Gemma2_2B_Config:
    rope_scale = None
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [1]

@dataclass
 class Gemma3_4B_Config:
@@ -312,6 +321,14 @@ class Gemma3_4B_Config:
    rope_scale = [8.0, 1.0]
    final_norm: bool = True
    lm_head: bool = False
+    stop_tokens = [1, 106]
+
+GEMMA3_VISION_CONFIG = {"num_channels": 3, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 14}
+
+@dataclass
+class Gemma3_4B_Vision_Config(Gemma3_4B_Config):
+    """Gemma3_4B_Config extended with the vision settings below."""
+    # Neither name is annotated, so @dataclass does not turn them into fields;
+    # they are plain class attributes shared by all instances.
+    vision_config = GEMMA3_VISION_CONFIG
+    mm_tokens_per_image = 256

@dataclass
 class Gemma3_12B_Config:
@@ -336,8 +353,9 @@ class Gemma3_12B_Config:
    rope_scale = [8.0, 1.0]
    final_norm: bool = True
    lm_head: bool = False
-    vision_config = {"num_channels": 3, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 14}
+    vision_config = GEMMA3_VISION_CONFIG
    mm_tokens_per_image = 256
+    stop_tokens = [1, 106]

 class RMSNorm(nn.Module):
    def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None):
@@ -441,8 +459,10 @@ class Attention(nn.Module):
        freqs_cis: Optional[torch.Tensor] = None,
        optimized_attention=None,
        past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
+        sliding_window: Optional[int] = None,
    ):
        batch_size, seq_length, _ = hidden_states.shape
+
        xq = self.q_proj(hidden_states)
        xk = self.k_proj(hidden_states)
        xv = self.v_proj(hidden_states)
@@ -477,6 +497,11 @@ class Attention(nn.Module):
            else:
                present_key_value = (xk, xv, index + num_tokens)

+            if sliding_window is not None and xk.shape[2] > sliding_window:
+                xk = xk[:, :, -sliding_window:]
+                xv = xv[:, :, -sliding_window:]
+                attention_mask = attention_mask[..., -sliding_window:] if attention_mask is not None else None
+
        xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)
        xv = xv.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1)

@@ -559,10 +584,12 @@ class TransformerBlockGemma2(nn.Module):
        optimized_attention=None,
        past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None,
    ):
+        sliding_window = None
        if self.transformer_type == 'gemma3':
            if self.sliding_attention:
+                sliding_window = self.sliding_attention
                if x.shape[1] > self.sliding_attention:
-                    sliding_mask = torch.full((x.shape[1], x.shape[1]), float("-inf"), device=x.device, dtype=x.dtype)
+                    sliding_mask = torch.full((x.shape[1], x.shape[1]), torch.finfo(x.dtype).min, device=x.device, dtype=x.dtype)
                    sliding_mask.tril_(diagonal=-self.sliding_attention)
                    if attention_mask is not None:
                        attention_mask = attention_mask + sliding_mask
@@ -581,6 +608,7 @@ class TransformerBlockGemma2(nn.Module):
            freqs_cis=freqs_cis,
            optimized_attention=optimized_attention,
            past_key_value=past_key_value,
+            sliding_window=sliding_window,
        )

        x = self.post_attention_layernorm(x)
@@ -765,6 +793,107 @@ class BaseLlama:
    def forward(self, input_ids, *args, **kwargs):
        return self.model(input_ids, *args, **kwargs)

+class BaseGenerate:
+    """Mixin that adds autoregressive token generation on top of `self.model`.
+
+    The host class must provide `self.model` exposing `config`, `embed_tokens`,
+    `forward(...)` (accepting `embeds` and `past_key_values`) and, optionally,
+    `lm_head`.
+    """
+    def logits(self, x):
+        """Project the last position of hidden states `x` onto the vocabulary.
+
+        Uses `self.model.lm_head` when the model has one, otherwise the token
+        embedding matrix.
+        """
+        input = x[:, -1:]
+        if hasattr(self.model, "lm_head"):
+            module = self.model.lm_head
+        else:
+            module = self.model.embed_tokens
+
+        offload_stream = None
+        if module.comfy_cast_weights:
+            weight, _, offload_stream = comfy.ops.cast_bias_weight(module, input, offloadable=True)
+        else:
+            # Take the weight from the module selected above; reading
+            # embed_tokens unconditionally would silently ignore an lm_head.
+            weight = module.weight.to(x)
+
+        x = torch.nn.functional.linear(input, weight, None)
+
+        comfy.ops.uncast_bias_weight(module, weight, None, offload_stream)
+        return x
+
+    def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0):
+        """Generate up to `max_length` new token ids starting from prompt embeddings.
+
+        Args:
+            embeds: prompt embeddings; a 2-D tensor gets a leading batch
+                dimension added. Required despite the None default.
+            do_sample: sample from the filtered distribution when True,
+                otherwise pick the argmax token.
+            max_length: maximum number of new tokens to produce.
+            temperature, top_k, top_p, min_p, repetition_penalty: sampling
+                controls, see `sample_token`.
+            seed: seed of the sampling generator; None requests a
+                non-deterministic seed.
+            stop_tokens: ids that end generation; defaults to
+                `self.model.config.stop_tokens`.
+            initial_tokens: ids counted as history for the repetition penalty.
+            execution_dtype: dtype used for embeddings and the KV cache;
+                defaults to bfloat16 when the device supports it, else float32.
+            min_tokens: accepted for interface compatibility; currently unused.
+
+        Returns:
+            list of generated ids taken from batch element 0. The stop token,
+            when one is hit, is included as the last element.
+
+        Raises:
+            ValueError: if `embeds` is None.
+        """
+        if embeds is None:
+            raise ValueError("generate() requires `embeds`")
+
+        device = embeds.device
+        model_config = self.model.config
+
+        if stop_tokens is None:
+            stop_tokens = self.model.config.stop_tokens
+
+        if execution_dtype is None:
+            if comfy.model_management.should_use_bf16(device):
+                execution_dtype = torch.bfloat16
+            else:
+                execution_dtype = torch.float32
+        embeds = embeds.to(execution_dtype)
+
+        if embeds.ndim == 2:
+            embeds = embeds.unsqueeze(0)
+
+        # Pre-allocate one (key, value, filled_length) cache entry per layer,
+        # large enough for the prompt plus every token that may be generated.
+        max_cache_len = embeds.shape[1] + max_length
+        cache_shape = [embeds.shape[0], model_config.num_key_value_heads, max_cache_len, model_config.head_dim]
+        past_key_values = []
+        for _ in range(model_config.num_hidden_layers):
+            past_key_values.append((torch.empty(cache_shape, device=device, dtype=execution_dtype),
+                                    torch.empty(cache_shape, device=device, dtype=execution_dtype), 0))
+
+        generator = None
+        if do_sample:
+            generator = torch.Generator(device=device)
+            if seed is None:
+                # manual_seed(None) raises; seed() draws a non-deterministic seed.
+                generator.seed()
+            else:
+                generator.manual_seed(seed)
+
+        # Copy so tuples are accepted and the caller's sequence is never touched.
+        history_prefix = list(initial_tokens)
+        generated_token_ids = []
+        pbar = comfy.utils.ProgressBar(max_length)
+
+        # Generation loop
+        for step in tqdm(range(max_length), desc="Generating tokens"):
+            x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values)
+            logits = self.logits(x)[:, -1]
+            next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, history_prefix + generated_token_ids, generator, do_sample=do_sample)
+            token_id = next_token[0].item()
+            generated_token_ids.append(token_id)
+
+            # Only the new token is fed back; earlier positions live in the KV cache.
+            embeds = self.model.embed_tokens(next_token).to(execution_dtype)
+            pbar.update(1)
+
+            if token_id in stop_tokens:
+                break
+
+        return generated_token_ids
+
+    def sample_token(self, logits, temperature, top_k, top_p, min_p, repetition_penalty, token_history, generator, do_sample=True):
+        """Pick the next token id for every row of `logits`.
+
+        Greedy argmax is used when `do_sample` is False or `temperature` is 0.
+        Otherwise the logits are adjusted in place by, in order: repetition
+        penalty, temperature, top-k, min-p and top-p filtering, and one token
+        per row is drawn with `generator`.
+
+        Returns:
+            tensor with one id per row, shaped like `logits` with the last
+            dimension reduced to size 1.
+        """
+
+        if not do_sample or temperature == 0.0:
+            return torch.argmax(logits, dim=-1, keepdim=True)
+
+        # Sampling mode
+        if repetition_penalty != 1.0 and len(token_history) > 0:
+            # Penalize every previously seen token once, for all rows at the
+            # same time: negative logits are multiplied by the penalty,
+            # non-negative ones by its reciprocal.
+            seen = torch.tensor(sorted(set(token_history)), device=logits.device, dtype=torch.long)
+            picked = logits[:, seen]
+            logits[:, seen] = torch.where(picked < 0, picked * repetition_penalty, picked * (1 / repetition_penalty))
+
+        if temperature != 1.0:
+            logits = logits / temperature
+
+        if top_k > 0:
+            # torch.topk raises when k exceeds the vocabulary size, so clamp it.
+            k = min(top_k, logits.shape[-1])
+            indices_to_remove = logits < torch.topk(logits, k)[0][..., -1, None]
+            logits[indices_to_remove] = torch.finfo(logits.dtype).min
+
+        if min_p > 0.0:
+            # Drop tokens whose probability is below min_p times the top probability.
+            probs_before_filter = torch.nn.functional.softmax(logits, dim=-1)
+            top_probs, _ = probs_before_filter.max(dim=-1, keepdim=True)
+            min_threshold = min_p * top_probs
+            indices_to_remove = probs_before_filter < min_threshold
+            logits[indices_to_remove] = torch.finfo(logits.dtype).min
+
+        if top_p < 1.0:
+            sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+            cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1)
+            sorted_indices_to_remove = cumulative_probs > top_p
+            # Always keep the most likely token so the distribution is never empty.
+            sorted_indices_to_remove[..., 0] = False
+            # Map the removal mask from sorted order back to vocabulary order.
+            indices_to_remove = torch.zeros_like(logits, dtype=torch.bool)
+            indices_to_remove.scatter_(1, sorted_indices, sorted_indices_to_remove)
+            logits[indices_to_remove] = torch.finfo(logits.dtype).min
+
+        probs = torch.nn.functional.softmax(logits, dim=-1)
+
+        return torch.multinomial(probs, num_samples=1, generator=generator)
+
 class BaseQwen3:
    def logits(self, x):
        input = x[:, -1:]
@@ -808,7 +937,7 @@ class Qwen25_3B(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen3_06B(BaseLlama, BaseQwen3, torch.nn.Module):
+class Qwen3_06B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen3_06BConfig(**config_dict)
@@ -835,7 +964,7 @@ class Qwen3_2B_ACE15_lm(BaseLlama, BaseQwen3, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen3_4B(BaseLlama, BaseQwen3, torch.nn.Module):
+class Qwen3_4B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen3_4BConfig(**config_dict)
@@ -853,7 +982,7 @@ class Qwen3_4B_ACE15_lm(BaseLlama, BaseQwen3, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen3_8B(BaseLlama, BaseQwen3, torch.nn.Module):
+class Qwen3_8B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen3_8BConfig(**config_dict)
@@ -871,7 +1000,7 @@ class Ovis25_2B(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Qwen25_7BVLI(BaseLlama, torch.nn.Module):
+class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Qwen25_7BVLI_Config(**config_dict)
@@ -881,6 +1010,9 @@ class Qwen25_7BVLI(BaseLlama, torch.nn.Module):
        self.visual = qwen_vl.Qwen2VLVisionTransformer(hidden_size=1280, output_hidden_size=config.hidden_size, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

+        # todo: should this be tied or not?
+        #self.lm_head = operations.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype)
+
    def preprocess_embed(self, embed, device):
        if embed["type"] == "image":
            image, grid = qwen_vl.process_qwen2vl_images(embed["data"])
@@ -914,7 +1046,7 @@ class Qwen25_7BVLI(BaseLlama, torch.nn.Module):

        return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids)

-class Gemma2_2B(BaseLlama, torch.nn.Module):
+class Gemma2_2B(BaseLlama, BaseGenerate, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Gemma2_2B_Config(**config_dict)
@@ -923,7 +1055,7 @@ class Gemma2_2B(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Gemma3_4B(BaseLlama, torch.nn.Module):
+class Gemma3_4B(BaseLlama, BaseGenerate, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Gemma3_4B_Config(**config_dict)
@@ -932,7 +1064,25 @@ class Gemma3_4B(BaseLlama, torch.nn.Module):
        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
        self.dtype = dtype

-class Gemma3_12B(BaseLlama, torch.nn.Module):
+class Gemma3_4B_Vision(BaseLlama, BaseGenerate, torch.nn.Module):
+    """Gemma3 4B language model combined with a vision tower and projector."""
+    def __init__(self, config_dict, dtype, device, operations):
+        """Build the language model, the multi-modal projector and the vision model.
+
+        `config_dict` is expanded into Gemma3_4B_Vision_Config.
+        """
+        super().__init__()
+        config = Gemma3_4B_Vision_Config(**config_dict)
+        self.num_layers = config.num_hidden_layers
+
+        self.model = Llama2_(config, device=device, dtype=dtype, ops=operations)
+        self.dtype = dtype
+        self.multi_modal_projector = Gemma3MultiModalProjector(config, dtype, device, operations)
+        self.vision_model = comfy.clip_model.CLIPVision(config.vision_config, dtype, device, operations)
+        # Side length passed to clip_preprocess as `size`.
+        self.image_size = config.vision_config["image_size"]
+
+    def preprocess_embed(self, embed, device):
+        """Turn an image embed entry into projected vision features.
+
+        For entries with embed["type"] == "image", embed["data"] is preprocessed
+        (mean and std 0.5 per channel, crop=True), moved to `device` as float32,
+        run through the vision model, and element 0 of its output is projected.
+        Returns (features, None) for images and (None, None) for any other type.
+        """
+        if embed["type"] == "image":
+            image = comfy.clip_model.clip_preprocess(embed["data"], size=self.image_size, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], crop=True)
+            return self.multi_modal_projector(self.vision_model(image.to(device, dtype=torch.float32))[0]), None
+        return None, None
+
+class Gemma3_12B(BaseLlama, BaseGenerate, torch.nn.Module):
    def __init__(self, config_dict, dtype, device, operations):
        super().__init__()
        config = Gemma3_12B_Config(**config_dict)
--- a/comfy/text_encoders/longcat_image.py
+++ b/comfy/text_encoders/longcat_image.py
@@ -0,0 +1,184 @@
+import re
+import numbers
+import torch
+from comfy import sd1_clip
+from comfy.text_encoders.qwen_image import Qwen25_7BVLITokenizer, Qwen25_7BVLIModel
+import logging
+
+logger = logging.getLogger(__name__)
+
+QUOTE_PAIRS = [("'", "'"), ('"', '"'), ("\u2018", "\u2019"), ("\u201c", "\u201d")]
+QUOTE_PATTERN = "|".join(
+    [
+        re.escape(q1) + r"[^" + re.escape(q1 + q2) + r"]*?" + re.escape(q2)
+        for q1, q2 in QUOTE_PAIRS
+    ]
+)
+WORD_INTERNAL_QUOTE_RE = re.compile(r"[a-zA-Z]+'[a-zA-Z]+")
+
+
+def split_quotation(prompt):
+    """Split `prompt` into (text, is_quoted) pieces around quoted spans.
+
+    Words with an internal apostrophe (matched by WORD_INTERNAL_QUOTE_RE, e.g.
+    "don't") are temporarily swapped for placeholders so their apostrophe is
+    not mistaken for a quotation mark, then restored in the output.
+
+    Returns:
+        list of (text, is_quoted) tuples in prompt order; empty pieces are
+        dropped. `is_quoted` is True for spans matched by QUOTE_PATTERN.
+    """
+    # Replace longer contractions first so a shorter one that is a substring
+    # of a longer one cannot clobber it; the sort also makes the order stable.
+    contractions = sorted(set(WORD_INTERNAL_QUOTE_RE.findall(prompt)), key=lambda w: (-len(w), w))
+    mapping = []
+    for i, word_src in enumerate(contractions):
+        word_tgt = "longcat_$##$_longcat" * (i + 1)
+        prompt = prompt.replace(word_src, word_tgt)
+        mapping.append((word_src, word_tgt))
+
+    parts = re.split(f"({QUOTE_PATTERN})", prompt)
+    result = []
+    for part in parts:
+        if not part:
+            continue
+        # Classify before restoring: a restored apostrophe could otherwise pair
+        # with a stray leading quote and make plain text look quoted.
+        is_quoted = re.fullmatch(QUOTE_PATTERN, part) is not None
+        # Placeholder i+1 starts with placeholder i, so restore the longest
+        # placeholder first to avoid expanding a prefix of a longer one.
+        for word_src, word_tgt in reversed(mapping):
+            part = part.replace(word_tgt, word_src)
+        result.append((part, is_quoted))
+    return result
+
+
+class LongCatImageBaseTokenizer(Qwen25_7BVLITokenizer):
+    """Qwen2.5-VL tokenizer variant that tokenizes quoted text one character at a time."""
+    def __init__(self, *args, **kwargs):
+        """Initialize the parent tokenizer, then set max_length to 512."""
+        super().__init__(*args, **kwargs)
+        self.max_length = 512
+
+    def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
+        """Tokenize `text` into a single sequence of (token_id, 1.0) pairs.
+
+        Quoted spans found by split_quotation are tokenized character by
+        character; everything else is tokenized normally. The ids are truncated
+        to self.max_length (with a warning) and then padded via self.pad_tokens.
+        `return_word_ids` and `**kwargs` are accepted but not used here.
+
+        Returns:
+            a list containing one list of (token_id, weight) pairs.
+        """
+        parts = split_quotation(text)
+        all_tokens = []
+        for part_text, is_quoted in parts:
+            if is_quoted:
+                # One tokenizer call per character, including the quote marks.
+                for char in part_text:
+                    ids = self.tokenizer(char, add_special_tokens=False)["input_ids"]
+                    all_tokens.extend(ids)
+            else:
+                ids = self.tokenizer(part_text, add_special_tokens=False)["input_ids"]
+                all_tokens.extend(ids)
+
+        if len(all_tokens) > self.max_length:
+            all_tokens = all_tokens[: self.max_length]
+            logger.warning(f"Truncated prompt to {self.max_length} tokens")
+
+        output = [(t, 1.0) for t in all_tokens]
+        # Pad to max length
+        # pad_tokens is not defined in this class; assumed to extend `output`
+        # in place by the given amount - confirm against the parent tokenizer.
+        self.pad_tokens(output, self.max_length - len(output))
+        return [output]
+
+
+class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
+    """SD1Tokenizer wrapper that surrounds prompts with the LongCat chat template."""
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        """Register LongCatImageBaseTokenizer under the name "qwen25_7b" and store the template."""
+        super().__init__(
+            embedding_directory=embedding_directory,
+            tokenizer_data=tokenizer_data,
+            name="qwen25_7b",
+            tokenizer=LongCatImageBaseTokenizer,
+        )
+        # Text placed before and after the user prompt when the template is applied.
+        self.longcat_template_prefix = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
+        self.longcat_template_suffix = "<|im_end|>\n<|im_start|>assistant\n"
+
+    def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
+        """Tokenize `text`, wrapping it in the template unless it already starts with one.
+
+        Text starting with "<|im_start|>" or "<|start_header_id|>" is passed to
+        the parent implementation with disable_weights=True. Otherwise the
+        prefix, the tokenized prompt and the suffix are concatenated.
+
+        Returns:
+            the parent's result in the skip-template case, otherwise a dict
+            {"qwen25_7b": [pairs]} of (token_id, weight) pairs.
+        """
+        skip_template = False
+        if text.startswith("<|im_start|>"):
+            skip_template = True
+        if text.startswith("<|start_header_id|>"):
+            skip_template = True
+        # An empty prompt is replaced by a single space before tokenizing.
+        if text == "":
+            text = " "
+
+        base_tok = getattr(self, "qwen25_7b")
+        if skip_template:
+            tokens = super().tokenize_with_weights(
+                text, return_word_ids=return_word_ids, disable_weights=True, **kwargs
+            )
+        else:
+            # Template pieces are tokenized directly, without special tokens added.
+            prefix_ids = base_tok.tokenizer(
+                self.longcat_template_prefix, add_special_tokens=False
+            )["input_ids"]
+            suffix_ids = base_tok.tokenizer(
+                self.longcat_template_suffix, add_special_tokens=False
+            )["input_ids"]
+
+            prompt_tokens = base_tok.tokenize_with_weights(
+                text, return_word_ids=return_word_ids, **kwargs
+            )
+            # The base tokenizer returns a one-element list; take its sequence.
+            prompt_pairs = prompt_tokens[0]
+
+            prefix_pairs = [(t, 1.0) for t in prefix_ids]
+            suffix_pairs = [(t, 1.0) for t in suffix_ids]
+
+            combined = prefix_pairs + prompt_pairs + suffix_pairs
+            tokens = {"qwen25_7b": [combined]}
+
+        return tokens
+
+
+class LongCatImageTEModel(sd1_clip.SD1ClipModel):
+    """Text encoder wrapper that strips the template prefix and suffix from the output."""
+    def __init__(self, device="cpu", dtype=None, model_options={}):
+        """Register Qwen25_7BVLIModel under the name "qwen25_7b"."""
+        super().__init__(
+            device=device,
+            dtype=dtype,
+            name="qwen25_7b",
+            clip_model=Qwen25_7BVLIModel,
+            model_options=model_options,
+        )
+
+    def encode_token_weights(self, token_weight_pairs, template_end=-1):
+        """Encode the tokens and cut template positions off both ends of the result.
+
+        Args:
+            token_weight_pairs: dict whose "qwen25_7b" entry holds the token
+                sequences; only the first sequence is inspected.
+            template_end: index where the kept output starts; -1 means detect
+                it from the tokens.
+
+        Returns:
+            (out, pooled, extra) from the parent call, with `out` and
+            extra["attention_mask"] (when present) sliced along dimension 1.
+        """
+        out, pooled, extra = super().encode_token_weights(token_weight_pairs)
+        tok_pairs = token_weight_pairs["qwen25_7b"][0]
+        count_im_start = 0
+        if template_end == -1:
+            # Record the index of token 151644 (presumably <|im_start|> - confirm)
+            # for its first two occurrences; the later one wins.
+            for i, v in enumerate(tok_pairs):
+                elem = v[0]
+                if not torch.is_tensor(elem):
+                    if isinstance(elem, numbers.Integral):
+                        if elem == 151644 and count_im_start < 2:
+                            template_end = i
+                            count_im_start += 1
+
+        # Skip three more positions when the next two tokens are 872 and 198
+        # (presumably "user" and a newline - confirm against the tokenizer).
+        if out.shape[1] > (template_end + 3):
+            if tok_pairs[template_end + 1][0] == 872:
+                if tok_pairs[template_end + 2][0] == 198:
+                    template_end += 3
+
+        if template_end == -1:
+            template_end = 0
+
+        # Find the last occurrence of token 151645 (presumably <|im_end|> - confirm).
+        suffix_start = None
+        for i in range(len(tok_pairs) - 1, -1, -1):
+            elem = tok_pairs[i][0]
+            if not torch.is_tensor(elem) and isinstance(elem, numbers.Integral):
+                if elem == 151645:
+                    suffix_start = i
+                    break
+
+        out = out[:, template_end:]
+
+        if "attention_mask" in extra:
+            extra["attention_mask"] = extra["attention_mask"][:, template_end:]
+            # Drop the mask when its sum equals its element count.
+            if extra["attention_mask"].sum() == torch.numel(extra["attention_mask"]):
+                extra.pop("attention_mask")
+
+        if suffix_start is not None:
+            # Number of positions from the last 151645 token to the end of the sequence.
+            suffix_len = len(tok_pairs) - suffix_start
+            if suffix_len > 0 and out.shape[1] > suffix_len:
+                out = out[:, :-suffix_len]
+                if "attention_mask" in extra:
+                    extra["attention_mask"] = extra["attention_mask"][:, :-suffix_len]
+                    if extra["attention_mask"].sum() == torch.numel(
+                        extra["attention_mask"]
+                    ):
+                        extra.pop("attention_mask")
+
+        return out, pooled, extra
+
+
+def te(dtype_llama=None, llama_quantization_metadata=None):
+    """Return a LongCatImageTEModel subclass with the given overrides baked in.
+
+    Args:
+        dtype_llama: when not None, replaces the dtype passed to the constructor.
+        llama_quantization_metadata: when not None, stored under
+            "quantization_metadata" in a copy of model_options.
+    """
+    class LongCatImageTEModel_(LongCatImageTEModel):
+        def __init__(self, device="cpu", dtype=None, model_options={}):
+            if llama_quantization_metadata is not None:
+                # Copy first so the caller's dict (or the shared default) is not mutated.
+                model_options = model_options.copy()
+                model_options["quantization_metadata"] = llama_quantization_metadata
+            if dtype_llama is not None:
+                dtype = dtype_llama
+            super().__init__(device=device, dtype=dtype, model_options=model_options)
+
+    return LongCatImageTEModel_
--- a/comfy/text_encoders/lt.py
+++ b/comfy/text_encoders/lt.py
@@ -3,9 +3,10 @@ import os
 from transformers import T5TokenizerFast
 from .spiece_tokenizer import SPieceTokenizer
 import comfy.text_encoders.genmo
-from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
 import torch
 import comfy.utils
+import math
+import itertools

 class T5XXLTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
@@ -22,46 +23,86 @@ def ltxv_te(*args, **kwargs):
    return comfy.text_encoders.genmo.mochi_te(*args, **kwargs)


-class Gemma3_12BTokenizer(sd1_clip.SDTokenizer):
-    def __init__(self, embedding_directory=None, tokenizer_data={}):
-        tokenizer = tokenizer_data.get("spiece_model", None)
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_left=True, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
-
+class Gemma3_Tokenizer():
    def state_dict(self):
        return {"spiece_model": self.tokenizer.serialize_model()}

+    def tokenize_with_weights(self, text, return_word_ids=False, image=None, llama_template=None, skip_template=True, **kwargs):
+        """Tokenize ``text``, optionally wrapping it in a chat template and attaching an image.
+
+        Args:
+            text: Prompt text. Text that already starts with ``<start_of_turn>`` is never templated.
+            return_word_ids: Forwarded to the parent class' ``tokenize_with_weights``.
+            image: Optional image tensor. It is treated here as (batch, height, width, channels),
+                rescaled to roughly 896 * 896 pixels and reduced to its first three channels.
+            llama_template: Optional format string with one ``{}`` placeholder that replaces
+                the built-in templates.
+            skip_template: When true (the default) the text is tokenized as given.
+            **kwargs: Accepted for call compatibility; not used.
+
+        Returns:
+            The parent tokenizer's result. When an image was supplied, the first entry whose
+            token id is 262144 is replaced by an ``{"type": "image", "data": ...}`` payload.
+        """
+        self.llama_template = "<start_of_turn>system\nYou are a helpful assistant.<end_of_turn>\n<start_of_turn>user\n{}<end_of_turn>\n<start_of_turn>model\n"
+        self.llama_template_images = "<start_of_turn>system\nYou are a helpful assistant.<end_of_turn>\n<start_of_turn>user\n\n<image_soft_token>{}<end_of_turn>\n\n<start_of_turn>model\n"
+
+        images = []
+        if image is not None:
+            # The resize helper is fed a channels-first view of the image.
+            samples = image.movedim(-1, 1)
+            src_height, src_width = samples.shape[2], samples.shape[3]
+            # One uniform factor brings the pixel count to about 896 * 896 and keeps the aspect ratio.
+            scale_by = math.sqrt(int(896 * 896) / (src_width * src_height))
+            width = round(src_width * scale_by)
+            height = round(src_height * scale_by)
+            resized = comfy.utils.common_upscale(samples, width, height, "area", "disabled").movedim(1, -1)
+            # Only the first three channels are kept.
+            images.append(resized[:, :, :, :3])
+
+        if text.startswith('<start_of_turn>'):
+            skip_template = True
+
+        if skip_template:
+            llama_text = text
+        elif llama_template is not None:
+            llama_text = llama_template.format(text)
+        elif len(images) > 0:
+            llama_text = self.llama_template_images.format(text)
+        else:
+            llama_text = self.llama_template.format(text)
+
+        text_tokens = super().tokenize_with_weights(llama_text, return_word_ids)
+
+        if len(images) > 0:
+            embed_count = 0
+            for row in text_tokens:
+                for pos in range(len(row)):
+                    entry = row[pos]
+                    if entry[0] == 262144 and embed_count < len(images):
+                        # Swap the placeholder id for the image payload, keeping the rest of the tuple.
+                        row[pos] = ({"type": "image", "data": images[embed_count]},) + entry[1:]
+                        embed_count += 1
+        return text_tokens
+
+class Gemma3_12BTokenizer(Gemma3_Tokenizer, sd1_clip.SDTokenizer):
+    """SentencePiece-backed tokenizer registered under the ``gemma3_12b`` embedding key."""
+
+    def __init__(self, embedding_directory=None, tokenizer_data={}):
+        # Literal strings that must map straight to a fixed id instead of being split into pieces.
+        special_tokens = {"<image_soft_token>": 262144, "<end_of_turn>": 106}
+        spiece_args = {"add_bos": True, "add_eos": False, "special_tokens": special_tokens}
+        super().__init__(
+            tokenizer_data.get("spiece_model", None),
+            pad_with_end=False,
+            embedding_size=3840,
+            embedding_key='gemma3_12b',
+            tokenizer_class=SPieceTokenizer,
+            has_end_token=False,
+            pad_to_max_length=False,
+            max_length=99999999,
+            min_length=1024,
+            pad_left=True,
+            disable_weights=True,
+            tokenizer_args=spiece_args,
+            tokenizer_data=tokenizer_data,
+        )
+
+
 class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma3_12b", tokenizer=Gemma3_12BTokenizer)

+
 class Gemma3_12BModel(sd1_clip.SDClipModel):
    def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}):
        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
        if llama_quantization_metadata is not None:
            model_options = model_options.copy()
            model_options["quantization_metadata"] = llama_quantization_metadata
-
+        self.dtypes = set()
+        self.dtypes.add(dtype)
        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

-    def tokenize_with_weights(self, text, return_word_ids=False, llama_template="{}", image_embeds=None, **kwargs):
-        text = llama_template.format(text)
-        text_tokens = super().tokenize_with_weights(text, return_word_ids)
-        embed_count = 0
-        for k in text_tokens:
-            tt = text_tokens[k]
-            for r in tt:
-                for i in range(len(r)):
-                    if r[i][0] == 262144:
-                        if image_embeds is not None and embed_count < image_embeds.shape[0]:
-                            r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image"},) + r[i][1:]
-                            embed_count += 1
-        return text_tokens
+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
+        """Run text generation from already-tokenized input.
+
+        ``tokens`` is a list of batches whose entries are indexable; only element 0 of each
+        entry is used. The sampling arguments are passed through unchanged to
+        ``self.transformer.generate``, which is told to stop on token id 106.
+        """
+        token_ids = []
+        for batch in tokens:
+            token_ids.append([entry[0] for entry in batch])
+        embeds, _mask, _count, embeds_info = self.process_tokens(token_ids, self.execution_device)
+        # Image spans are divided by sqrt(hidden_size) in place before generation.
+        image_scale = self.transformer.model.config.hidden_size ** 0.5
+        comfy.utils.normalize_image_embeddings(embeds, embeds_info, image_scale)
+        return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106])  # 106 is <end_of_turn>

 class LTXAVTEModel(torch.nn.Module):
    def __init__(self, dtype_llama=None, device="cpu", dtype=None, model_options={}):
        super().__init__()
        self.dtypes = set()
        self.dtypes.add(dtype)
+        self.compat_mode = False

        self.gemma3_12b = Gemma3_12BModel(device=device, dtype=dtype_llama, model_options=model_options, layer="all", layer_idx=None)
        self.dtypes.add(dtype_llama)
@@ -69,6 +110,11 @@ class LTXAVTEModel(torch.nn.Module):
        operations = self.gemma3_12b.operations # TODO
        self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device)

+    def enable_compat_mode(self):  # TODO: remove
+        from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector
+        operations = self.gemma3_12b.operations
+        dtype = self.text_embedding_projection.weight.dtype
+        device = self.text_embedding_projection.weight.device
        self.audio_embeddings_connector = Embeddings1DConnector(
            split_rope=True,
            double_precision_rope=True,
@@ -84,6 +130,7 @@ class LTXAVTEModel(torch.nn.Module):
            device=device,
            operations=operations,
        )
+        self.compat_mode = True

    def set_clip_options(self, options):
        self.execution_device = options.get("execution_device", self.execution_device)
@@ -106,30 +153,45 @@ class LTXAVTEModel(torch.nn.Module):
        out = out.reshape((out.shape[0], out.shape[1], -1))
        out = self.text_embedding_projection(out)
        out = out.float()
-        out_vid = self.video_embeddings_connector(out)[0]
-        out_audio = self.audio_embeddings_connector(out)[0]
-        out = torch.concat((out_vid, out_audio), dim=-1)
+
+        if self.compat_mode:
+            out_vid = self.video_embeddings_connector(out)[0]
+            out_audio = self.audio_embeddings_connector(out)[0]
+            out = torch.concat((out_vid, out_audio), dim=-1)

        return out.to(out_device), pooled

+    def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed):
+        """Delegate generation to the wrapped Gemma model using the ``"gemma3_12b"`` entry of ``tokens``."""
+        gemma_tokens = tokens["gemma3_12b"]
+        return self.gemma3_12b.generate(gemma_tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed)
+
    def load_sd(self, sd):
        if "model.layers.47.self_attn.q_norm.weight" in sd:
            return self.gemma3_12b.load_sd(sd)
        else:
-            sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight", "model.diffusion_model.video_embeddings_connector.": "video_embeddings_connector.", "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector."}, filter_keys=True)
+            sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight"}, filter_keys=True)
            if len(sdo) == 0:
                sdo = sd

            missing_all = []
            unexpected_all = []

-            for prefix, component in [("text_embedding_projection.", self.text_embedding_projection), ("video_embeddings_connector.", self.video_embeddings_connector), ("audio_embeddings_connector.", self.audio_embeddings_connector)]:
+            for prefix, component in [("text_embedding_projection.", self.text_embedding_projection)]:
                component_sd = {k.replace(prefix, ""): v for k, v in sdo.items() if k.startswith(prefix)}
                if component_sd:
                    missing, unexpected = component.load_state_dict(component_sd, strict=False, assign=getattr(self, "can_assign_sd", False))
                    missing_all.extend([f"{prefix}{k}" for k in missing])
                    unexpected_all.extend([f"{prefix}{k}" for k in unexpected])

+            if "model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.2.attn1.to_q.bias" not in sd:  # TODO: remove
+                ww = sd.get("model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.0.attn1.to_q.bias", None)
+                if ww is not None:
+                    if ww.shape[0] == 3840:
+                        self.enable_compat_mode()
+                        sdv = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.video_embeddings_connector.": ""}, filter_keys=True)
+                        self.video_embeddings_connector.load_state_dict(sdv, strict=False, assign=getattr(self, "can_assign_sd", False))
+                        sda = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.audio_embeddings_connector.": ""}, filter_keys=True)
+                        self.audio_embeddings_connector.load_state_dict(sda, strict=False, assign=getattr(self, "can_assign_sd", False))
+
            return (missing_all, unexpected_all)

    def memory_estimation_function(self, token_weight_pairs, device=None):
@@ -138,8 +200,10 @@ class LTXAVTEModel(torch.nn.Module):
            constant /= 2.0

        token_weight_pairs = token_weight_pairs.get("gemma3_12b", [])
-        num_tokens = sum(map(lambda a: len(a), token_weight_pairs))
-        num_tokens = max(num_tokens, 64)
+        # Length of the run of id-0 tokens that starts every sequence, taken as the minimum over
+        # all sequences; it is subtracted from the total below. `default=0` covers the case where
+        # the "gemma3_12b" key is absent and the list is empty, where a bare min() would raise
+        # ValueError.
+        m = min((sum(1 for _ in itertools.takewhile(lambda x: x[0] == 0, sub)) for sub in token_weight_pairs), default=0)
+
+        num_tokens = sum(map(lambda a: len(a), token_weight_pairs)) - m
+        num_tokens = max(num_tokens, 642)
        return num_tokens * constant * 1024 * 1024

 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
@@ -152,3 +216,14 @@ def ltxav_te(dtype_llama=None, llama_quantization_metadata=None):
                dtype = dtype_llama
            super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options)
    return LTXAVTEModel_
+
+def gemma3_te(dtype_llama=None, llama_quantization_metadata=None):
+    """Return a ``Gemma3_12BModel`` subclass with dtype/quantization settings baked in.
+
+    Args:
+        dtype_llama: When not None, replaces whatever ``dtype`` the subclass is constructed with.
+        llama_quantization_metadata: When not None, stored under the
+            ``"llama_quantization_metadata"`` key of a copy of ``model_options``.
+
+    Returns:
+        The generated subclass (a class, not an instance).
+    """
+    class Gemma3_12BModel_(Gemma3_12BModel):
+        def __init__(self, device="cpu", dtype=None, model_options={}):
+            options = model_options
+            if llama_quantization_metadata is not None:
+                # Work on a copy so neither the caller's dict nor the shared default is mutated.
+                options = model_options.copy()
+                options["llama_quantization_metadata"] = llama_quantization_metadata
+            chosen_dtype = dtype if dtype_llama is None else dtype_llama
+            super().__init__(device=device, dtype=chosen_dtype, model_options=options)
+    return Gemma3_12BModel_
--- a/comfy/text_encoders/lumina2.py
+++ b/comfy/text_encoders/lumina2.py
@@ -1,23 +1,23 @@
 from comfy import sd1_clip
 from .spiece_tokenizer import SPieceTokenizer
 import comfy.text_encoders.llama
-
+from comfy.text_encoders.lt import Gemma3_Tokenizer
+import comfy.utils

 class Gemma2BTokenizer(sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer = tokenizer_data.get("spiece_model", None)
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=2304, embedding_key='gemma2_2b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data)
+        special_tokens = {"<end_of_turn>": 107}
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=2304, embedding_key='gemma2_2b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, tokenizer_data=tokenizer_data)

    def state_dict(self):
        return {"spiece_model": self.tokenizer.serialize_model()}

-class Gemma3_4BTokenizer(sd1_clip.SDTokenizer):
+class Gemma3_4BTokenizer(Gemma3_Tokenizer, sd1_clip.SDTokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
        tokenizer = tokenizer_data.get("spiece_model", None)
-        super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, disable_weights=True, tokenizer_data=tokenizer_data)
-
-    def state_dict(self):
-        return {"spiece_model": self.tokenizer.serialize_model()}
+        special_tokens = {"<image_soft_token>": 262144, "<end_of_turn>": 106}
+        super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, disable_weights=True, tokenizer_data=tokenizer_data)

 class LuminaTokenizer(sd1_clip.SD1Tokenizer):
    def __init__(self, embedding_directory=None, tokenizer_data={}):
@@ -40,6 +40,20 @@ class Gemma3_4BModel(sd1_clip.SDClipModel):

        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)

+class Gemma3_4B_Vision_Model(sd1_clip.SDClipModel):
+    """Text-encoder wrapper built on ``comfy.text_encoders.llama.Gemma3_4B_Vision``."""
+
+    def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}):
+        llama_quantization_metadata = model_options.get("llama_quantization_metadata", None)
+        if llama_quantization_metadata is not None:
+            # Copy before writing so the caller's dict (and the shared default) stays untouched.
+            model_options = model_options.copy()
+            model_options["quantization_metadata"] = llama_quantization_metadata
+
+        super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B_Vision, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options)
+
+    def process_tokens(self, tokens, device):
+        """Embed ``tokens`` via the parent class, then rescale image spans in place.
+
+        Image spans are divided by ``sqrt(hidden_size)``.
+
+        Returns:
+            The parent's 4-tuple unchanged in shape:
+            ``(embeds, attention_mask, num_tokens, embeds_info)``. Returning only ``embeds``
+            would break every caller written against the parent signature, which unpacks
+            four values.
+        """
+        embeds, attention_mask, num_tokens, embeds_info = super().process_tokens(tokens, device)
+        comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5)
+        return embeds, attention_mask, num_tokens, embeds_info
+
 class LuminaModel(sd1_clip.SD1ClipModel):
    def __init__(self, device="cpu", dtype=None, model_options={}, name="gemma2_2b", clip_model=Gemma2_2BModel):
        super().__init__(device=device, dtype=dtype, name=name, clip_model=clip_model, model_options=model_options)
@@ -50,6 +64,8 @@ def te(dtype_llama=None, llama_quantization_metadata=None, model_type="gemma2_2b
        model = Gemma2_2BModel
    elif model_type == "gemma3_4b":
        model = Gemma3_4BModel
+    elif model_type == "gemma3_4b_vision":
+        model = Gemma3_4B_Vision_Model

    class LuminaTEModel_(LuminaModel):
        def __init__(self, device="cpu", dtype=None, model_options={}):
--- a/comfy/text_encoders/spiece_tokenizer.py
+++ b/comfy/text_encoders/spiece_tokenizer.py
@@ -6,9 +6,10 @@ class SPieceTokenizer:
    def from_pretrained(path, **kwargs):
        return SPieceTokenizer(path, **kwargs)

-    def __init__(self, tokenizer_path, add_bos=False, add_eos=True):
+    def __init__(self, tokenizer_path, add_bos=False, add_eos=True, special_tokens=None):
        self.add_bos = add_bos
        self.add_eos = add_eos
+        self.special_tokens = special_tokens
        import sentencepiece
        if torch.is_tensor(tokenizer_path):
            tokenizer_path = tokenizer_path.numpy().tobytes()
@@ -27,8 +28,32 @@ class SPieceTokenizer:
        return out

    def __call__(self, string):
+        if self.special_tokens is not None:
+            import re
+            special_tokens_pattern = '|'.join(re.escape(token) for token in self.special_tokens.keys())
+            if special_tokens_pattern and re.search(special_tokens_pattern, string):
+                parts = re.split(f'({special_tokens_pattern})', string)
+                result = []
+                for part in parts:
+                    if not part:
+                        continue
+                    if part in self.special_tokens:
+                        result.append(self.special_tokens[part])
+                    else:
+                        encoded = self.tokenizer.encode(part, add_bos=False, add_eos=False)
+                        result.extend(encoded)
+                return {"input_ids": result}
+
        out = self.tokenizer.encode(string)
        return {"input_ids": out}

+    def decode(self, token_ids, skip_special_tokens=False):
+        """Turn ``token_ids`` back into text with the underlying SentencePiece model.
+
+        When ``skip_special_tokens`` is true and special tokens are configured, every id
+        that appears among the configured special-token ids is dropped before decoding.
+        """
+        if not (skip_special_tokens and self.special_tokens):
+            return self.tokenizer.decode(token_ids)
+
+        excluded_ids = set(self.special_tokens.values())
+        kept_ids = [tid for tid in token_ids if tid not in excluded_ids]
+        return self.tokenizer.decode(kept_ids)
+
    def serialize_model(self):
        return torch.ByteTensor(list(self.tokenizer.serialized_model_proto()))
--- a/comfy/utils.py
+++ b/comfy/utils.py
@@ -29,7 +29,7 @@ import itertools
 from torch.nn.functional import interpolate
 from tqdm.auto import trange
 from einops import rearrange
-from comfy.cli_args import args, enables_dynamic_vram
+from comfy.cli_args import args
 import json
 import time
 import mmap
@@ -113,7 +113,7 @@ def load_torch_file(ckpt, safe_load=False, device=None, return_metadata=False):
    metadata = None
    if ckpt.lower().endswith(".safetensors") or ckpt.lower().endswith(".sft"):
        try:
-            if enables_dynamic_vram():
+            if comfy.memory_management.aimdo_enabled:
                sd, metadata = load_safetensors(ckpt)
                if not return_metadata:
                    metadata = None
@@ -1154,7 +1154,7 @@ def tiled_scale(samples, function, tile_x=64, tile_y=64, overlap = 8, upscale_am
    return tiled_scale_multidim(samples, function, (tile_y, tile_x), overlap=overlap, upscale_amount=upscale_amount, out_channels=out_channels, output_device=output_device, pbar=pbar)

 def model_trange(*args, **kwargs):
-    if comfy.memory_management.aimdo_allocator is None:
+    if not comfy.memory_management.aimdo_enabled:
        return trange(*args, **kwargs)

    pbar = trange(*args, **kwargs, smoothing=1.0)
@@ -1418,3 +1418,11 @@ def deepcopy_list_dict(obj, memo=None):

    memo[obj_id] = res
    return res
+
+def normalize_image_embeddings(embeds, embeds_info, scale_factor):
+    """Divide the image spans of ``embeds`` by ``scale_factor``, in place.
+
+    Args:
+        embeds: Tensor indexed as ``[:, position, :]``; modified in place.
+        embeds_info: Iterable of dicts. Entries whose ``"type"`` is ``"image"`` must carry
+            ``"index"`` (first position) and ``"size"`` (number of positions); all other
+            entries are ignored.
+        scale_factor: Divisor applied to each image span.
+
+    Returns:
+        None.
+    """
+    for entry in embeds_info:
+        if entry.get("type") != "image":
+            continue
+        first = entry["index"]
+        span = slice(first, first + entry["size"])
+        embeds[:, span, :] /= scale_factor
--- a/comfy_api/latest/_input_impl/video_types.py
+++ b/comfy_api/latest/_input_impl/video_types.py
@@ -444,7 +444,7 @@ class VideoFromComponents(VideoInput):
            output.mux(packet)

            if audio_stream and self.__components.audio:
-                frame = av.AudioFrame.from_ndarray(waveform.float().cpu().numpy(), format='fltp', layout=layout)
+                frame = av.AudioFrame.from_ndarray(waveform.float().cpu().contiguous().numpy(), format='fltp', layout=layout)
                frame.sample_rate = audio_sample_rate
                frame.pts = 0
                output.mux(audio_stream.encode(frame))
--- a/comfy_api/latest/_io.py
+++ b/comfy_api/latest/_io.py
@@ -73,6 +73,7 @@ class RemoteOptions:
 class NumberDisplay(str, Enum):
    number = "number"
    slider = "slider"
+    gradient_slider = "gradientslider"


 class ControlAfterGenerate(str, Enum):
@@ -296,13 +297,15 @@ class Float(ComfyTypeIO):
        '''Float input.'''
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None,
                    default: float=None, min: float=None, max: float=None, step: float=None, round: float=None,
-                    display_mode: NumberDisplay=None, socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
+                    display_mode: NumberDisplay=None, gradient_stops: list[list[float]]=None,
+                    socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None):
            super().__init__(id, display_name, optional, tooltip, lazy, default, socketless, None, force_input, extra_dict, raw_link, advanced)
            self.min = min
            self.max = max
            self.step = step
            self.round = round
            self.display_mode = display_mode
+            self.gradient_stops = gradient_stops
            self.default: float

        def as_dict(self):
@@ -312,6 +315,7 @@ class Float(ComfyTypeIO):
                "step": self.step,
                "round": self.round,
                "display": self.display_mode,
+                "gradient_stops": self.gradient_stops,
            })

@comfytype(io_type="STRING")
@@ -1220,9 +1224,10 @@ class BoundingBox(ComfyTypeIO):

    class Input(WidgetInput):
        def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None,
-                     socketless: bool=True, default: dict=None, component: str=None):
+                     socketless: bool=True, default: dict=None, component: str=None, force_input: bool=None):
            super().__init__(id, display_name, optional, tooltip, None, default, socketless)
            self.component = component
+            self.force_input = force_input
            if default is None:
                self.default = {"x": 0, "y": 0, "width": 512, "height": 512}

@@ -1230,6 +1235,8 @@ class BoundingBox(ComfyTypeIO):
            d = super().as_dict()
            if self.component:
                d["component"] = self.component
+            if self.force_input is not None:
+                d["forceInput"] = self.force_input
            return d


@@ -1339,6 +1346,7 @@ class NodeInfoV1:
    api_node: bool=None
    price_badge: dict | None = None
    search_aliases: list[str]=None
+    essentials_category: str=None


@dataclass
@@ -1460,6 +1468,8 @@ class Schema:
    """Flags a node as expandable, allowing NodeOutput to include 'expand' property."""
    accept_all_inputs: bool=False
    """When True, all inputs from the prompt will be passed to the node as kwargs, even if not defined in the schema."""
+    essentials_category: str | None = None
+    """Optional category for the Essentials tab. Path-based like category field (e.g., 'Basic', 'Image Tools/Editing')."""

    def validate(self):
        '''Validate the schema:
@@ -1566,6 +1576,7 @@ class Schema:
            python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes"),
            price_badge=self.price_badge.as_dict(self.inputs) if self.price_badge is not None else None,
            search_aliases=self.search_aliases if self.search_aliases else None,
+            essentials_category=self.essentials_category,
        )
        return info

--- a/comfy_api_nodes/apis/bytedance.py
+++ b/comfy_api_nodes/apis/bytedance.py
@@ -27,6 +27,7 @@ class Seedream4TaskCreationRequest(BaseModel):
    sequential_image_generation: str = Field("disabled")
    sequential_image_generation_options: Seedream4Options = Field(Seedream4Options(max_images=15))
    watermark: bool = Field(False)
+    output_format: str | None = None


 class ImageTaskCreationResponse(BaseModel):
@@ -106,6 +107,7 @@ RECOMMENDED_PRESETS_SEEDREAM_4 = [
    ("2496x1664 (3:2)", 2496, 1664),
    ("1664x2496 (2:3)", 1664, 2496),
    ("3024x1296 (21:9)", 3024, 1296),
+    ("3072x3072 (1:1)", 3072, 3072),
    ("4096x4096 (1:1)", 4096, 4096),
    ("Custom", None, None),
 ]
--- a/comfy_api_nodes/apis/elevenlabs.py
+++ b/comfy_api_nodes/apis/elevenlabs.py
@@ -0,0 +1,88 @@
+from pydantic import BaseModel, Field
+
+
+class SpeechToTextRequest(BaseModel):
+    # Speech-to-text request model. `model_id`, `cloud_storage_url` and `seed` are required;
+    # `timestamps_granularity` defaults to "word"; every other field defaults to None.
+    model_id: str = Field(...)
+    cloud_storage_url: str = Field(...)
+    language_code: str | None = Field(None, description="ISO-639-1 or ISO-639-3 language code")
+    tag_audio_events: bool | None = Field(None, description="Annotate sounds like (laughter) in transcript")
+    num_speakers: int | None = Field(None, description="Max speakers predicted")
+    timestamps_granularity: str = Field(default="word", description="Timing precision: none, word, or character")
+    diarize: bool | None = Field(None, description="Annotate which speaker is talking")
+    diarization_threshold: float | None = Field(None, description="Speaker separation sensitivity")
+    temperature: float | None = Field(None, description="Randomness control")
+    seed: int = Field(..., description="Seed for deterministic sampling")
+
+
+class SpeechToTextWord(BaseModel):
+    # One element of a transcript. Only `text` is required; `type` defaults to "word" and the
+    # timing, speaker and log-probability fields default to None.
+    text: str = Field(..., description="The word text")
+    type: str = Field(default="word", description="Type of text element (word, spacing, etc.)")
+    start: float | None = Field(None, description="Start time in seconds (when timestamps enabled)")
+    end: float | None = Field(None, description="End time in seconds (when timestamps enabled)")
+    speaker_id: str | None = Field(None, description="Speaker identifier when diarization is enabled")
+    logprob: float | None = Field(None, description="Log probability of the word")
+
+
+class SpeechToTextResponse(BaseModel):
+    # Speech-to-text response model. `language_code` and `text` are required; the language
+    # probability and the per-word list default to None.
+    language_code: str = Field(..., description="Detected or specified language code")
+    language_probability: float | None = Field(None, description="Confidence of language detection")
+    text: str = Field(..., description="Full transcript text")
+    words: list[SpeechToTextWord] | None = Field(None, description="Word-level timing information")
+
+
+class TextToSpeechVoiceSettings(BaseModel):
+    # Voice tuning options nested inside TextToSpeechRequest; every field defaults to None.
+    stability: float | None = Field(None, description="Voice stability")
+    similarity_boost: float | None = Field(None, description="Similarity boost")
+    style: float | None = Field(None, description="Style exaggeration")
+    use_speaker_boost: bool | None = Field(None, description="Boost similarity to original speaker")
+    speed: float | None = Field(None, description="Speech speed")
+
+
+class TextToSpeechRequest(BaseModel):
+    # Text-to-speech request model. `text`, `model_id` and `seed` are required; the language
+    # code, voice settings and normalization mode default to None.
+    text: str = Field(..., description="Text to convert to speech")
+    model_id: str = Field(..., description="Model ID for TTS")
+    language_code: str | None = Field(None, description="ISO-639-1 or ISO-639-3 language code")
+    voice_settings: TextToSpeechVoiceSettings | None = Field(None, description="Voice settings")
+    seed: int = Field(..., description="Seed for deterministic sampling")
+    apply_text_normalization: str | None = Field(None, description="Text normalization mode: auto, on, off")
+
+
+class TextToSoundEffectsRequest(BaseModel):
+    # Sound-effect generation request model. `text`, `duration_seconds` and `prompt_influence`
+    # are required; `loop` defaults to None.
+    text: str = Field(..., description="Text prompt to convert into a sound effect")
+    duration_seconds: float = Field(..., description="Duration of generated sound in seconds")
+    prompt_influence: float = Field(..., description="How closely generation follows the prompt")
+    loop: bool | None = Field(None, description="Whether to create a smoothly looping sound effect")
+
+
+class AddVoiceRequest(BaseModel):
+    # Voice-creation request model; both fields are required.
+    name: str = Field(..., description="Name that identifies the voice")
+    remove_background_noise: bool = Field(..., description="Remove background noise from voice samples")
+
+
+class AddVoiceResponse(BaseModel):
+    # Voice-creation response model carrying the required id of the new voice.
+    voice_id: str = Field(..., description="The newly created voice's unique identifier")
+
+
+class SpeechToSpeechRequest(BaseModel):
+    # Speech-to-speech request model; all fields are required. Unlike TextToSpeechRequest,
+    # `voice_settings` is typed as a JSON string rather than a nested settings model.
+    model_id: str = Field(..., description="Model ID for speech-to-speech")
+    voice_settings: str = Field(..., description="JSON string of voice settings")
+    seed: int = Field(..., description="Seed for deterministic sampling")
+    remove_background_noise: bool = Field(..., description="Remove background noise from input audio")
+
+
+class DialogueInput(BaseModel):
+    # One dialogue segment: the text to speak and the voice to speak it with (both required).
+    text: str = Field(..., description="Text content to convert to speech")
+    voice_id: str = Field(..., description="Voice identifier for this dialogue segment")
+
+
+class DialogueSettings(BaseModel):
+    # Optional settings nested inside TextToDialogueRequest; `stability` defaults to None.
+    stability: float | None = Field(None, description="Voice stability (0-1)")
+
+
+class TextToDialogueRequest(BaseModel):
+    # Multi-segment dialogue request model. `inputs` and `model_id` are required; everything
+    # else, including `seed`, defaults to None.
+    inputs: list[DialogueInput] = Field(..., description="List of dialogue segments")
+    model_id: str = Field(..., description="Model ID for dialogue generation")
+    language_code: str | None = Field(None, description="ISO-639-1 language code")
+    settings: DialogueSettings | None = Field(None, description="Voice settings")
+    seed: int | None = Field(None, description="Seed for deterministic sampling")
+    apply_text_normalization: str | None = Field(None, description="Text normalization mode: auto, on, off")
--- a/comfy_api_nodes/apis/gemini.py
+++ b/comfy_api_nodes/apis/gemini.py
@@ -116,14 +116,26 @@ class GeminiGenerationConfig(BaseModel):
    topP: float | None = Field(None, ge=0.0, le=1.0)


+class GeminiImageOutputOptions(BaseModel):
+    # Output encoding options for generated images: `mimeType` defaults to "image/png" and
+    # `compressionQuality` defaults to None.
+    mimeType: str = Field("image/png")
+    compressionQuality: int | None = Field(None)
+
+
 class GeminiImageConfig(BaseModel):
    aspectRatio: str | None = Field(None)
    imageSize: str | None = Field(None)
+    imageOutputOptions: GeminiImageOutputOptions = Field(default_factory=GeminiImageOutputOptions)
+
+
+class GeminiThinkingConfig(BaseModel):
+    # Thinking options: `thinkingLevel` is required, `includeThoughts` defaults to None.
+    includeThoughts: bool | None = Field(None)
+    thinkingLevel: str = Field(...)


 class GeminiImageGenerationConfig(GeminiGenerationConfig):
    responseModalities: list[str] | None = Field(None)
    imageConfig: GeminiImageConfig | None = Field(None)
+    thinkingConfig: GeminiThinkingConfig | None = Field(None)


 class GeminiImageGenerateContentRequest(BaseModel):
--- a/comfy_api_nodes/apis/kling.py
+++ b/comfy_api_nodes/apis/kling.py
@@ -134,6 +134,13 @@ class ImageToVideoWithAudioRequest(BaseModel):
    shot_type: str | None = Field(None)


+class KlingAvatarRequest(BaseModel):
+    # Avatar request model. `image`, `sound_file` and `mode` are required; `prompt` defaults
+    # to None.
+    image: str = Field(...)
+    sound_file: str = Field(...)
+    prompt: str | None = Field(None)
+    mode: str = Field(...)
+
+
 class MotionControlRequest(BaseModel):
    prompt: str = Field(...)
    image_url: str = Field(...)
--- a/comfy_api_nodes/nodes_bfl.py
+++ b/comfy_api_nodes/nodes_bfl.py
@@ -57,6 +57,7 @@ class FluxProUltraImageNode(IO.ComfyNode):
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation, "
                    "but results are nondeterministic (same seed will not produce exactly the same result).",
+                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -200,6 +201,7 @@ class FluxKontextProImageNode(IO.ComfyNode):
                    "prompt_upsampling",
                    default=False,
                    tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).",
+                    advanced=True,
                ),
                IO.Image.Input(
                    "input_image",
@@ -296,6 +298,7 @@ class FluxProExpandNode(IO.ComfyNode):
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation, "
                    "but results are nondeterministic (same seed will not produce exactly the same result).",
+                    advanced=True,
                ),
                IO.Int.Input(
                    "top",
@@ -433,6 +436,7 @@ class FluxProFillNode(IO.ComfyNode):
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation, "
                    "but results are nondeterministic (same seed will not produce exactly the same result).",
+                    advanced=True,
                ),
                IO.Float.Input(
                    "guidance",
@@ -577,6 +581,7 @@ class Flux2ProImageNode(IO.ComfyNode):
                    default=True,
                    tooltip="Whether to perform upsampling on the prompt. "
                    "If active, automatically modifies the prompt for more creative generation.",
+                    advanced=True,
                ),
                IO.Image.Input("images", optional=True, tooltip="Up to 9 images to be used as references."),
            ],
--- a/comfy_api_nodes/nodes_bytedance.py
+++ b/comfy_api_nodes/nodes_bytedance.py
@@ -37,6 +37,12 @@ from comfy_api_nodes.util import (

 BYTEPLUS_IMAGE_ENDPOINT = "/proxy/byteplus/api/v3/images/generations"

+# Maps the label shown in the ByteDanceSeedreamNode "model" combo to the model id used for the request.
+# The keys are the combo options (list(SEEDREAM_MODELS.keys())) and execute() resolves the selection with
+# SEEDREAM_MODELS[model]. The 4.x entries map to themselves; presumably this keeps previously saved
+# selections resolvable - confirm. The node's price badge matches on substrings of the label
+# ("5.0 lite", "4-5"), so renaming a key requires updating that expression as well.
+SEEDREAM_MODELS = {
+    "seedream 5.0 lite": "seedream-5-0-260128",
+    "seedream-4-5-251128": "seedream-4-5-251128",
+    "seedream-4-0-250828": "seedream-4-0-250828",
+}
+
 # Long-running tasks endpoints(e.g., video)
 BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks"
 BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks"  # + /{task_id}
@@ -114,6 +120,7 @@ class ByteDanceImageNode(IO.ComfyNode):
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the image',
                    optional=True,
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -179,14 +186,13 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
    def define_schema(cls):
        return IO.Schema(
            node_id="ByteDanceSeedreamNode",
-            display_name="ByteDance Seedream 4.5",
+            display_name="ByteDance Seedream 4.5 & 5.0",
            category="api node/image/ByteDance",
            description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.",
            inputs=[
                IO.Combo.Input(
                    "model",
-                    options=["seedream-4-5-251128", "seedream-4-0-250828"],
-                    tooltip="Model name",
+                    options=list(SEEDREAM_MODELS.keys()),
                ),
                IO.String.Input(
                    "prompt",
@@ -197,7 +203,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                IO.Image.Input(
                    "image",
                    tooltip="Input image(s) for image-to-image generation. "
-                    "List of 1-10 images for single or multi-reference generation.",
+                    "Reference image(s) for single or multi-reference generation.",
                    optional=True,
                ),
                IO.Combo.Input(
@@ -209,8 +215,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                    "width",
                    default=2048,
                    min=1024,
-                    max=4096,
-                    step=8,
+                    max=6240,
+                    step=2,
                    tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`",
                    optional=True,
                ),
@@ -218,8 +224,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                    "height",
                    default=2048,
                    min=1024,
-                    max=4096,
-                    step=8,
+                    max=4992,
+                    step=2,
                    tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`",
                    optional=True,
                ),
@@ -259,12 +265,14 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the image.',
                    optional=True,
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "fail_on_partial",
                    default=True,
                    tooltip="If enabled, abort execution if any requested images are missing or return an error.",
                    optional=True,
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -280,7 +288,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                depends_on=IO.PriceBadgeDepends(widgets=["model"]),
                expr="""
                (
-                  $price := $contains(widgets.model, "seedream-4-5-251128") ? 0.04 : 0.03;
+                  $price := $contains(widgets.model, "5.0 lite") ? 0.035 :
+                            $contains(widgets.model, "4-5") ? 0.04 : 0.03;
                  {
                    "type":"usd",
                    "usd": $price,
@@ -306,6 +315,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
        watermark: bool = False,
        fail_on_partial: bool = True,
    ) -> IO.NodeOutput:
+        model = SEEDREAM_MODELS[model]
        validate_string(prompt, strip_whitespace=True, min_length=1)
        w = h = None
        for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4:
@@ -315,15 +325,12 @@ class ByteDanceSeedreamNode(IO.ComfyNode):

        if w is None or h is None:
            w, h = width, height
-            if not (1024 <= w <= 4096) or not (1024 <= h <= 4096):
-                raise ValueError(
-                    f"Custom size out of range: {w}x{h}. " "Both width and height must be between 1024 and 4096 pixels."
-                )
+
        out_num_pixels = w * h
        mp_provided = out_num_pixels / 1_000_000.0
-        if "seedream-4-5" in model and out_num_pixels < 3686400:
+        if ("seedream-4-5" in model or "seedream-5-0" in model) and out_num_pixels < 3686400:
            raise ValueError(
-                f"Minimum image resolution that Seedream 4.5 can generate is 3.68MP, "
+                f"Minimum image resolution for the selected model is 3.68MP, "
                f"but {mp_provided:.2f}MP provided."
            )
        if "seedream-4-0" in model and out_num_pixels < 921600:
@@ -331,9 +338,18 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                f"Minimum image resolution that the selected model can generate is 0.92MP, "
                f"but {mp_provided:.2f}MP provided."
            )
+        max_pixels = 10_404_496 if "seedream-5-0" in model else 16_777_216
+        if out_num_pixels > max_pixels:
+            raise ValueError(
+                f"Maximum image resolution for the selected model is {max_pixels / 1_000_000:.2f}MP, "
+                f"but {mp_provided:.2f}MP provided."
+            )
        n_input_images = get_number_of_images(image) if image is not None else 0
-        if n_input_images > 10:
-            raise ValueError(f"Maximum of 10 reference images are supported, but {n_input_images} received.")
+        max_num_of_images = 14 if model == "seedream-5-0-260128" else 10
+        if n_input_images > max_num_of_images:
+            raise ValueError(
+                f"Maximum of {max_num_of_images} reference images are supported, but {n_input_images} received."
+            )
        if sequential_image_generation == "auto" and n_input_images + max_images > 15:
            raise ValueError(
                "The maximum number of generated images plus the number of reference images cannot exceed 15."
@@ -361,6 +377,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode):
                sequential_image_generation=sequential_image_generation,
                sequential_image_generation_options=Seedream4Options(max_images=max_images),
                watermark=watermark,
+                output_format="png" if model == "seedream-5-0-260128" else None,
            ),
        )
        if len(response.data) == 1:
@@ -432,18 +449,21 @@ class ByteDanceTextToVideoNode(IO.ComfyNode):
                    tooltip="Specifies whether to fix the camera. The platform appends an instruction "
                    "to fix the camera to your prompt, but does not guarantee the actual effect.",
                    optional=True,
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    tooltip="This parameter is ignored for any model except seedance-1-5-pro.",
                    optional=True,
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -561,18 +581,21 @@ class ByteDanceImageToVideoNode(IO.ComfyNode):
                    tooltip="Specifies whether to fix the camera. The platform appends an instruction "
                    "to fix the camera to your prompt, but does not guarantee the actual effect.",
                    optional=True,
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    tooltip="This parameter is ignored for any model except seedance-1-5-pro.",
                    optional=True,
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -694,18 +717,21 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode):
                    tooltip="Specifies whether to fix the camera. The platform appends an instruction "
                    "to fix the camera to your prompt, but does not guarantee the actual effect.",
                    optional=True,
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "watermark",
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "generate_audio",
                    default=False,
                    tooltip="This parameter is ignored for any model except seedance-1-5-pro.",
                    optional=True,
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -834,6 +860,7 @@ class ByteDanceImageReferenceNode(IO.ComfyNode):
                    default=False,
                    tooltip='Whether to add an "AI generated" watermark to the video.',
                    optional=True,
+                    advanced=True,
                ),
            ],
            outputs=[
--- a/comfy_api_nodes/nodes_elevenlabs.py
+++ b/comfy_api_nodes/nodes_elevenlabs.py
@@ -0,0 +1,924 @@
+import json
+import uuid
+
+from typing_extensions import override
+
+from comfy_api.latest import IO, ComfyExtension, Input
+from comfy_api_nodes.apis.elevenlabs import (
+    AddVoiceRequest,
+    AddVoiceResponse,
+    DialogueInput,
+    DialogueSettings,
+    SpeechToSpeechRequest,
+    SpeechToTextRequest,
+    SpeechToTextResponse,
+    TextToDialogueRequest,
+    TextToSoundEffectsRequest,
+    TextToSpeechRequest,
+    TextToSpeechVoiceSettings,
+)
+from comfy_api_nodes.util import (
+    ApiEndpoint,
+    audio_bytes_to_audio_input,
+    audio_ndarray_to_bytesio,
+    audio_tensor_to_contiguous_ndarray,
+    sync_op,
+    sync_op_raw,
+    upload_audio_to_comfyapi,
+    validate_string,
+)
+
+# Names of the custom socket types used to connect ElevenLabs nodes to each other (see IO.Custom(...) usages).
+ELEVENLABS_MUSIC_SECTIONS = "ELEVENLABS_MUSIC_SECTIONS"  # Custom type for music sections
+ELEVENLABS_COMPOSITION_PLAN = "ELEVENLABS_COMPOSITION_PLAN"  # Custom type for composition plan
+ELEVENLABS_VOICE = "ELEVENLABS_VOICE"  # Custom type for voice selection
+
+# Predefined ElevenLabs voices: (voice_id, display_name, gender, accent)
+ELEVENLABS_VOICES = [
+    ("CwhRBWXzGAHq8TQ4Fs17", "Roger", "male", "american"),
+    ("EXAVITQu4vr4xnSDxMaL", "Sarah", "female", "american"),
+    ("FGY2WhTYpPnrIDTdsKH5", "Laura", "female", "american"),
+    ("IKne3meq5aSn9XLyUdCD", "Charlie", "male", "australian"),
+    ("JBFqnCBsd6RMkjVDRZzb", "George", "male", "british"),
+    ("N2lVS1w4EtoT3dr4eOWO", "Callum", "male", "american"),
+    ("SAz9YHcvj6GT2YYXdXww", "River", "neutral", "american"),
+    ("SOYHLrjzK2X1ezoPC6cr", "Harry", "male", "american"),
+    ("TX3LPaxmHKxFdv7VOQHJ", "Liam", "male", "american"),
+    ("Xb7hH8MSUJpSbSDYk0k2", "Alice", "female", "british"),
+    ("XrExE9yKIg1WjnnlVkGX", "Matilda", "female", "american"),
+    ("bIHbv24MWmeRgasZH58o", "Will", "male", "american"),
+    ("cgSgspJ2msm6clMCkdW9", "Jessica", "female", "american"),
+    ("cjVigY5qzO86Huf0OWal", "Eric", "male", "american"),
+    ("hpp4J3VqNfWAUOO0d1Us", "Bella", "female", "american"),
+    ("iP95p4xoKVk53GoZ742B", "Chris", "male", "american"),
+    ("nPczCjzI2devNBz1zQrb", "Brian", "male", "american"),
+    ("onwK4e9ZLuTAKqWW03F9", "Daniel", "male", "british"),
+    ("pFZP5JQG7iQjIQuC4Bku", "Lily", "female", "british"),
+    ("pNInz6obpgDQGcFmaJgB", "Adam", "male", "american"),
+    ("pqHfZKP75CvOlQylNhV4", "Bill", "male", "american"),
+]
+
+# Combo labels in the form "Name (gender, accent)", in the same order as ELEVENLABS_VOICES.
+ELEVENLABS_VOICE_OPTIONS = [f"{name} ({gender}, {accent})" for _, name, gender, accent in ELEVENLABS_VOICES]
+# Reverse lookup from a combo label to its voice_id; the label format must stay identical to the one above.
+ELEVENLABS_VOICE_MAP = {
+    f"{name} ({gender}, {accent})": voice_id for voice_id, name, gender, accent in ELEVENLABS_VOICES
+}
+
+
+class ElevenLabsSpeechToText(IO.ComfyNode):
+    # API node that sends an audio input to the ElevenLabs speech-to-text proxy endpoint.
+    # Outputs: transcript text, the language code from the response, and the word entries as a JSON string.
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        # Declares the node's inputs, outputs, hidden auth fields and price badge.
+        return IO.Schema(
+            node_id="ElevenLabsSpeechToText",
+            display_name="ElevenLabs Speech to Text",
+            category="api node/audio/ElevenLabs",
+            description="Transcribe audio to text. "
+            "Supports automatic language detection, speaker diarization, and audio event tagging.",
+            inputs=[
+                IO.Audio.Input(
+                    "audio",
+                    tooltip="Audio to transcribe.",
+                ),
+                # Per-model options are nested under the model choice; execute() reads them from the `model` dict
+                # by the names given here (tag_audio_events, diarize, diarization_threshold, temperature,
+                # timestamps_granularity).
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=[
+                        IO.DynamicCombo.Option(
+                            "scribe_v2",
+                            [
+                                IO.Boolean.Input(
+                                    "tag_audio_events",
+                                    default=False,
+                                    tooltip="Annotate sounds like (laughter), (music), etc. in transcript.",
+                                ),
+                                IO.Boolean.Input(
+                                    "diarize",
+                                    default=False,
+                                    tooltip="Annotate which speaker is talking.",
+                                ),
+                                IO.Float.Input(
+                                    "diarization_threshold",
+                                    default=0.22,
+                                    min=0.1,
+                                    max=0.4,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Speaker separation sensitivity. "
+                                    "Lower values are more sensitive to speaker changes.",
+                                ),
+                                IO.Float.Input(
+                                    "temperature",
+                                    default=0.0,
+                                    min=0.0,
+                                    max=2.0,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Randomness control. "
+                                    "0.0 uses model default. Higher values increase randomness.",
+                                ),
+                                IO.Combo.Input(
+                                    "timestamps_granularity",
+                                    options=["word", "character", "none"],
+                                    default="word",
+                                    tooltip="Timing precision for transcript words.",
+                                ),
+                            ],
+                        ),
+                    ],
+                    tooltip="Model to use for transcription.",
+                ),
+                IO.String.Input(
+                    "language_code",
+                    default="",
+                    tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). "
+                    "Leave empty for automatic detection.",
+                ),
+                IO.Int.Input(
+                    "num_speakers",
+                    default=0,
+                    min=0,
+                    max=32,
+                    display_mode=IO.NumberDisplay.slider,
+                    tooltip="Maximum number of speakers to predict. Set to 0 for automatic detection.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=1,
+                    min=0,
+                    max=2147483647,
+                    tooltip="Seed for reproducibility (determinism not guaranteed).",
+                ),
+            ],
+            outputs=[
+                IO.String.Output(display_name="text"),
+                IO.String.Output(display_name="language_code"),
+                IO.String.Output(display_name="words_json"),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.0073,"format":{"approximate":true,"suffix":"/minute"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        audio: Input.Audio,
+        model: dict,
+        language_code: str,
+        num_speakers: int,
+        seed: int,
+    ) -> IO.NodeOutput:
+        # Uploads the audio, posts a SpeechToTextRequest and returns (text, language_code, words_json).
+        # Raises ValueError when diarization is enabled together with a non-zero num_speakers.
+        # NOTE(review): presumably this mirrors an API restriction on combining diarization_threshold with an
+        # explicit speaker count - confirm against the ElevenLabs speech-to-text reference.
+        if model["diarize"] and num_speakers:
+            raise ValueError(
+                "Number of speakers cannot be specified when diarization is enabled. "
+                "Either disable diarization or set num_speakers to 0."
+            )
+        request = SpeechToTextRequest(
+            model_id=model["model"],
+            # The audio is uploaded as AAC in an MP4 container; the upload helper's return value is
+            # passed on as cloud_storage_url.
+            cloud_storage_url=await upload_audio_to_comfyapi(
+                cls, audio, container_format="mp4", codec_name="aac", mime_type="audio/mp4"
+            ),
+            # Empty or whitespace-only language code is sent as None (automatic detection per the tooltip).
+            language_code=language_code if language_code.strip() else None,
+            tag_audio_events=model["tag_audio_events"],
+            # 0 is the "automatic" sentinel in the UI and is translated to None.
+            num_speakers=num_speakers if num_speakers > 0 else None,
+            timestamps_granularity=model["timestamps_granularity"],
+            diarize=model["diarize"],
+            # The threshold is only sent when diarization is enabled.
+            diarization_threshold=model["diarization_threshold"] if model["diarize"] else None,
+            seed=seed,
+            temperature=model["temperature"],
+        )
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/elevenlabs/v1/speech-to-text", method="POST"),
+            response_model=SpeechToTextResponse,
+            data=request,
+            content_type="multipart/form-data",
+        )
+        # Serialize the word entries (dropping None fields) as pretty-printed JSON; "[]" when there are none.
+        words_json = json.dumps(
+            [w.model_dump(exclude_none=True) for w in response.words] if response.words else [],
+            indent=2,
+        )
+        return IO.NodeOutput(response.text, response.language_code, words_json)
+
+
+class ElevenLabsVoiceSelector(IO.ComfyNode):
+    # Local helper node (is_api_node=False): turns a predefined voice label into its voice_id and
+    # exposes it on an ELEVENLABS_VOICE output.
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        # Single combo input listing ELEVENLABS_VOICE_OPTIONS; single custom-typed output.
+        return IO.Schema(
+            node_id="ElevenLabsVoiceSelector",
+            display_name="ElevenLabs Voice Selector",
+            category="api node/audio/ElevenLabs",
+            description="Select a predefined ElevenLabs voice for text-to-speech generation.",
+            inputs=[
+                IO.Combo.Input(
+                    "voice",
+                    options=ELEVENLABS_VOICE_OPTIONS,
+                    tooltip="Choose a voice from the predefined ElevenLabs voices.",
+                ),
+            ],
+            outputs=[
+                IO.Custom(ELEVENLABS_VOICE).Output(display_name="voice"),
+            ],
+            is_api_node=False,
+        )
+
+    @classmethod
+    def execute(cls, voice: str) -> IO.NodeOutput:
+        # Looks the label up in ELEVENLABS_VOICE_MAP and returns the voice_id string.
+        # Raises ValueError when the label is not one of the predefined options.
+        voice_id = ELEVENLABS_VOICE_MAP.get(voice)
+        if not voice_id:
+            raise ValueError(f"Unknown voice: {voice}")
+        return IO.NodeOutput(voice_id)
+
+
+class ElevenLabsTextToSpeech(IO.ComfyNode):
+    # API node that posts text to the ElevenLabs text-to-speech proxy endpoint for a given voice
+    # and returns the generated audio.
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        # Declares the node's inputs, the audio output, hidden auth fields and price badge.
+        return IO.Schema(
+            node_id="ElevenLabsTextToSpeech",
+            display_name="ElevenLabs Text to Speech",
+            category="api node/audio/ElevenLabs",
+            description="Convert text to speech.",
+            inputs=[
+                IO.Custom(ELEVENLABS_VOICE).Input(
+                    "voice",
+                    tooltip="Voice to use for speech synthesis. Connect from Voice Selector or Instant Voice Clone.",
+                ),
+                IO.String.Input(
+                    "text",
+                    multiline=True,
+                    default="",
+                    tooltip="The text to convert to speech.",
+                ),
+                IO.Float.Input(
+                    "stability",
+                    default=0.5,
+                    min=0.0,
+                    max=1.0,
+                    step=0.01,
+                    display_mode=IO.NumberDisplay.slider,
+                    tooltip="Voice stability. Lower values give broader emotional range, "
+                    "higher values produce more consistent but potentially monotonous speech.",
+                ),
+                IO.Combo.Input(
+                    "apply_text_normalization",
+                    options=["auto", "on", "off"],
+                    tooltip="Text normalization mode. 'auto' lets the system decide, "
+                    "'on' always applies normalization, 'off' skips it.",
+                ),
+                # The two model options expose different settings: both have speed and similarity_boost, while
+                # use_speaker_boost and style exist only under eleven_multilingual_v2. execute() reads the
+                # latter two with .get() for that reason.
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=[
+                        IO.DynamicCombo.Option(
+                            "eleven_multilingual_v2",
+                            [
+                                IO.Float.Input(
+                                    "speed",
+                                    default=1.0,
+                                    min=0.7,
+                                    max=1.3,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.",
+                                ),
+                                IO.Float.Input(
+                                    "similarity_boost",
+                                    default=0.75,
+                                    min=0.0,
+                                    max=1.0,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Similarity boost. Higher values make the voice more similar to the original.",
+                                ),
+                                IO.Boolean.Input(
+                                    "use_speaker_boost",
+                                    default=False,
+                                    tooltip="Boost similarity to the original speaker voice.",
+                                ),
+                                IO.Float.Input(
+                                    "style",
+                                    default=0.0,
+                                    min=0.0,
+                                    max=0.2,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Style exaggeration. Higher values increase stylistic expression "
+                                    "but may reduce stability.",
+                                ),
+                            ],
+                        ),
+                        IO.DynamicCombo.Option(
+                            "eleven_v3",
+                            [
+                                IO.Float.Input(
+                                    "speed",
+                                    default=1.0,
+                                    min=0.7,
+                                    max=1.3,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.",
+                                ),
+                                IO.Float.Input(
+                                    "similarity_boost",
+                                    default=0.75,
+                                    min=0.0,
+                                    max=1.0,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Similarity boost. Higher values make the voice more similar to the original.",
+                                ),
+                            ],
+                        ),
+                    ],
+                    tooltip="Model to use for text-to-speech.",
+                ),
+                IO.String.Input(
+                    "language_code",
+                    default="",
+                    tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). "
+                    "Leave empty for automatic detection.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=1,
+                    min=0,
+                    max=2147483647,
+                    tooltip="Seed for reproducibility (determinism not guaranteed).",
+                ),
+                IO.Combo.Input(
+                    "output_format",
+                    options=["mp3_44100_192", "opus_48000_192"],
+                    tooltip="Audio output format.",
+                ),
+            ],
+            outputs=[
+                IO.Audio.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/1K chars"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        voice: str,
+        text: str,
+        stability: float,
+        apply_text_normalization: str,
+        model: dict,
+        language_code: str,
+        seed: int,
+        output_format: str,
+    ) -> IO.NodeOutput:
+        # Builds a TextToSpeechRequest from the inputs, posts it for the given voice and returns the audio.
+        # `voice` is the value received on the ELEVENLABS_VOICE input; it is placed in the URL path as-is.
+        # Text is checked with validate_string(min_length=1) before any request is made.
+        validate_string(text, min_length=1)
+        request = TextToSpeechRequest(
+            text=text,
+            model_id=model["model"],
+            # Empty or whitespace-only language code is sent as None (automatic detection per the tooltip).
+            language_code=language_code if language_code.strip() else None,
+            voice_settings=TextToSpeechVoiceSettings(
+                stability=stability,
+                similarity_boost=model["similarity_boost"],
+                speed=model["speed"],
+                # Only defined for eleven_multilingual_v2; None when the selected model has no such input.
+                use_speaker_boost=model.get("use_speaker_boost", None),
+                style=model.get("style", None),
+            ),
+            seed=seed,
+            apply_text_normalization=apply_text_normalization,
+        )
+        # The output format travels as a query parameter, not in the request body; the response is read
+        # as raw bytes and converted with audio_bytes_to_audio_input.
+        response = await sync_op_raw(
+            cls,
+            ApiEndpoint(
+                path=f"/proxy/elevenlabs/v1/text-to-speech/{voice}",
+                method="POST",
+                query_params={"output_format": output_format},
+            ),
+            data=request,
+            as_binary=True,
+        )
+        return IO.NodeOutput(audio_bytes_to_audio_input(response))
+
+
+class ElevenLabsAudioIsolation(IO.ComfyNode):
+    # API node that posts an audio input to the ElevenLabs audio-isolation proxy endpoint and returns
+    # the processed audio. Note the display name is "Voice Isolation" while the node_id says "AudioIsolation".
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        # One audio input, one audio output, hidden auth fields and a per-minute price badge.
+        return IO.Schema(
+            node_id="ElevenLabsAudioIsolation",
+            display_name="ElevenLabs Voice Isolation",
+            category="api node/audio/ElevenLabs",
+            description="Remove background noise from audio, isolating vocals or speech.",
+            inputs=[
+                IO.Audio.Input(
+                    "audio",
+                    tooltip="Audio to process for background noise removal.",
+                ),
+            ],
+            outputs=[
+                IO.Audio.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/minute"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        audio: Input.Audio,
+    ) -> IO.NodeOutput:
+        # Encodes the input waveform in memory (mp4 container, aac codec) and sends it directly as the
+        # multipart file field "audio"; the raw response bytes are converted back into an audio output.
+        audio_data_np = audio_tensor_to_contiguous_ndarray(audio["waveform"])
+        audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, audio["sample_rate"], "mp4", "aac")
+        response = await sync_op_raw(
+            cls,
+            ApiEndpoint(path="/proxy/elevenlabs/v1/audio-isolation", method="POST"),
+            files={"audio": ("audio.mp4", audio_bytes_io, "audio/mp4")},
+            content_type="multipart/form-data",
+            as_binary=True,
+        )
+        return IO.NodeOutput(audio_bytes_to_audio_input(response))
+
+
+class ElevenLabsTextToSoundEffects(IO.ComfyNode):
+    """API node that posts a text description to the ElevenLabs sound-generation proxy endpoint."""
+
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        """Declare the text prompt, the model combo (with its nested settings) and the output format."""
+        return IO.Schema(
+            node_id="ElevenLabsTextToSoundEffects",
+            display_name="ElevenLabs Text to Sound Effects",
+            category="api node/audio/ElevenLabs",
+            description="Generate sound effects from text descriptions.",
+            inputs=[
+                IO.String.Input(
+                    "text",
+                    multiline=True,
+                    default="",
+                    tooltip="Text description of the sound effect to generate.",
+                ),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=[
+                        IO.DynamicCombo.Option(
+                            "eleven_sfx_v2",
+                            [
+                                IO.Float.Input(
+                                    "duration",
+                                    default=5.0,
+                                    min=0.5,
+                                    max=30.0,
+                                    step=0.1,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="Duration of generated sound in seconds.",
+                                ),
+                                IO.Boolean.Input(
+                                    "loop",
+                                    default=False,
+                                    tooltip="Create a smoothly looping sound effect.",
+                                ),
+                                IO.Float.Input(
+                                    "prompt_influence",
+                                    default=0.3,
+                                    min=0.0,
+                                    max=1.0,
+                                    step=0.01,
+                                    display_mode=IO.NumberDisplay.slider,
+                                    tooltip="How closely generation follows the prompt. "
+                                    "Higher values make the sound follow the text more closely.",
+                                ),
+                            ],
+                        ),
+                    ],
+                    tooltip="Model to use for sound effect generation.",
+                ),
+                IO.Combo.Input(
+                    "output_format",
+                    options=["mp3_44100_192", "opus_48000_192"],
+                    tooltip="Audio output format.",
+                ),
+            ],
+            outputs=[
+                IO.Audio.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.14,"format":{"approximate":true,"suffix":"/minute"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        text: str,
+        model: dict,
+        output_format: str,
+    ) -> IO.NodeOutput:
+        """Validate ``text``, request a sound effect and return the decoded audio.
+
+        Args:
+            text: Prompt; must contain at least one character.
+            model: Dict carrying the nested ``duration``, ``prompt_influence`` and optional ``loop`` values.
+            output_format: Forwarded as the ``output_format`` query parameter.
+        """
+        validate_string(text, min_length=1)
+        endpoint = ApiEndpoint(
+            path="/proxy/elevenlabs/v1/sound-generation",
+            method="POST",
+            query_params={"output_format": output_format},
+        )
+        payload = TextToSoundEffectsRequest(
+            text=text,
+            duration_seconds=model["duration"],
+            prompt_influence=model["prompt_influence"],
+            # A missing "loop" key is passed through as None.
+            loop=model.get("loop"),
+        )
+        sound_bytes = await sync_op_raw(cls, endpoint, data=payload, as_binary=True)
+        return IO.NodeOutput(audio_bytes_to_audio_input(sound_bytes))
+
+
+class ElevenLabsInstantVoiceClone(IO.ComfyNode):
+    """API node that uploads voice samples to the ElevenLabs ``voices/add`` proxy endpoint."""
+
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        """Declare the auto-growing audio inputs (1 to 8), the noise-removal toggle and the voice output."""
+        return IO.Schema(
+            node_id="ElevenLabsInstantVoiceClone",
+            display_name="ElevenLabs Instant Voice Clone",
+            category="api node/audio/ElevenLabs",
+            description="Create a cloned voice from audio samples. "
+            "Provide 1-8 audio recordings of the voice to clone.",
+            inputs=[
+                IO.Autogrow.Input(
+                    "files",
+                    template=IO.Autogrow.TemplatePrefix(
+                        IO.Audio.Input("audio"),
+                        prefix="audio",
+                        min=1,
+                        max=8,
+                    ),
+                    tooltip="Audio recordings for voice cloning.",
+                ),
+                IO.Boolean.Input(
+                    "remove_background_noise",
+                    default=False,
+                    tooltip="Remove background noise from voice samples using audio isolation.",
+                ),
+            ],
+            outputs=[
+                IO.Custom(ELEVENLABS_VOICE).Output(display_name="voice"),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(expr="""{"type":"usd","usd":0.15}"""),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        files: IO.Autogrow.Type,
+        remove_background_noise: bool,
+    ) -> IO.NodeOutput:
+        """Encode every sample in ``files``, upload them together and return the new voice id.
+
+        Args:
+            files: Mapping of input name to audio; each entry supplies ``"sample_rate"`` and ``"waveform"``.
+            remove_background_noise: Forwarded unchanged in the request body.
+        """
+
+        def _encode_sample(sample) -> bytes:
+            # Serialize one sample with the "mp4"/"aac" arguments and return the raw bytes.
+            rate: int = sample["sample_rate"]
+            samples_np = audio_tensor_to_contiguous_ndarray(sample["waveform"])
+            return audio_ndarray_to_bytesio(samples_np, rate, "mp4", "aac").getvalue()
+
+        # Every part is sent under the same "files" field name; the input key only names the file.
+        file_tuples: list[tuple[str, tuple[str, bytes, str]]] = [
+            ("files", (f"{key}.mp4", _encode_sample(files[key]), "audio/mp4")) for key in files
+        ]
+
+        endpoint = ApiEndpoint(path="/proxy/elevenlabs/v1/voices/add", method="POST")
+        # The voice is registered under a freshly generated random UUID as its name.
+        payload = AddVoiceRequest(
+            name=str(uuid.uuid4()),
+            remove_background_noise=remove_background_noise,
+        )
+        created = await sync_op(
+            cls,
+            endpoint,
+            response_model=AddVoiceResponse,
+            data=payload,
+            files=file_tuples,
+            content_type="multipart/form-data",
+        )
+        return IO.NodeOutput(created.voice_id)
+
+
+# Nested widgets shared by both model options of the ElevenLabsSpeechToSpeech "model" combo.
+# The same list object is passed to each option, so the two models expose identical settings.
+ELEVENLABS_STS_VOICE_SETTINGS = [
+    IO.Float.Input(
+        "speed",
+        default=1.0,
+        min=0.7,
+        max=1.3,
+        step=0.01,
+        display_mode=IO.NumberDisplay.slider,
+        tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.",
+    ),
+    IO.Float.Input(
+        "similarity_boost",
+        default=0.75,
+        min=0.0,
+        max=1.0,
+        step=0.01,
+        display_mode=IO.NumberDisplay.slider,
+        tooltip="Similarity boost. Higher values make the voice more similar to the original.",
+    ),
+    IO.Boolean.Input(
+        "use_speaker_boost",
+        default=False,
+        tooltip="Boost similarity to the original speaker voice.",
+    ),
+    # NOTE(review): the style slider is capped at 0.2 here — confirm the limit is intentional.
+    IO.Float.Input(
+        "style",
+        default=0.0,
+        min=0.0,
+        max=0.2,
+        step=0.01,
+        display_mode=IO.NumberDisplay.slider,
+        tooltip="Style exaggeration. Higher values increase stylistic expression but may reduce stability.",
+    ),
+]
+
+
+class ElevenLabsSpeechToSpeech(IO.ComfyNode):
+    """API node that posts source audio to the ElevenLabs speech-to-speech proxy endpoint for a voice."""
+
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        """Declare the voice/audio inputs, the voice settings, output format, seed and noise toggle."""
+        return IO.Schema(
+            node_id="ElevenLabsSpeechToSpeech",
+            display_name="ElevenLabs Speech to Speech",
+            category="api node/audio/ElevenLabs",
+            description="Transform speech from one voice to another while preserving the original content and emotion.",
+            inputs=[
+                IO.Custom(ELEVENLABS_VOICE).Input(
+                    "voice",
+                    tooltip="Target voice for the transformation. "
+                    "Connect from Voice Selector or Instant Voice Clone.",
+                ),
+                IO.Audio.Input(
+                    "audio",
+                    tooltip="Source audio to transform.",
+                ),
+                IO.Float.Input(
+                    "stability",
+                    default=0.5,
+                    min=0.0,
+                    max=1.0,
+                    step=0.01,
+                    display_mode=IO.NumberDisplay.slider,
+                    tooltip="Voice stability. Lower values give broader emotional range, "
+                    "higher values produce more consistent but potentially monotonous speech.",
+                ),
+                IO.DynamicCombo.Input(
+                    "model",
+                    options=[
+                        IO.DynamicCombo.Option(
+                            "eleven_multilingual_sts_v2",
+                            ELEVENLABS_STS_VOICE_SETTINGS,
+                        ),
+                        IO.DynamicCombo.Option(
+                            "eleven_english_sts_v2",
+                            ELEVENLABS_STS_VOICE_SETTINGS,
+                        ),
+                    ],
+                    tooltip="Model to use for speech-to-speech transformation.",
+                ),
+                IO.Combo.Input(
+                    "output_format",
+                    options=["mp3_44100_192", "opus_48000_192"],
+                    tooltip="Audio output format.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=4294967295,
+                    tooltip="Seed for reproducibility.",
+                ),
+                IO.Boolean.Input(
+                    "remove_background_noise",
+                    default=False,
+                    tooltip="Remove background noise from input audio using audio isolation.",
+                ),
+            ],
+            outputs=[
+                IO.Audio.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/minute"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        voice: str,
+        audio: Input.Audio,
+        stability: float,
+        model: dict,
+        output_format: str,
+        seed: int,
+        remove_background_noise: bool,
+    ) -> IO.NodeOutput:
+        """Upload ``audio`` for conversion into ``voice`` and return the decoded response audio.
+
+        Args:
+            voice: Voice identifier interpolated into the endpoint path.
+            audio: Mapping providing the ``"waveform"`` and ``"sample_rate"`` entries.
+            stability: Top-level stability widget value.
+            model: Dict carrying the nested voice settings plus a ``"model"`` entry
+                (presumably the selected option key — confirm against the DynamicCombo contract).
+            output_format: Forwarded as the ``output_format`` query parameter.
+            seed: Forwarded unchanged in the request body.
+            remove_background_noise: Forwarded unchanged in the request body.
+        """
+        source_np = audio_tensor_to_contiguous_ndarray(audio["waveform"])
+        source_stream = audio_ndarray_to_bytesio(source_np, audio["sample_rate"], "mp4", "aac")
+        settings = TextToSpeechVoiceSettings(
+            stability=stability,
+            similarity_boost=model["similarity_boost"],
+            style=model["style"],
+            use_speaker_boost=model["use_speaker_boost"],
+            speed=model["speed"],
+        )
+        endpoint = ApiEndpoint(
+            path=f"/proxy/elevenlabs/v1/speech-to-speech/{voice}",
+            method="POST",
+            query_params={"output_format": output_format},
+        )
+        # The voice settings travel as a JSON string field inside the multipart form.
+        payload = SpeechToSpeechRequest(
+            model_id=model["model"],
+            voice_settings=settings.model_dump_json(exclude_none=True),
+            seed=seed,
+            remove_background_noise=remove_background_noise,
+        )
+        upload = {"audio": ("audio.mp4", source_stream.getvalue(), "audio/mp4")}
+        converted = await sync_op_raw(
+            cls,
+            endpoint,
+            data=payload,
+            files=upload,
+            content_type="multipart/form-data",
+            as_binary=True,
+        )
+        return IO.NodeOutput(audio_bytes_to_audio_input(converted))
+
+
+def _generate_dialogue_inputs(count: int) -> list:
+    """Build the ``text{i}`` / ``voice{i}`` widget pairs for entries 1 through ``count``.
+
+    The returned list is flat and ordered text1, voice1, text2, voice2, and so on.
+    """
+    widgets: list = []
+    for entry in range(1, count + 1):
+        widgets.append(
+            IO.String.Input(
+                f"text{entry}",
+                multiline=True,
+                default="",
+                tooltip=f"Text content for dialogue entry {entry}.",
+            )
+        )
+        widgets.append(
+            IO.Custom(ELEVENLABS_VOICE).Input(
+                f"voice{entry}",
+                tooltip=f"Voice for dialogue entry {entry}. Connect from Voice Selector or Instant Voice Clone.",
+            )
+        )
+    return widgets
+
+
+class ElevenLabsTextToDialogue(IO.ComfyNode):
+    """API node that posts a list of (text, voice) entries to the ElevenLabs text-to-dialogue proxy endpoint."""
+
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        """Declare the dialogue settings and a combo selecting how many text/voice pairs are shown (1 to 10)."""
+        return IO.Schema(
+            node_id="ElevenLabsTextToDialogue",
+            display_name="ElevenLabs Text to Dialogue",
+            category="api node/audio/ElevenLabs",
+            description="Generate multi-speaker dialogue from text. Each dialogue entry has its own text and voice.",
+            inputs=[
+                IO.Float.Input(
+                    "stability",
+                    default=0.5,
+                    min=0.0,
+                    max=1.0,
+                    # NOTE(review): a 0.5 step leaves only 0.0 / 0.5 / 1.0 selectable — confirm this is intended.
+                    step=0.5,
+                    display_mode=IO.NumberDisplay.slider,
+                    tooltip="Voice stability. Lower values give broader emotional range, "
+                    "higher values produce more consistent but potentially monotonous speech.",
+                ),
+                IO.Combo.Input(
+                    "apply_text_normalization",
+                    options=["auto", "on", "off"],
+                    tooltip="Text normalization mode. 'auto' lets the system decide, "
+                    "'on' always applies normalization, 'off' skips it.",
+                ),
+                IO.Combo.Input(
+                    "model",
+                    options=["eleven_v3"],
+                    tooltip="Model to use for dialogue generation.",
+                ),
+                IO.DynamicCombo.Input(
+                    "inputs",
+                    # One option per entry count "1".."10", each exposing that many text/voice pairs.
+                    options=[
+                        IO.DynamicCombo.Option(str(entries), _generate_dialogue_inputs(entries))
+                        for entries in range(1, 11)
+                    ],
+                    tooltip="Number of dialogue entries.",
+                ),
+                IO.String.Input(
+                    "language_code",
+                    default="",
+                    tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). "
+                    "Leave empty for automatic detection.",
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=1,
+                    min=0,
+                    max=4294967295,
+                    tooltip="Seed for reproducibility.",
+                ),
+                IO.Combo.Input(
+                    "output_format",
+                    options=["mp3_44100_192", "opus_48000_192"],
+                    tooltip="Audio output format.",
+                ),
+            ],
+            outputs=[
+                IO.Audio.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/1K chars"}}""",
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        stability: float,
+        apply_text_normalization: str,
+        model: str,
+        inputs: dict,
+        language_code: str,
+        seed: int,
+        output_format: str,
+    ) -> IO.NodeOutput:
+        """Collect the dialogue entries, request the dialogue audio and return it decoded.
+
+        Args:
+            stability: Wrapped in ``DialogueSettings`` for the request.
+            apply_text_normalization: Forwarded unchanged in the request body.
+            model: Sent as ``model_id``.
+            inputs: Dict whose ``"inputs"`` entry is the entry count and which holds
+                ``text{i}`` / ``voice{i}`` for each entry.
+            language_code: Optional language code; blank or whitespace-only means "not set".
+            seed: Forwarded unchanged in the request body.
+            output_format: Forwarded as the ``output_format`` query parameter.
+        """
+        num_entries = int(inputs["inputs"])
+        dialogue_inputs: list[DialogueInput] = []
+        for i in range(1, num_entries + 1):
+            text = inputs[f"text{i}"]
+            voice_id = inputs[f"voice{i}"]
+            # Every visible entry must carry some text.
+            validate_string(text, min_length=1)
+            dialogue_inputs.append(DialogueInput(text=text, voice_id=voice_id))
+        # Send the stripped code: previously only the emptiness check was stripped, so a padded
+        # value such as " en " reached the API with its surrounding whitespace.
+        normalized_language_code = language_code.strip() or None
+        request = TextToDialogueRequest(
+            inputs=dialogue_inputs,
+            model_id=model,
+            language_code=normalized_language_code,
+            settings=DialogueSettings(stability=stability),
+            seed=seed,
+            apply_text_normalization=apply_text_normalization,
+        )
+        response = await sync_op_raw(
+            cls,
+            ApiEndpoint(
+                path="/proxy/elevenlabs/v1/text-to-dialogue",
+                method="POST",
+                query_params={"output_format": output_format},
+            ),
+            data=request,
+            as_binary=True,
+        )
+        return IO.NodeOutput(audio_bytes_to_audio_input(response))
+
+
+class ElevenLabsExtension(ComfyExtension):
+    """Extension object that exposes the ElevenLabs node classes defined in this module."""
+
+    @override
+    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
+        """Return the ElevenLabs node classes in their registration order."""
+        node_classes: list[type[IO.ComfyNode]] = [
+            ElevenLabsSpeechToText,
+            ElevenLabsVoiceSelector,
+            ElevenLabsTextToSpeech,
+            ElevenLabsAudioIsolation,
+            ElevenLabsTextToSoundEffects,
+            ElevenLabsInstantVoiceClone,
+            ElevenLabsSpeechToSpeech,
+            ElevenLabsTextToDialogue,
+        ]
+        return node_classes
+
+
+async def comfy_entrypoint() -> ElevenLabsExtension:
+    """Create and return a new ``ElevenLabsExtension`` instance."""
+    extension = ElevenLabsExtension()
+    return extension
--- a/comfy_api_nodes/nodes_gemini.py
+++ b/comfy_api_nodes/nodes_gemini.py
@@ -6,6 +6,7 @@ See: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/infer
 import base64
 import os
 from enum import Enum
+from fnmatch import fnmatch
 from io import BytesIO
 from typing import Literal

@@ -28,6 +29,7 @@ from comfy_api_nodes.apis.gemini import (
    GeminiRole,
    GeminiSystemInstructionContent,
    GeminiTextPart,
+    GeminiThinkingConfig,
    Modality,
 )
 from comfy_api_nodes.util import (
@@ -54,6 +56,21 @@ GEMINI_IMAGE_SYS_PROMPT = (
    "Prioritize generating the visual representation above any text, formatting, or conversational requests."
 )

+# Price badge shared by the GeminiImage2 and GeminiNanoBanana2 nodes.
+# The expression reads the "model" and "resolution" widgets, picks the flash price table when the
+# model value contains "nano banana 2" (otherwise the pro table) and looks the price up by resolution.
+# NOTE(review): the table keys and the matched substring are lowercase, while the combo options are
+# written as "1K"/"2K"/"4K" and "Nano Banana 2 (...)" — presumably widget values are lowercased
+# before the expression is evaluated; confirm.
+GEMINI_IMAGE_2_PRICE_BADGE = IO.PriceBadge(
+    depends_on=IO.PriceBadgeDepends(widgets=["model", "resolution"]),
+    expr="""
+    (
+      $m := widgets.model;
+      $r := widgets.resolution;
+      $isFlash := $contains($m, "nano banana 2");
+      $flashPrices := {"1k": 0.0696, "2k": 0.0696, "4k": 0.123};
+      $proPrices := {"1k": 0.134, "2k": 0.134, "4k": 0.24};
+      $prices := $isFlash ? $flashPrices : $proPrices;
+      {"type":"usd","usd": $lookup($prices, $r), "format":{"suffix":"/Image","approximate":true}}
+    )
+    """,
+)
+

 class GeminiModel(str, Enum):
    """
@@ -119,6 +136,13 @@ async def create_image_parts(
    return image_parts


+def _mime_matches(mime: GeminiMimeType | None, pattern: str) -> bool:
+    """Return True when ``mime`` is set and its value matches the fnmatch-style ``pattern`` (e.g. 'image/*')."""
+    # A missing MIME type never matches, whatever the pattern is.
+    return mime is not None and fnmatch(mime.value, pattern)
+
+
 def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Literal["text"] | str) -> list[GeminiPart]:
    """
    Filter response parts by their type.
@@ -151,9 +175,9 @@ def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Litera
        for part in candidate.content.parts:
            if part_type == "text" and part.text:
                parts.append(part)
-            elif part.inlineData and part.inlineData.mimeType == part_type:
+            elif part.inlineData and _mime_matches(part.inlineData.mimeType, part_type):
                parts.append(part)
-            elif part.fileData and part.fileData.mimeType == part_type:
+            elif part.fileData and _mime_matches(part.fileData.mimeType, part_type):
                parts.append(part)

    if not parts and blocked_reasons:
@@ -178,7 +202,7 @@ def get_text_from_response(response: GeminiGenerateContentResponse) -> str:

 async def get_image_from_response(response: GeminiGenerateContentResponse) -> Input.Image:
    image_tensors: list[Input.Image] = []
-    parts = get_parts_by_type(response, "image/png")
+    parts = get_parts_by_type(response, "image/*")
    for part in parts:
        if part.inlineData:
            image_data = base64.b64decode(part.inlineData.data)
@@ -221,6 +245,10 @@ def calculate_tokens_price(response: GeminiGenerateContentResponse) -> float | N
        input_tokens_price = 2
        output_text_tokens_price = 12.0
        output_image_tokens_price = 120.0
+    elif response.modelVersion == "gemini-3.1-flash-image-preview":
+        input_tokens_price = 0.5
+        output_text_tokens_price = 3.0
+        output_image_tokens_price = 60.0
    else:
        return None
    final_price = response.usageMetadata.promptTokenCount * input_tokens_price
@@ -308,6 +336,7 @@ class GeminiNode(IO.ComfyNode):
                    default="",
                    optional=True,
                    tooltip="Foundational instructions that dictate an AI's behavior.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -585,6 +614,7 @@ class GeminiImage(IO.ComfyNode):
                    tooltip="Choose 'IMAGE' for image-only output, or "
                    "'IMAGE+TEXT' to return both the generated image and a text response.",
                    optional=True,
+                    advanced=True,
                ),
                IO.String.Input(
                    "system_prompt",
@@ -592,6 +622,7 @@ class GeminiImage(IO.ComfyNode):
                    default=GEMINI_IMAGE_SYS_PROMPT,
                    optional=True,
                    tooltip="Foundational instructions that dictate an AI's behavior.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -626,7 +657,7 @@ class GeminiImage(IO.ComfyNode):

        if not aspect_ratio:
            aspect_ratio = "auto"  # for backward compatability with old workflows; to-do remove this in December
-        image_config = GeminiImageConfig(aspectRatio=aspect_ratio)
+        image_config = GeminiImageConfig() if aspect_ratio == "auto" else GeminiImageConfig(aspectRatio=aspect_ratio)

        if images is not None:
            parts.extend(await create_image_parts(cls, images))
@@ -646,7 +677,7 @@ class GeminiImage(IO.ComfyNode):
                ],
                generationConfig=GeminiImageGenerationConfig(
                    responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
-                    imageConfig=None if aspect_ratio == "auto" else image_config,
+                    imageConfig=image_config,
                ),
                systemInstruction=gemini_system_prompt,
            ),
@@ -675,7 +706,7 @@ class GeminiImage2(IO.ComfyNode):
                ),
                IO.Combo.Input(
                    "model",
-                    options=["gemini-3-pro-image-preview"],
+                    options=["gemini-3-pro-image-preview", "Nano Banana 2 (Gemini 3.1 Flash Image)"],
                ),
                IO.Int.Input(
                    "seed",
@@ -706,6 +737,7 @@ class GeminiImage2(IO.ComfyNode):
                    options=["IMAGE+TEXT", "IMAGE"],
                    tooltip="Choose 'IMAGE' for image-only output, or "
                    "'IMAGE+TEXT' to return both the generated image and a text response.",
+                    advanced=True,
                ),
                IO.Image.Input(
                    "images",
@@ -725,6 +757,7 @@ class GeminiImage2(IO.ComfyNode):
                    default=GEMINI_IMAGE_SYS_PROMPT,
                    optional=True,
                    tooltip="Foundational instructions that dictate an AI's behavior.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -737,19 +770,7 @@ class GeminiImage2(IO.ComfyNode):
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
-            price_badge=IO.PriceBadge(
-                depends_on=IO.PriceBadgeDepends(widgets=["resolution"]),
-                expr="""
-                (
-                  $r := widgets.resolution;
-                  ($contains($r,"1k") or $contains($r,"2k"))
-                    ? {"type":"usd","usd":0.134,"format":{"suffix":"/Image","approximate":true}}
-                    : $contains($r,"4k")
-                      ? {"type":"usd","usd":0.24,"format":{"suffix":"/Image","approximate":true}}
-                      : {"type":"text","text":"Token-based"}
-                )
-                """,
-            ),
+            price_badge=GEMINI_IMAGE_2_PRICE_BADGE,
        )

    @classmethod
@@ -766,6 +787,10 @@ class GeminiImage2(IO.ComfyNode):
        system_prompt: str = "",
    ) -> IO.NodeOutput:
        validate_string(prompt, strip_whitespace=True, min_length=1)
+        if model == "Nano Banana 2 (Gemini 3.1 Flash Image)":
+            model = "gemini-3.1-flash-image-preview"
+            if response_modalities == "IMAGE+TEXT":
+                raise ValueError("IMAGE+TEXT is not currently available for the Nano Banana 2 model.")

        parts: list[GeminiPart] = [GeminiPart(text=prompt)]
        if images is not None:
@@ -802,6 +827,168 @@ class GeminiImage2(IO.ComfyNode):
        return IO.NodeOutput(await get_image_from_response(response), get_text_from_response(response))


+class GeminiNanoBanana2(IO.ComfyNode):
+    """API node that generates or edits images with the "Nano Banana 2" model via the Vertex AI proxy."""
+
+    @classmethod
+    def define_schema(cls):
+        """Declare the prompt, model, image options, optional context inputs and the single image output."""
+        return IO.Schema(
+            node_id="GeminiNanoBanana2",
+            display_name="Nano Banana 2",
+            category="api node/image/Gemini",
+            description="Generate or edit images synchronously via Google Vertex API.",
+            inputs=[
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    tooltip="Text prompt describing the image to generate or the edits to apply. "
+                    "Include any constraints, styles, or details the model should follow.",
+                    default="",
+                ),
+                IO.Combo.Input(
+                    "model",
+                    options=["Nano Banana 2 (Gemini 3.1 Flash Image)"],
+                ),
+                IO.Int.Input(
+                    "seed",
+                    default=42,
+                    min=0,
+                    max=0xFFFFFFFFFFFFFFFF,
+                    control_after_generate=True,
+                    tooltip="When the seed is fixed to a specific value, the model makes a best effort to provide "
+                    "the same response for repeated requests. Deterministic output isn't guaranteed. "
+                    "Also, changing the model or parameter settings, such as the temperature, "
+                    "can cause variations in the response even when you use the same seed value. "
+                    "By default, a random seed value is used.",
+                ),
+                IO.Combo.Input(
+                    "aspect_ratio",
+                    options=[
+                        "auto",
+                        "1:1",
+                        "2:3",
+                        "3:2",
+                        "3:4",
+                        "4:3",
+                        "4:5",
+                        "5:4",
+                        "9:16",
+                        "16:9",
+                        "21:9",
+                        # "1:4",
+                        # "4:1",
+                        # "8:1",
+                        # "1:8",
+                    ],
+                    default="auto",
+                    # The previous wording ("a 16:9 square") was self-contradictory.
+                    tooltip="If set to 'auto', matches your input image's aspect ratio; "
+                    "if no image is provided, a 16:9 image is usually generated.",
+                ),
+                IO.Combo.Input(
+                    "resolution",
+                    options=[
+                        # "512px",
+                        "1K",
+                        "2K",
+                        "4K",
+                    ],
+                    tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.",
+                ),
+                IO.Combo.Input(
+                    "response_modalities",
+                    options=["IMAGE"],
+                    advanced=True,
+                ),
+                IO.Combo.Input(
+                    "thinking_level",
+                    options=["MINIMAL", "HIGH"],
+                ),
+                IO.Image.Input(
+                    "images",
+                    optional=True,
+                    tooltip="Optional reference image(s). "
+                    "To include multiple images, use the Batch Images node (up to 14).",
+                ),
+                IO.Custom("GEMINI_INPUT_FILES").Input(
+                    "files",
+                    optional=True,
+                    tooltip="Optional file(s) to use as context for the model. "
+                    "Accepts inputs from the Gemini Generate Content Input Files node.",
+                ),
+                IO.String.Input(
+                    "system_prompt",
+                    multiline=True,
+                    default=GEMINI_IMAGE_SYS_PROMPT,
+                    optional=True,
+                    tooltip="Foundational instructions that dictate an AI's behavior.",
+                    advanced=True,
+                ),
+            ],
+            outputs=[
+                IO.Image.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            price_badge=GEMINI_IMAGE_2_PRICE_BADGE,
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        prompt: str,
+        model: str,
+        seed: int,
+        aspect_ratio: str,
+        resolution: str,
+        response_modalities: str,
+        thinking_level: str,
+        images: Input.Image | None = None,
+        files: list[GeminiPart] | None = None,
+        system_prompt: str = "",
+    ) -> IO.NodeOutput:
+        """Send the prompt plus optional images/files to the model and return the generated image.
+
+        Args:
+            prompt: Required text prompt (validated after whitespace stripping).
+            model: Display name from the combo; mapped to the API model id below.
+            seed: Accepted from the widget.
+                NOTE(review): it is not included in the request built here — confirm this is intentional.
+            aspect_ratio: ``"auto"`` leaves the aspect ratio unset; any other value is sent as-is.
+            resolution: Sent as the image size.
+            response_modalities: ``"IMAGE"`` requests image-only output.
+            thinking_level: Sent in the thinking configuration.
+            images: Optional reference images (at most 14).
+            files: Optional pre-built parts appended after the images.
+            system_prompt: Optional system instruction; an empty string means none is sent.
+
+        Raises:
+            ValueError: If more than 14 reference images are supplied.
+        """
+        validate_string(prompt, strip_whitespace=True, min_length=1)
+        if model == "Nano Banana 2 (Gemini 3.1 Flash Image)":
+            model = "gemini-3.1-flash-image-preview"
+
+        parts: list[GeminiPart] = [GeminiPart(text=prompt)]
+        if images is not None:
+            if get_number_of_images(images) > 14:
+                raise ValueError("The current maximum number of supported images is 14.")
+            parts.extend(await create_image_parts(cls, images))
+        if files is not None:
+            parts.extend(files)
+
+        image_config = GeminiImageConfig(imageSize=resolution)
+        if aspect_ratio != "auto":
+            image_config.aspectRatio = aspect_ratio
+
+        gemini_system_prompt = None
+        if system_prompt:
+            gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None)
+
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/vertexai/gemini/{model}", method="POST"),
+            data=GeminiImageGenerateContentRequest(
+                contents=[
+                    GeminiContent(role=GeminiRole.user, parts=parts),
+                ],
+                generationConfig=GeminiImageGenerationConfig(
+                    responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]),
+                    imageConfig=image_config,
+                    thinkingConfig=GeminiThinkingConfig(thinkingLevel=thinking_level),
+                ),
+                systemInstruction=gemini_system_prompt,
+            ),
+            response_model=GeminiGenerateContentResponse,
+            price_extractor=calculate_tokens_price,
+        )
+        # The schema declares a single image output, so return exactly one value (the text result
+        # that used to be returned alongside it had no matching output slot).
+        return IO.NodeOutput(await get_image_from_response(response))
+
+
 class GeminiExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -809,6 +996,7 @@ class GeminiExtension(ComfyExtension):
            GeminiNode,
            GeminiImage,
            GeminiImage2,
+            GeminiNanoBanana2,
            GeminiInputFiles,
        ]

--- a/comfy_api_nodes/nodes_hunyuan3d.py
+++ b/comfy_api_nodes/nodes_hunyuan3d.py
@@ -54,6 +54,7 @@ class TencentTextToModelNode(IO.ComfyNode):
            node_id="TencentTextToModelNode",
            display_name="Hunyuan3D: Text to Model",
            category="api node/3d/Tencent",
+            essentials_category="3D",
            inputs=[
                IO.Combo.Input(
                    "model",
@@ -168,6 +169,7 @@ class TencentImageToModelNode(IO.ComfyNode):
            node_id="TencentImageToModelNode",
            display_name="Hunyuan3D: Image(s) to Model",
            category="api node/3d/Tencent",
+            essentials_category="3D",
            inputs=[
                IO.Combo.Input(
                    "model",
--- a/comfy_api_nodes/nodes_ideogram.py
+++ b/comfy_api_nodes/nodes_ideogram.py
@@ -261,6 +261,7 @@ class IdeogramV1(IO.ComfyNode):
                    default="AUTO",
                    tooltip="Determine if MagicPrompt should be used in generation",
                    optional=True,
+                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -394,6 +395,7 @@ class IdeogramV2(IO.ComfyNode):
                    default="AUTO",
                    tooltip="Determine if MagicPrompt should be used in generation",
                    optional=True,
+                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -411,6 +413,7 @@ class IdeogramV2(IO.ComfyNode):
                    default="NONE",
                    tooltip="Style type for generation (V2 only)",
                    optional=True,
+                    advanced=True,
                ),
                IO.String.Input(
                    "negative_prompt",
@@ -564,6 +567,7 @@ class IdeogramV3(IO.ComfyNode):
                    default="AUTO",
                    tooltip="Determine if MagicPrompt should be used in generation",
                    optional=True,
+                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -590,6 +594,7 @@ class IdeogramV3(IO.ComfyNode):
                    default="DEFAULT",
                    tooltip="Controls the trade-off between generation speed and quality",
                    optional=True,
+                    advanced=True,
                ),
                IO.Image.Input(
                    "character_image",
--- a/comfy_api_nodes/nodes_kling.py
+++ b/comfy_api_nodes/nodes_kling.py
@@ -50,6 +50,7 @@ from comfy_api_nodes.apis import (
 )
 from comfy_api_nodes.apis.kling import (
    ImageToVideoWithAudioRequest,
+    KlingAvatarRequest,
    MotionControlRequest,
    MultiPromptEntry,
    OmniImageParamImage,
@@ -74,6 +75,7 @@ from comfy_api_nodes.util import (
    upload_image_to_comfyapi,
    upload_images_to_comfyapi,
    upload_video_to_comfyapi,
+    validate_audio_duration,
    validate_image_aspect_ratio,
    validate_image_dimensions,
    validate_string,
@@ -2262,6 +2264,7 @@ class KlingLipSyncAudioToVideoNode(IO.ComfyNode):
            node_id="KlingLipSyncAudioToVideoNode",
            display_name="Kling Lip Sync Video with Audio",
            category="api node/video/Kling",
+            essentials_category="Video Generation",
            description="Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length.",
            inputs=[
                IO.Video.Input("video"),
@@ -2333,6 +2336,7 @@ class KlingLipSyncTextToVideoNode(IO.ComfyNode):
                    max=2.0,
                    display_mode=IO.NumberDisplay.slider,
                    tooltip="Speech Rate. Valid range: 0.8~2.0, accurate to one decimal place.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -2454,6 +2458,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
                IO.Combo.Input(
                    "image_type",
                    options=[i.value for i in KlingImageGenImageReferenceType],
+                    advanced=True,
                ),
                IO.Float.Input(
                    "image_fidelity",
@@ -2463,6 +2468,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
                    step=0.01,
                    display_mode=IO.NumberDisplay.slider,
                    tooltip="Reference intensity for user-uploaded images",
+                    advanced=True,
                ),
                IO.Float.Input(
                    "human_fidelity",
@@ -2472,6 +2478,7 @@ class KlingImageGenerationNode(IO.ComfyNode):
                    step=0.01,
                    display_mode=IO.NumberDisplay.slider,
                    tooltip="Subject reference similarity",
+                    advanced=True,
                ),
                IO.Combo.Input("model_name", options=["kling-v3", "kling-v2", "kling-v1-5"]),
                IO.Combo.Input(
@@ -2587,7 +2594,7 @@ class TextToVideoWithAudio(IO.ComfyNode):
                IO.Combo.Input("mode", options=["pro"]),
                IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]),
                IO.Combo.Input("duration", options=[5, 10]),
-                IO.Boolean.Input("generate_audio", default=True),
+                IO.Boolean.Input("generate_audio", default=True, advanced=True),
            ],
            outputs=[
                IO.Video.Output(),
@@ -2655,7 +2662,7 @@ class ImageToVideoWithAudio(IO.ComfyNode):
                IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."),
                IO.Combo.Input("mode", options=["pro"]),
                IO.Combo.Input("duration", options=[5, 10]),
-                IO.Boolean.Input("generate_audio", default=True),
+                IO.Boolean.Input("generate_audio", default=True, advanced=True),
            ],
            outputs=[
                IO.Video.Output(),
@@ -3134,6 +3141,103 @@ class KlingFirstLastFrameNode(IO.ComfyNode):
        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))


+class KlingAvatarNode(IO.ComfyNode):
+    # API node for the Kling avatar image2video endpoint: takes one reference image and one
+    # audio clip, creates a task, polls it, and outputs the first video of the task result.
+
+    @classmethod
+    def define_schema(cls) -> IO.Schema:
+        # Declares the node id/display name, its inputs, the single video output,
+        # the hidden auth/id fields, and the price badge.
+        return IO.Schema(
+            node_id="KlingAvatarNode",
+            display_name="Kling Avatar 2.0",
+            category="api node/video/Kling",
+            description="Generate broadcast-style digital human videos from a single photo and an audio file.",
+            inputs=[
+                # The limits quoted in the two tooltips below are the same ones enforced in execute().
+                IO.Image.Input(
+                    "image",
+                    tooltip="Avatar reference image. "
+                    "Width and height must be at least 300px. Aspect ratio must be between 1:2.5 and 2.5:1.",
+                ),
+                IO.Audio.Input(
+                    "sound_file",
+                    tooltip="Audio input. Must be between 2 and 300 seconds in duration.",
+                ),
+                IO.Combo.Input("mode", options=["std", "pro"]),
+                IO.String.Input(
+                    "prompt",
+                    multiline=True,
+                    default="",
+                    optional=True,
+                    tooltip="Optional prompt to define avatar actions, emotions, and camera movements.",
+                ),
+                # The seed is declared as an input but is not part of the request built in execute().
+                IO.Int.Input(
+                    "seed",
+                    default=0,
+                    min=0,
+                    max=2147483647,
+                    display_mode=IO.NumberDisplay.number,
+                    control_after_generate=True,
+                    tooltip="Seed controls whether the node should re-run; "
+                    "results are non-deterministic regardless of seed.",
+                ),
+            ],
+            outputs=[
+                IO.Video.Output(),
+            ],
+            hidden=[
+                IO.Hidden.auth_token_comfy_org,
+                IO.Hidden.api_key_comfy_org,
+                IO.Hidden.unique_id,
+            ],
+            is_api_node=True,
+            # Badge depends only on the "mode" widget: 0.056 for "std" and 0.112 for "pro",
+            # reported as USD with a "/second" suffix.
+            # NOTE(review): the expression syntax looks like JSONata ($lookup) - confirm with the badge evaluator.
+            price_badge=IO.PriceBadge(
+                depends_on=IO.PriceBadgeDepends(widgets=["mode"]),
+                expr="""
+                (
+                  $prices := {"std": 0.056, "pro": 0.112};
+                  {"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}}
+                )
+                """,
+            ),
+        )
+
+    @classmethod
+    async def execute(
+        cls,
+        image: Input.Image,
+        sound_file: Input.Audio,
+        mode: str,
+        seed: int,
+        prompt: str = "",
+    ) -> IO.NodeOutput:
+        # Validates the inputs, creates the avatar task, polls it, and returns the resulting video.
+        # `seed` is accepted but never referenced in this body (see its tooltip in define_schema).
+
+        # Input checks run before anything is uploaded: image at least 300x300, aspect ratio
+        # bounded by (1, 2.5) and (2.5, 1), audio duration between 2 and 300.
+        validate_image_dimensions(image, min_width=300, min_height=300)
+        validate_image_aspect_ratio(image, (1, 2.5), (2.5, 1))
+        validate_audio_duration(sound_file, min_duration=2, max_duration=300)
+        # Create the task. Both uploads are awaited while the request object is being built;
+        # the audio is uploaded in an MP3 container (libmp3lame, audio/mpeg).
+        response = await sync_op(
+            cls,
+            ApiEndpoint(path="/proxy/kling/v1/videos/avatar/image2video", method="POST"),
+            response_model=TaskStatusResponse,
+            data=KlingAvatarRequest(
+                image=await upload_image_to_comfyapi(cls, image),
+                sound_file=await upload_audio_to_comfyapi(
+                    cls, sound_file, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg"
+                ),
+                # An empty prompt is sent as None rather than as an empty string.
+                prompt=prompt or None,
+                mode=mode,
+            ),
+        )
+        # Any truthy code is treated as a failure.
+        # NOTE(review): presumably 0/None means success - confirm against the Kling API.
+        if response.code:
+            raise RuntimeError(
+                f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}"
+            )
+        # Poll the task by its id; the status is read from data.task_status, or None when
+        # the polled response carries no data.
+        final_response = await poll_op(
+            cls,
+            ApiEndpoint(path=f"/proxy/kling/v1/videos/avatar/image2video/{response.data.task_id}"),
+            response_model=TaskStatusResponse,
+            status_extractor=lambda r: (r.data.task_status if r.data else None),
+            max_poll_attempts=800,
+        )
+        # Only the first video listed in the task result is downloaded and returned.
+        return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url))
+
+
 class KlingExtension(ComfyExtension):
    @override
    async def get_node_list(self) -> list[type[IO.ComfyNode]]:
@@ -3162,6 +3266,7 @@ class KlingExtension(ComfyExtension):
            MotionControl,
            KlingVideoNode,
            KlingFirstLastFrameNode,
+            KlingAvatarNode,
        ]


--- a/comfy_api_nodes/nodes_ltxv.py
+++ b/comfy_api_nodes/nodes_ltxv.py
@@ -74,6 +74,7 @@ class TextToVideoNode(IO.ComfyNode):
                    default=False,
                    optional=True,
                    tooltip="When true, the generated video will include AI-generated audio matching the scene.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -151,6 +152,7 @@ class ImageToVideoNode(IO.ComfyNode):
                    default=False,
                    optional=True,
                    tooltip="When true, the generated video will include AI-generated audio matching the scene.",
+                    advanced=True,
                ),
            ],
            outputs=[
--- a/comfy_api_nodes/nodes_magnific.py
+++ b/comfy_api_nodes/nodes_magnific.py
@@ -110,11 +110,13 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode):
                IO.Combo.Input(
                    "engine",
                    options=["automatic", "magnific_illusio", "magnific_sharpy", "magnific_sparkle"],
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "auto_downscale",
                    default=False,
                    tooltip="Automatically downscale input image if output would exceed maximum pixel limit.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -280,6 +282,7 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode):
                    "auto_downscale",
                    default=False,
                    tooltip="Automatically downscale input image if output would exceed maximum resolution.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -440,6 +443,7 @@ class MagnificImageStyleTransferNode(IO.ComfyNode):
                        "softy",
                    ],
                    tooltip="Processing engine selection.",
+                    advanced=True,
                ),
                IO.DynamicCombo.Input(
                    "portrait_mode",
@@ -468,6 +472,7 @@ class MagnificImageStyleTransferNode(IO.ComfyNode):
                    default=True,
                    tooltip="When disabled, expect each generation to introduce a degree of randomness, "
                    "leading to more diverse outcomes.",
+                    advanced=True,
                ),
            ],
            outputs=[
@@ -582,16 +587,19 @@ class MagnificImageRelightNode(IO.ComfyNode):
                    "interpolate_from_original",
                    default=False,
                    tooltip="Restricts generation freedom to match original more closely.",
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "change_background",
                    default=True,
                    tooltip="Modifies background based on prompt/reference.",
+                    advanced=True,
                ),
                IO.Boolean.Input(
                    "preserve_details",
                    default=True,
                    tooltip="Maintains texture and fine details from original.",
+                    advanced=True,
                ),
                IO.DynamicCombo.Input(
                    "advanced_settings",
--- a/comfy_api_nodes/nodes_meshy.py
+++ b/comfy_api_nodes/nodes_meshy.py
@@ -58,11 +58,12 @@ class MeshyTextToModelNode(IO.ComfyNode):
                    ],
                    tooltip="When set to false, returns an unprocessed triangular mesh.",
                ),
-                IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"]),
+                IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"], advanced=True),
                IO.Combo.Input(
                    "pose_mode",
                    options=["", "A-pose", "T-pose"],
                    tooltip="Specify the pose mode for the generated model.",
+                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -155,6 +156,7 @@ class MeshyRefineNode(IO.ComfyNode):
                    tooltip="Generate PBR Maps (metallic, roughness, normal) in addition to the base color. "
                    "Note: this should be set to false when using Sculpture style, "
                    "as Sculpture style generates its own set of PBR maps.",
+                    advanced=True,
                ),
                IO.String.Input(
                    "texture_prompt",
@@ -299,6 +301,7 @@ class MeshyImageToModelNode(IO.ComfyNode):
                    "pose_mode",
                    options=["", "A-pose", "T-pose"],
                    tooltip="Specify the pose mode for the generated model.",
+                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -429,7 +432,7 @@ class MeshyMultiImageToModelNode(IO.ComfyNode):
                    ],
                    tooltip="When set to false, returns an unprocessed triangular mesh.",
                ),
-                IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"]),
+                IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"], advanced=True),
                IO.DynamicCombo.Input(
                    "should_texture",
                    options=[
@@ -466,6 +469,7 @@ class MeshyMultiImageToModelNode(IO.ComfyNode):
                    "pose_mode",
                    options=["", "A-pose", "T-pose"],
                    tooltip="Specify the pose mode for the generated model.",
+                    advanced=True,
                ),
                IO.Int.Input(
                    "seed",
@@ -728,8 +732,9 @@ class MeshyTextureNode(IO.ComfyNode):
                    tooltip="Use the original UV of the model instead of generating new UVs. "
                    "When enabled, Meshy preserves existing textures from the uploaded model. "
                    "If the model has no original UV, the quality of the output might not be as good.",
+                    advanced=True,
                ),
-                IO.Boolean.Input("pbr", default=False),
+                IO.Boolean.Input("pbr", default=False, advanced=True),
                IO.String.Input(
                    "text_style_prompt",
                    default="",
--- a/comfy_api_nodes/nodes_openai.py
+++ b/comfy_api_nodes/nodes_openai.py
@@ -575,6 +575,7 @@ class OpenAIChatNode(IO.ComfyNode):
            node_id="OpenAIChatNode",
            display_name="OpenAI ChatGPT",
            category="api node/text/OpenAI",
+            essentials_category="Text Generation",
            description="Generate text responses from an OpenAI model.",
            inputs=[
                IO.String.Input(
@@ -587,6 +588,7 @@ class OpenAIChatNode(IO.ComfyNode):
                    "persist_context",
                    default=False,
                    tooltip="This parameter is deprecated and has no effect.",
+                    advanced=True,
                ),
                IO.Combo.Input(
                    "model",
@@ -856,6 +858,7 @@ class OpenAIChatConfig(IO.ComfyNode):
                    options=["auto", "disabled"],
                    default="auto",
                    tooltip="The truncation strategy to use for the model response. auto: If the context of this response and previous ones exceeds the model's context window size, the model will truncate the response to fit the context window by dropping input items in the middle of the conversation.disabled: If a model response will exceed the context window size for a model, the request will fail with a 400 error",
+                    advanced=True,
                ),
                IO.Int.Input(
                    "max_output_tokens",
@@ -864,6 +867,7 @@ class OpenAIChatConfig(IO.ComfyNode):
                    max=16384,
                    tooltip="An upper bound for the number of tokens that can be generated for a response, including visible output tokens",
                    optional=True,
+                    advanced=True,
                ),
                IO.String.Input(
                    "instructions",
--- a/comfy_api_nodes/nodes_recraft.py
+++ b/comfy_api_nodes/nodes_recraft.py
@@ -963,6 +963,7 @@ class RecraftRemoveBackgroundNode(IO.ComfyNode):
            node_id="RecraftRemoveBackgroundNode",
            display_name="Recraft Remove Background",
            category="api node/image/Recraft",
+            essentials_category="Image Tools",
            description="Remove background from image, and return processed image and mask.",
            inputs=[
                IO.Image.Input("image"),
--- a/comfy_api_nodes/nodes_rodin.py
+++ b/comfy_api_nodes/nodes_rodin.py
@@ -493,7 +493,7 @@ class Rodin3D_Gen2(IO.ComfyNode):
                    default="500K-Triangle",
                    optional=True,
                ),
-                IO.Boolean.Input("TAPose", default=False),
+                IO.Boolean.Input("TAPose", default=False, advanced=True),
            ],
            outputs=[
                IO.String.Output(display_name="3D Model Path"),  # for backward compatibility only
@@ -505,6 +505,9 @@ class Rodin3D_Gen2(IO.ComfyNode):
                IO.Hidden.unique_id,
            ],
            is_api_node=True,
+            price_badge=IO.PriceBadge(
+                expr="""{"type":"usd","usd":0.4}""",
+            ),
        )

    @classmethod
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
Jedrzej Kosinski	896eea51b1	Merge branch 'master' into fix/aspect-ratio-enum-keys	2026-02-27 20:35:12 -08:00
comfyanonymous	9d0e114ee3	PyOpenGL-accelerate is not necessary. (#12692 )	2026-02-27 23:34:58 -05:00
Talmaj	ac4412d0fa	Native LongCat-Image implementation (#12597 )	2026-02-27 23:04:34 -05:00
comfyanonymous	94f1a1cc9d	Limit overlap in image tile and combine nodes to prevent issues. (#12688 )	2026-02-27 20:16:24 -05:00
bymyself	77badcb169	refactor: use AspectRatio enum members as ASPECT_RATIOS dict keys Amp-Thread-ID: https://ampcode.com/threads/T-019ca1cb-0150-7549-8b1b-6713060d3408	2026-02-27 17:13:54 -08:00
rattus	e721e24136	ops: implement lora requanting for non QuantizedTensor fp8 (#12668 ) Allow non QuantizedTensor layer to set want_requant to get the post lora calculation stochastic cast down to the original input dtype. This is then used by the legacy fp8 Linear implementation to set the compute_dtype to the preferred lora dtype but then want_requant it back down to fp8. This fixes the issue with --fast fp8_matrix_mult is combined with --fast dynamic_vram which doing a lora on an fp8_ non QT model.	2026-02-27 19:05:51 -05:00
Reiner "Tiles" Prokein	25ec3d96a3	Class WanVAE, def encode, feat_map is using self.decoder instead of self.encoder (#12682 )	2026-02-27 19:03:45 -05:00
Christian Byrne	1f1ec377ce	feat: add ResolutionSelector node for aspect ratio and megapixel-based resolution calculation (#12199 ) Amp-Thread-ID: https://ampcode.com/threads/T-019c179e-cd8c-768f-ae66-207c7a53c01d Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-27 09:13:57 -08:00
pythongosssss	0a7f8e11b6	fix torch.cat requiring inputs to all be same dimensions (#12673 )	2026-02-27 08:13:24 -08:00
vickytsang	35e9fce775	Enable Pytorch Attention for gfx950 (#12641 )	2026-02-26 20:16:12 -05:00
Jukka Seppänen	c7f7d52b68	feat: Support SDPose-OOD (#12661 )	2026-02-26 19:59:05 -05:00
rattus	08b26ed7c2	bug_report template: Push harder for logs (#12657 ) We get a lot od bug reports without logs, especially for performance issues.	2026-02-26 18:59:24 -05:00
fappaz	b233dbe0bc	feat(ace-step): add ACE-Step 1.5 lycoris key alias mapping for LoKR #12638 (#12665 )	2026-02-26 18:19:19 -05:00
comfyanonymous	3811780e4f	Portable with cu128 isn't useful anymore. (#12666 ) Users should either use the cu126 one or the regular one (cu130 at the moment) The cu128 portable is still included in the latest github release but I will stop including it as soon as it becomes slightly annoying to deal with. This might happen as soon as next week.	2026-02-26 17:12:29 -05:00
comfyanonymous	3dd10a59c0	ComfyUI v0.15.1	2026-02-26 15:59:22 -05:00
ComfyUI Wiki	88d05fe483	chore: update workflow templates to v0.9.4 (#12664 )	2026-02-26 15:52:45 -05:00
Alexander Piskun	fd41ec97cc	feat(api-nodes): add NanoBanana2 (#12660 )	2026-02-26 15:52:10 -05:00
rattus	420e900f69	main: load aimdo earlier (#12655 ) Some custom node packs are naughty, and violate the dont-load-torch-on-load rule. This causes aimdo to lose preference on its allocator hook on linux. Go super early on the aimdo first-stage init before custom nodes are mentioned at all.	2026-02-26 15:19:38 -05:00
pythongosssss	38ca94599f	pyopengl-accelerate can cause object to be numpy ints instead of bare ints, which the glDeleteTextures function does not accept, explicitly cast to int (#12650 )	2026-02-26 03:07:35 -08:00
Christian Byrne	74b5a337dc	fix: move essentials_category to correct replacement nodes (#12568 ) Move essentials_category from deprecated/incorrect nodes to their replacements: - ImageBatch → BatchImagesNode (ImageBatch is deprecated) - Blur → removed (should use subgraph blueprint) - GetVideoComponents → Video Slice Amp-Thread-ID: https://ampcode.com/threads/T-019c8340-4da2-723b-a09f-83895c5bbda5	2026-02-26 01:00:32 -08:00
comfyanonymous	8a4d85c708	Cleanups to the last PR. (#12646 )	2026-02-26 01:30:31 -05:00
Tavi Halperin	a4522017c5	feat: per-guide attention strength control in self-attention (#12518 ) Implements per-guide attention attenuation via log-space additive bias in self-attention. Each guide reference tracks its own strength and optional spatial mask in conditioning metadata (guide_attention_entries).	2026-02-26 01:25:23 -05:00
Jukka Seppänen	907e5dcbbf	initial FlowRVS support (#12637 )	2026-02-25 23:38:46 -05:00
comfyanonymous	7253531670	Fix ltxav te mem estimation. (#12643 )	2026-02-25 23:13:47 -05:00
comfyanonymous	e14b04478c	Fix LTXAV text enc min length. (#12640 ) Should have been 1024 instead of 512	2026-02-25 22:36:02 -05:00
Christian Byrne	eb8737d675	Update requirements.txt (#12642 )	2026-02-25 18:30:48 -08:00
rattus	0467f690a8	comfy aimdo 0.2.2 (#12635 ) Comfy Aimdo 0.2.2 moves the cuda allocator hook from the cudart API to the cuda driver API on windows. This is needed to handle Windows+cu13 where cudart is statically linked.	2026-02-25 16:50:05 -05:00
rattus	4f5b7dbf1f	Fix Aimdo fallback on probe to not use zero-copy sft (#12634 ) * utils: dont use comfy sft loader in aimdo fallback This was going to the raw command line switch and should respect main.py probe of whether aimdo actually loaded successfully. * ops: dont use deferred linear load in Aimdo fallback Avoid changes of behaviour on --fast dynamic_vram when aimdo doesnt work.	2026-02-25 16:49:48 -05:00
rattus	3ebe1ac22e	Disable dynamic_vram when using torch compiler (#12612 ) * mp: attach re-construction arguments to model patcher When making a model-patcher from a unet or ckpt, attach a callable function that can be called to replay the model construction. This can be used to deep clone model patcher WRT the actual model. Originally written by Kosinkadink `f4b99bc623` * mp: Add disable_dynamic clone argument Add a clone argument that lets a caller clone a ModelPatcher but disable dynamic to demote the clone to regular MP. This is useful for legacy features where dynamic_vram support is missing or TBD. * torch_compile: disable dynamic_vram This is a bigger feature. Disable for the interim to preserve functionality.	2026-02-24 19:13:46 -05:00
rattus	befa83d434	comfy aimdo 0.2.1 (#12620 ) Changes: throttle VRAM threshold checks to restore performance in high-layer-rate conditions.	2026-02-24 16:02:26 -05:00
Jedrzej Kosinski	33f83d53ae	Fix KeyError when prompt entries lack class_type key (#12595 ) Skip entries in the prompt dict that don't contain a class_type key in apply_replacements(), preventing crashes on metadata or non-node entries. Fixes Comfy-Org/ComfyUI#12517	2026-02-24 16:02:05 -05:00
comfyanonymous	b874bd2b8c	ComfyUI v0.15.0	2026-02-24 12:42:15 -05:00
ComfyUI Wiki	0aa02453bb	chore: update embedded docs to v0.4.3 (#12601 )	2026-02-24 12:41:36 -05:00
comfyanonymous	599f9c5010	Don't crash right away if op is uninitialized. (#12615 )	2026-02-24 12:28:25 -05:00
ComfyUI Wiki	11fefa58e9	chore: update workflow templates to v0.9.3 (#12610 )	2026-02-24 09:04:51 -08:00
Alexander Piskun	d8090013b8	feat(api-nodes): add ByteDance Seedream-5 model (#12609 ) * feat(api-nodes): add ByteDance Seedream-5 model * made error message more correct * rename seedream 5.0 model	2026-02-24 09:03:30 -08:00
Christian Byrne	048dd2f321	Patch frontend to 1.39.16 (from 1.39.14) (#12604 ) * Update requirements.txt * Update requirements.txt --------- Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-24 00:44:40 -08:00
comfyanonymous	84aba95e03	Temporality unbreak some LTXAV workflows to give people time to migrate. (#12605 )	2026-02-24 00:50:03 -05:00
comfyanonymous	9b1c63eb69	Add SplitImageToTileList and ImageMergeTileList nodes. (#12599 ) With these you can split an image into tiles, do operations and then combine it back to a single image.	2026-02-23 21:01:17 -05:00
ComfyUI Wiki	7a7debcaf1	chore: update workflow templates to v0.9.2 (#12596 )	2026-02-23 18:27:20 -05:00
Alexander Piskun	dba2766e53	feat(api-nodes): add KlingAvatar node (#12591 )	2026-02-23 11:27:16 -08:00
comfyanonymous	caa43d2395	Fix issue loading fp8 ltxav checkpoints. (#12582 )	2026-02-22 16:00:02 -05:00
comfyanonymous	07ca6852e8	Fix dtype issue in embeddings connector. (#12570 )	2026-02-22 03:18:20 -05:00
comfyanonymous	f266b8d352	Move LTXAV av embedding connectors to diffusion model. (#12569 )	2026-02-21 22:29:58 -05:00
Christian Byrne	b6cb30bab5	chore: tune CodeRabbit config to limit review scope and disable for drafts (#12567 ) * chore: tune CodeRabbit config to limit review scope and disable for drafts - Add tone_instructions to focus only on newly introduced issues - Add global path_instructions entry to ignore pre-existing issues in moved/reformatted code - Disable draft PR reviews (drafts: false) and add WIP title keywords - Disable ruff tool to prevent linter-based outside-diff-range comments Addresses feedback from maintainers about CodeRabbit flagging pre-existing issues in code that was merely moved or de-indented (e.g., PR #12557), which can discourage community contributions and cause scope creep. Amp-Thread-ID: https://ampcode.com/threads/T-019c82de-0481-7253-ad42-20cb595bb1ba * chore: add 'DO NOT MERGE' to ignore_title_keywords Amp-Thread-ID: https://ampcode.com/threads/T-019c82de-0481-7253-ad42-20cb595bb1ba	2026-02-21 18:32:15 -08:00
Christian Byrne	ee72752162	Add category to Normalized Attention Guidance node (#12565 )	2026-02-21 19:51:21 -05:00
Alexander Brown	7591d781a7	fix: specify UTF-8 encoding when reading subgraph files (#12563 ) On Windows, Python defaults to cp1252 encoding when no encoding is specified. JSON files containing UTF-8 characters (e.g., non-ASCII characters) cause UnicodeDecodeError when read with cp1252. This fixes the error that occurs when loading blueprint subgraphs on Windows systems. https://claude.ai/code/session_014WHi3SL9Gzsi3U6kbSjbSb Co-authored-by: Claude <noreply@anthropic.com>	2026-02-21 15:05:00 -08:00
rattus	0bfb936ab4	comfy-aimdo 0.2 - Improved pytorch allocator integration (#12557 ) Integrate comfy-aimdo 0.2 which takes a different approach to installing the memory allocator hook. Instead of using the complicated and buggy pytorch MemPool+CudaPluggableAlloctor, cuda is directly hooked making the process much more transparent to both comfy and pytorch. As far as pytorch knows, aimdo doesnt exist anymore, and just operates behind the scenes. Remove all the mempool setup stuff for dynamic_vram and bump the comfy-aimdo version. Remove the allocator object from memory_management and demote its use as an enablment check to a boolean flag. Comfy-aimdo 0.2 also support the pytorch cuda async allocator, so remove the dynamic_vram based force disablement of cuda_malloc and just go back to the old settings of allocators based on command line input.	2026-02-21 10:52:57 -08:00
pythongosssss	602b2505a4	add support for pyopengl < 3.1.4 where the size parameter does not exist (#12555 )	2026-02-21 06:14:57 -08:00
Christian Byrne	04a55d5019	fix: swap essentials_category from CLIPTextEncode to PrimitiveStringMultiline (#12553 ) Remove CLIPTextEncode from Basics essentials category and add PrimitiveStringMultiline (String Multiline) in its place. Amp-Thread-ID: https://ampcode.com/threads/T-019c7efb-d916-7244-8c43-77b615ba0622 Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-20 23:46:46 -08:00
Christian Byrne	5fb8f06495	feat: add essential subgraph blueprints (#12552 ) Add 24 non-cloud essential blueprints from comfyui-wiki/Subgraph-Blueprints. These cover common workflows: text/image/video generation, editing, inpainting, outpainting, upscaling, depth maps, pose, captioning, and more. Cloud-only blueprints (5) are excluded and will be added once client-side distribution filtering lands. Amp-Thread-ID: https://ampcode.com/threads/T-019c6f43-6212-7308-bea6-bfc35a486cbf	2026-02-20 23:40:13 -08:00
Terry Jia	5a182bfaf1	update glsl blueprint with gradient (#12548 ) Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-20 23:02:36 -08:00
Terry Jia	f394af8d0f	feat: add gradient-slider display mode for FLOAT inputs (#12536 ) * feat: add gradient-slider display mode for FLOAT inputs * fix: use precise type annotation list[list[float]] for gradient_stops Amp-Thread-ID: https://ampcode.com/threads/T-019c7eea-be2b-72ce-a51f-838376f9b7a7 --------- Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com> Co-authored-by: bymyself <cbyrne@comfy.org>	2026-02-20 22:52:32 -08:00
Arthur R Longbottom	aeb5bdc8f6	Fix non-contiguous audio waveform crash in video save (#12550 ) Fixes #12549	2026-02-20 23:37:55 -05:00
comfyanonymous	64953bda0a	Update nightly installation command for ROCm (#12547 )	2026-02-20 21:32:05 -05:00
Christian Byrne	b254cecd03	chore: add CodeRabbit configuration for automated code review (#12539 ) Amp-Thread-ID: https://ampcode.com/threads/T-019c7915-2abf-743c-9c74-b93d87d63055 Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-20 00:25:54 -08:00
Alexander Piskun	1bb956fb66	[API Nodes] add ElevenLabs nodes (#12207 ) * feat(api-nodes): add ElevenLabs API nodes * added price badge for ElevenLabsInstantVoiceClone node --------- Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-19 22:12:28 -08:00
pythongosssss	96d6bd1a4a	Add GLSL shader node using PyOpenGL (#12148 ) * adds support for executing simple glsl shaders using moderngl package * tidy * Support multiple outputs * Try fix build * fix casing * fix line endings * convert to using PyOpenGL and glfw * remove cpu support * tidy * add additional support for egl & osmesa backends * fix ci perf: only read required outputs * add diagnostics, update mac initialization * GLSL glueprints + node fixes (#12492) * Add image operation blueprints * Add channels * Add glow * brightness/contrast * hsb * add glsl shader update system * shader nit iteration * add multipass for faster blur * more fixes * rebuild blueprints * print -> logger * Add edge preserving blur * fix: move _initialized flag to end of GLContext.__init__ Prevents '_vao' attribute error when init fails partway through and subsequent calls skip initialization due to early _initialized flag. * update valid ranges - threshold 0-100 - step 0+ * fix value ranges * rebuild node to remove extra inputs * Fix gamma step * clamp saturation in colorize instead of wrapping * Fix crash on 1x1 px images * rework description * remove unnecessary f Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com> Co-authored-by: Hunter Senft-Grupp <hunter@comfy.org>	2026-02-19 23:22:13 -05:00
comfyanonymous	5f2117528a	Force min length 1 when tokenizing for text generation. (#12538 )	2026-02-19 22:57:44 -05:00
comfyanonymous	0301ccf745	Small cleanup and try to get qwen 3 work with the text gen. (#12537 )	2026-02-19 22:42:28 -05:00
Christian Byrne	4d172e9ad7	feat: mark 429 widgets as advanced for collapsible UI (#12197 ) * feat: mark 429 widgets as advanced for collapsible UI Mark widgets as advanced across core, comfy_extras, and comfy_api_nodes to support the new collapsible advanced inputs section in the frontend. Changes: - 267 advanced markers in comfy_extras/ - 162 advanced markers in comfy_api_nodes/ - All files pass python3 -m py_compile verification Widgets marked advanced (hidden by default): - Scheduler internals: sigma_max, sigma_min, rho, mu, beta, alpha - Sampler internals: eta, s_noise, order, rtol, atol, h_init, pcoeff, etc. - Memory optimization: tile_size, overlap, temporal_size, temporal_overlap - Pipeline controls: add_noise, start_at_step, end_at_step - Timing controls: start_percent, end_percent - Layer selection: stop_at_clip_layer, layers, block_number - Video encoding: codec, crf, format - Device/dtype: device, noise_device, dtype, weight_dtype Widgets kept basic (always visible): - Core params: strength, steps, cfg, denoise, seed, width, height - Model selectors: ckpt_name, lora_name, vae_name, sampler_name - Common controls: upscale_method, crop, batch_size, fps, opacity Related: frontend PR #11939 Amp-Thread-ID: https://ampcode.com/threads/T-019c1734-6b61-702e-b333-f02c399963fc * fix: remove advanced=True from DynamicCombo.Input (unsupported) Amp-Thread-ID: https://ampcode.com/threads/T-019c1734-6b61-702e-b333-f02c399963fc * fix: address review - un-mark model merge, video, image, and training node widgets as advanced Per comfyanonymous review: - Model merge arguments should not be advanced (all 14 model-specific merge classes) - SaveAnimatedWEBP lossless/quality/method should not be advanced - SaveWEBM/SaveVideo codec/crf/format should not be advanced - TrainLoraNode options should not be advanced (7 inputs) Amp-Thread-ID: https://ampcode.com/threads/T-019c322b-a3a8-71b7-9962-d44573ca6352 * fix: un-mark batch_size and webcam width/height as advanced (should stay 
basic) Amp-Thread-ID: https://ampcode.com/threads/T-019c3236-1417-74aa-82a3-bcb365fbe9d1 --------- Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-19 19:20:02 -08:00
Yourz	5632b2df9d	feat: add essentials_category (#12357 ) * feat: add essentials_category field to node schema Amp-Thread-ID: https://ampcode.com/threads/T-019c2b25-cd90-7218-9071-03cb46b351b3 * feat: add ESSENTIALS_CATEGORY to core nodes Marked nodes: - Basic: LoadImage, SaveImage, LoadVideo, SaveVideo, Load3D, CLIPTextEncode - Image Tools: ImageScale, ImageInvert, ImageBatch, ImageCrop, ImageRotate, ImageBlur - Image Tools/Preprocessing: Canny - Image Generation: LoraLoader - Audio: LoadAudio, SaveAudio Amp-Thread-ID: https://ampcode.com/threads/T-019c2b25-cd90-7218-9071-03cb46b351b3 * Add ESSENTIALS_CATEGORY to more nodes - SaveGLB (Basic) - GetVideoComponents (Video Tools) - TencentTextToModelNode, TencentImageToModelNode (3D) - RecraftRemoveBackgroundNode (Image Tools) - KlingLipSyncAudioToVideoNode (Video Generation) - OpenAIChatNode (Text Generation) - StabilityTextToAudio (Audio) Amp-Thread-ID: https://ampcode.com/threads/T-019c2b69-81c1-71c3-8096-450a39e20910 * fix: correct essentials category for Canny node Amp-Thread-ID: https://ampcode.com/threads/T-019c7303-ab53-7341-be76-a5da1f7a657e Co-authored-by: Amp <amp@ampcode.com> * refactor: replace essentials_category string literals with constants Amp-Thread-ID: https://ampcode.com/threads/T-019c7303-ab53-7341-be76-a5da1f7a657e Co-authored-by: Amp <amp@ampcode.com> * refactor: revert constants, use string literals for essentials_category Amp-Thread-ID: https://ampcode.com/threads/T-019c7303-ab53-7341-be76-a5da1f7a657e Co-authored-by: Amp <amp@ampcode.com> * fix: update basics --------- Co-authored-by: bymyself <cbyrne@comfy.org> Co-authored-by: Amp <amp@ampcode.com> Co-authored-by: Jedrzej Kosinski <kosinkadink1@gmail.com>	2026-02-19 19:00:26 -08:00
Alexander Piskun	2687652530	fix(api-nodes): force Gemini to return uncompressed images (#12516 )	2026-02-19 04:10:39 -08:00
Jukka Seppänen	6d11cc7354	feat: Add basic text generation support with native models, initially supporting Gemma3 (#12392 )	2026-02-18 20:49:43 -05:00
comfyanonymous	f262444dd4	Add simple 3 band equalizer node for audio. (#12519 )	2026-02-18 18:36:35 -05:00
Alexander Piskun	239ddd3327	fix(api-nodes): add price badge for Rodin Gen-2 node (#12512 )	2026-02-17 23:15:23 -08:00
Hunter	83dd65f23a	fix: use glob matching for Gemini image MIME types (#12511 ) gemini-3-pro-image-preview nondeterministically returns image/jpeg instead of image/png. get_image_from_response() hardcoded get_parts_by_type(response, "image/png"), silently dropping JPEG responses and falling back to torch.zeros (all-black output). Add _mime_matches() helper using fnmatch for glob-style MIME matching. Change get_image_from_response() to request "image/*" so any image format returned by the API is correctly captured.	2026-02-18 00:03:54 -05:00
				`@@ -0,0 +1 @@`
				{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", 
"label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 
0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}}
				`@@ -0,0 +1 @@`
				{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": 
"STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}}
				`@@ -0,0 +1 @@`
				{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", 
"link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * 
u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}}
				`@@ -0,0 +1 @@`
				{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": 
"ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": 
"UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}}