diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 000000000..0d1e49270 --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,127 @@ +# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json +language: "en-US" +early_access: false +tone_instructions: "Only comment on issues introduced by this PR's changes. Do not flag pre-existing problems in moved, re-indented, or reformatted code." + +reviews: + profile: "chill" + request_changes_workflow: false + high_level_summary: false + poem: false + review_status: false + review_details: false + commit_status: true + collapse_walkthrough: true + changed_files_summary: false + sequence_diagrams: false + estimate_code_review_effort: false + assess_linked_issues: false + related_issues: false + related_prs: false + suggested_labels: false + auto_apply_labels: false + suggested_reviewers: false + auto_assign_reviewers: false + in_progress_fortune: false + enable_prompt_for_ai_agents: true + + path_filters: + - "!comfy_api_nodes/apis/**" + - "!**/generated/*.pyi" + - "!.ci/**" + - "!script_examples/**" + - "!**/__pycache__/**" + - "!**/*.ipynb" + - "!**/*.png" + - "!**/*.bat" + + path_instructions: + - path: "**" + instructions: | + IMPORTANT: Only comment on issues directly introduced by this PR's code changes. + Do NOT flag pre-existing issues in code that was merely moved, re-indented, + de-indented, or reformatted without logic changes. If code appears in the diff + only due to whitespace or structural reformatting (e.g., removing a `with:` block), + treat it as unchanged. Contributors should not feel obligated to address + pre-existing issues outside the scope of their contribution. + - path: "comfy/**" + instructions: | + Core ML/diffusion engine. Focus on: + - Backward compatibility (breaking changes affect all custom nodes) + - Memory management and GPU resource handling + - Performance implications in hot paths + - Thread safety for concurrent execution + - path: "comfy_api_nodes/**" + instructions: | + Third-party API integration nodes. Focus on: + - No hardcoded API keys or secrets + - Proper error handling for API failures (timeouts, rate limits, auth errors) + - Correct Pydantic model usage + - Security of user data passed to external APIs + - path: "comfy_extras/**" + instructions: | + Community-contributed extra nodes. Focus on: + - Consistency with node patterns (INPUT_TYPES, RETURN_TYPES, FUNCTION, CATEGORY) + - No breaking changes to existing node interfaces + - path: "comfy_execution/**" + instructions: | + Execution engine (graph execution, caching, jobs). Focus on: + - Caching correctness + - Concurrent execution safety + - Graph validation edge cases + - path: "nodes.py" + instructions: | + Core node definitions (2500+ lines). Focus on: + - Backward compatibility of NODE_CLASS_MAPPINGS + - Consistency of INPUT_TYPES return format + - path: "alembic_db/**" + instructions: | + Database migrations. Focus on: + - Migration safety and rollback support + - Data preservation during schema changes + + auto_review: + enabled: true + auto_incremental_review: true + drafts: false + ignore_title_keywords: + - "WIP" + - "DO NOT REVIEW" + - "DO NOT MERGE" + + finishing_touches: + docstrings: + enabled: false + unit_tests: + enabled: false + + tools: + ruff: + enabled: false + pylint: + enabled: false + flake8: + enabled: false + gitleaks: + enabled: true + shellcheck: + enabled: false + markdownlint: + enabled: false + yamllint: + enabled: false + languagetool: + enabled: false + github-checks: + enabled: true + timeout_ms: 90000 + ast-grep: + essential_rules: true + +chat: + auto_reply: true + +knowledge_base: + opt_out: false + learnings: + scope: "auto" diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml index 6556677e0..95cc48f88 100644 --- a/.github/ISSUE_TEMPLATE/bug-report.yml +++ b/.github/ISSUE_TEMPLATE/bug-report.yml @@ -16,7 +16,7 @@ body: ## Very Important - Please make sure that you post ALL your ComfyUI logs in the bug report. A bug report without logs will likely be ignored. + Please make sure that you post ALL your ComfyUI logs in the bug report **even if there is no crash**. Just paste everything. The startup log (everything before "To see the GUI go to: ...") contains critical information to developers trying to help. For a performance issue or crash, paste everything from "got prompt" to the end, including the crash. More is better - always. A bug report without logs will likely be ignored. - type: checkboxes id: custom-nodes-test attributes: diff --git a/.github/workflows/release-webhook.yml b/.github/workflows/release-webhook.yml index 6fceb7560..737e4c488 100644 --- a/.github/workflows/release-webhook.yml +++ b/.github/workflows/release-webhook.yml @@ -7,6 +7,8 @@ on: jobs: send-webhook: runs-on: ubuntu-latest + env: + DESKTOP_REPO_DISPATCH_TOKEN: ${{ secrets.DESKTOP_REPO_DISPATCH_TOKEN }} steps: - name: Send release webhook env: @@ -106,3 +108,37 @@ jobs: --fail --silent --show-error echo "✅ Release webhook sent successfully" + + - name: Send repository dispatch to desktop + env: + DISPATCH_TOKEN: ${{ env.DESKTOP_REPO_DISPATCH_TOKEN }} + RELEASE_TAG: ${{ github.event.release.tag_name }} + RELEASE_URL: ${{ github.event.release.html_url }} + run: | + set -euo pipefail + + if [ -z "${DISPATCH_TOKEN:-}" ]; then + echo "::error::DESKTOP_REPO_DISPATCH_TOKEN is required but not set." + exit 1 + fi + + PAYLOAD="$(jq -n \ + --arg release_tag "$RELEASE_TAG" \ + --arg release_url "$RELEASE_URL" \ + '{ + event_type: "comfyui_release_published", + client_payload: { + release_tag: $release_tag, + release_url: $release_url + } + }')" + + curl -fsSL \ + -X POST \ + -H "Accept: application/vnd.github+json" \ + -H "Content-Type: application/json" \ + -H "Authorization: Bearer ${DISPATCH_TOKEN}" \ + https://api.github.com/repos/Comfy-Org/desktop/dispatches \ + -d "$PAYLOAD" + + echo "✅ Dispatched ComfyUI release ${RELEASE_TAG} to Comfy-Org/desktop" diff --git a/.gitignore b/.gitignore index 4e8cea71e..2700ad5c2 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ extra_model_paths.yaml /.vs .vscode/ .idea/ -venv/ +venv*/ .venv/ /web/extensions/* !/web/extensions/logging.js.example diff --git a/README.md b/README.md index 96dc2904b..56b7966cf 100644 --- a/README.md +++ b/README.md @@ -189,8 +189,6 @@ The portable above currently comes with python 3.13 and pytorch cuda 13.0. Updat [Experimental portable for AMD GPUs](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_amd.7z) -[Portable with pytorch cuda 12.8 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu128.7z). - [Portable with pytorch cuda 12.6 and python 3.12](https://github.com/comfyanonymous/ComfyUI/releases/latest/download/ComfyUI_windows_portable_nvidia_cu126.7z) (Supports Nvidia 10 series and older GPUs). #### How do I share models between another UI and ComfyUI? @@ -227,11 +225,11 @@ Put your VAE in: models/vae AMD users can install rocm and pytorch with pip if you don't have it already installed, this is the command to install the stable version: -```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm6.4``` +```pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm7.1``` -This is the command to install the nightly with ROCm 7.1 which might have some performance improvements: +This is the command to install the nightly with ROCm 7.2 which might have some performance improvements: -```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.1``` +```pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/rocm7.2``` ### AMD GPUs (Experimental: Windows and Linux), RDNA 3, 3.5 and 4 only. diff --git a/app/node_replace_manager.py b/app/node_replace_manager.py new file mode 100644 index 000000000..d9aab5b22 --- /dev/null +++ b/app/node_replace_manager.py @@ -0,0 +1,107 @@ +from __future__ import annotations + +from aiohttp import web + +from typing import TYPE_CHECKING, TypedDict +if TYPE_CHECKING: + from comfy_api.latest._io_public import NodeReplace + +from comfy_execution.graph_utils import is_link +import nodes + +class NodeStruct(TypedDict): + inputs: dict[str, str | int | float | bool | tuple[str, int]] + class_type: str + _meta: dict[str, str] + +def copy_node_struct(node_struct: NodeStruct, empty_inputs: bool = False) -> NodeStruct: + new_node_struct = node_struct.copy() + if empty_inputs: + new_node_struct["inputs"] = {} + else: + new_node_struct["inputs"] = node_struct["inputs"].copy() + new_node_struct["_meta"] = node_struct["_meta"].copy() + return new_node_struct + + +class NodeReplaceManager: + """Manages node replacement registrations.""" + + def __init__(self): + self._replacements: dict[str, list[NodeReplace]] = {} + + def register(self, node_replace: NodeReplace): + """Register a node replacement mapping.""" + self._replacements.setdefault(node_replace.old_node_id, []).append(node_replace) + + def get_replacement(self, old_node_id: str) -> list[NodeReplace] | None: + """Get replacements for an old node ID.""" + return self._replacements.get(old_node_id) + + def has_replacement(self, old_node_id: str) -> bool: + """Check if a replacement exists for an old node ID.""" + return old_node_id in self._replacements + + def apply_replacements(self, prompt: dict[str, NodeStruct]): + connections: dict[str, list[tuple[str, str, int]]] = {} + need_replacement: set[str] = set() + for node_number, node_struct in prompt.items(): + if "class_type" not in node_struct or "inputs" not in node_struct: + continue + class_type = node_struct["class_type"] + # need replacement if not in NODE_CLASS_MAPPINGS and has replacement + if class_type not in nodes.NODE_CLASS_MAPPINGS.keys() and self.has_replacement(class_type): + need_replacement.add(node_number) + # keep track of connections + for input_id, input_value in node_struct["inputs"].items(): + if is_link(input_value): + conn_number = input_value[0] + connections.setdefault(conn_number, []).append((node_number, input_id, input_value[1])) + for node_number in need_replacement: + node_struct = prompt[node_number] + class_type = node_struct["class_type"] + replacements = self.get_replacement(class_type) + if replacements is None: + continue + # just use the first replacement + replacement = replacements[0] + new_node_id = replacement.new_node_id + # if replacement is not a valid node, skip trying to replace it as will only cause confusion + if new_node_id not in nodes.NODE_CLASS_MAPPINGS.keys(): + continue + # first, replace node id (class_type) + new_node_struct = copy_node_struct(node_struct, empty_inputs=True) + new_node_struct["class_type"] = new_node_id + # TODO: consider replacing display_name in _meta as well for error reporting purposes; would need to query node schema + # second, replace inputs + if replacement.input_mapping is not None: + for input_map in replacement.input_mapping: + if "set_value" in input_map: + new_node_struct["inputs"][input_map["new_id"]] = input_map["set_value"] + elif "old_id" in input_map: + new_node_struct["inputs"][input_map["new_id"]] = node_struct["inputs"][input_map["old_id"]] + # finalize input replacement + prompt[node_number] = new_node_struct + # third, replace outputs + if replacement.output_mapping is not None: + # re-mapping outputs requires changing the input values of nodes that receive connections from this one + if node_number in connections: + for conns in connections[node_number]: + conn_node_number, conn_input_id, old_output_idx = conns + for output_map in replacement.output_mapping: + if output_map["old_idx"] == old_output_idx: + new_output_idx = output_map["new_idx"] + previous_input = prompt[conn_node_number]["inputs"][conn_input_id] + previous_input[1] = new_output_idx + + def as_dict(self): + """Serialize all replacements to dict.""" + return { + k: [v.as_dict() for v in v_list] + for k, v_list in self._replacements.items() + } + + def add_routes(self, routes): + @routes.get("/node_replacements") + async def get_node_replacements(request): + return web.json_response(self.as_dict()) diff --git a/app/subgraph_manager.py b/app/subgraph_manager.py index 6a8f586a4..08ad8c302 100644 --- a/app/subgraph_manager.py +++ b/app/subgraph_manager.py @@ -53,7 +53,7 @@ class SubgraphManager: return entry_id, entry async def load_entry_data(self, entry: SubgraphEntry): - with open(entry['path'], 'r') as f: + with open(entry['path'], 'r', encoding='utf-8') as f: entry['data'] = f.read() return entry diff --git a/blueprints/.glsl/Brightness_and_Contrast_1.frag b/blueprints/.glsl/Brightness_and_Contrast_1.frag new file mode 100644 index 000000000..da5424080 --- /dev/null +++ b/blueprints/.glsl/Brightness_and_Contrast_1.frag @@ -0,0 +1,44 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform float u_float0; // Brightness slider -100..100 +uniform float u_float1; // Contrast slider -100..100 + +in vec2 v_texCoord; +out vec4 fragColor; + +const float MID_GRAY = 0.18; // 18% reflectance + +// sRGB gamma 2.2 approximation +vec3 srgbToLinear(vec3 c) { + return pow(max(c, 0.0), vec3(2.2)); +} + +vec3 linearToSrgb(vec3 c) { + return pow(max(c, 0.0), vec3(1.0/2.2)); +} + +float mapBrightness(float b) { + return clamp(b / 100.0, -1.0, 1.0); +} + +float mapContrast(float c) { + return clamp(c / 100.0 + 1.0, 0.0, 2.0); +} + +void main() { + vec4 orig = texture(u_image0, v_texCoord); + + float brightness = mapBrightness(u_float0); + float contrast = mapContrast(u_float1); + + vec3 lin = srgbToLinear(orig.rgb); + + lin = (lin - MID_GRAY) * contrast + brightness + MID_GRAY; + + // Convert back to sRGB + vec3 result = linearToSrgb(clamp(lin, 0.0, 1.0)); + + fragColor = vec4(result, orig.a); +} diff --git a/blueprints/.glsl/Chromatic_Aberration_16.frag b/blueprints/.glsl/Chromatic_Aberration_16.frag new file mode 100644 index 000000000..09a271146 --- /dev/null +++ b/blueprints/.glsl/Chromatic_Aberration_16.frag @@ -0,0 +1,72 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform vec2 u_resolution; +uniform int u_int0; // Mode +uniform float u_float0; // Amount (0 to 100) + +in vec2 v_texCoord; +out vec4 fragColor; + +const int MODE_LINEAR = 0; +const int MODE_RADIAL = 1; +const int MODE_BARREL = 2; +const int MODE_SWIRL = 3; +const int MODE_DIAGONAL = 4; + +const float AMOUNT_SCALE = 0.0005; +const float RADIAL_MULT = 4.0; +const float BARREL_MULT = 8.0; +const float INV_SQRT2 = 0.70710678118; + +void main() { + vec2 uv = v_texCoord; + vec4 original = texture(u_image0, uv); + + float amount = u_float0 * AMOUNT_SCALE; + + if (amount < 0.000001) { + fragColor = original; + return; + } + + // Aspect-corrected coordinates for circular effects + float aspect = u_resolution.x / u_resolution.y; + vec2 centered = uv - 0.5; + vec2 corrected = vec2(centered.x * aspect, centered.y); + float r = length(corrected); + vec2 dir = r > 0.0001 ? corrected / r : vec2(0.0); + vec2 offset = vec2(0.0); + + if (u_int0 == MODE_LINEAR) { + // Horizontal shift (no aspect correction needed) + offset = vec2(amount, 0.0); + } + else if (u_int0 == MODE_RADIAL) { + // Outward from center, stronger at edges + offset = dir * r * amount * RADIAL_MULT; + offset.x /= aspect; // Convert back to UV space + } + else if (u_int0 == MODE_BARREL) { + // Lens distortion simulation (r² falloff) + offset = dir * r * r * amount * BARREL_MULT; + offset.x /= aspect; // Convert back to UV space + } + else if (u_int0 == MODE_SWIRL) { + // Perpendicular to radial (rotational aberration) + vec2 perp = vec2(-dir.y, dir.x); + offset = perp * r * amount * RADIAL_MULT; + offset.x /= aspect; // Convert back to UV space + } + else if (u_int0 == MODE_DIAGONAL) { + // 45° offset (no aspect correction needed) + offset = vec2(amount, amount) * INV_SQRT2; + } + + float red = texture(u_image0, uv + offset).r; + float green = original.g; + float blue = texture(u_image0, uv - offset).b; + + fragColor = vec4(red, green, blue, original.a); +} \ No newline at end of file diff --git a/blueprints/.glsl/Color_Adjustment_15.frag b/blueprints/.glsl/Color_Adjustment_15.frag new file mode 100644 index 000000000..697525f14 --- /dev/null +++ b/blueprints/.glsl/Color_Adjustment_15.frag @@ -0,0 +1,78 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform float u_float0; // temperature (-100 to 100) +uniform float u_float1; // tint (-100 to 100) +uniform float u_float2; // vibrance (-100 to 100) +uniform float u_float3; // saturation (-100 to 100) + +in vec2 v_texCoord; +out vec4 fragColor; + +const float INPUT_SCALE = 0.01; +const float TEMP_TINT_PRIMARY = 0.3; +const float TEMP_TINT_SECONDARY = 0.15; +const float VIBRANCE_BOOST = 2.0; +const float SATURATION_BOOST = 2.0; +const float SKIN_PROTECTION = 0.5; +const float EPSILON = 0.001; +const vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114); + +void main() { + vec4 tex = texture(u_image0, v_texCoord); + vec3 color = tex.rgb; + + // Scale inputs: -100/100 → -1/1 + float temperature = u_float0 * INPUT_SCALE; + float tint = u_float1 * INPUT_SCALE; + float vibrance = u_float2 * INPUT_SCALE; + float saturation = u_float3 * INPUT_SCALE; + + // Temperature (warm/cool): positive = warm, negative = cool + color.r += temperature * TEMP_TINT_PRIMARY; + color.b -= temperature * TEMP_TINT_PRIMARY; + + // Tint (green/magenta): positive = green, negative = magenta + color.g += tint * TEMP_TINT_PRIMARY; + color.r -= tint * TEMP_TINT_SECONDARY; + color.b -= tint * TEMP_TINT_SECONDARY; + + // Single clamp after temperature/tint + color = clamp(color, 0.0, 1.0); + + // Vibrance with skin protection + if (vibrance != 0.0) { + float maxC = max(color.r, max(color.g, color.b)); + float minC = min(color.r, min(color.g, color.b)); + float sat = maxC - minC; + float gray = dot(color, LUMA_WEIGHTS); + + if (vibrance < 0.0) { + // Desaturate: -100 → gray + color = mix(vec3(gray), color, 1.0 + vibrance); + } else { + // Boost less saturated colors more + float vibranceAmt = vibrance * (1.0 - sat); + + // Branchless skin tone protection + float isWarmTone = step(color.b, color.g) * step(color.g, color.r); + float warmth = (color.r - color.b) / max(maxC, EPSILON); + float skinTone = isWarmTone * warmth * sat * (1.0 - sat); + vibranceAmt *= (1.0 - skinTone * SKIN_PROTECTION); + + color = mix(vec3(gray), color, 1.0 + vibranceAmt * VIBRANCE_BOOST); + } + } + + // Saturation + if (saturation != 0.0) { + float gray = dot(color, LUMA_WEIGHTS); + float satMix = saturation < 0.0 + ? 1.0 + saturation // -100 → gray + : 1.0 + saturation * SATURATION_BOOST; // +100 → 3x boost + color = mix(vec3(gray), color, satMix); + } + + fragColor = vec4(clamp(color, 0.0, 1.0), tex.a); +} \ No newline at end of file diff --git a/blueprints/.glsl/Edge-Preserving_Blur_128.frag b/blueprints/.glsl/Edge-Preserving_Blur_128.frag new file mode 100644 index 000000000..f269aebd6 --- /dev/null +++ b/blueprints/.glsl/Edge-Preserving_Blur_128.frag @@ -0,0 +1,94 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform float u_float0; // Blur radius (0–20, default ~5) +uniform float u_float1; // Edge threshold (0–100, default ~30) +uniform int u_int0; // Step size (0/1 = every pixel, 2+ = skip pixels) + +in vec2 v_texCoord; +out vec4 fragColor; + +const int MAX_RADIUS = 20; +const float EPSILON = 0.0001; + +// Perceptual luminance +float getLuminance(vec3 rgb) { + return dot(rgb, vec3(0.299, 0.587, 0.114)); +} + +vec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius, + float sigmaSpatial, float sigmaColor) +{ + vec4 center = texture(u_image0, uv); + vec3 centerRGB = center.rgb; + + float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial); + float invColor2 = -0.5 / (sigmaColor * sigmaColor + EPSILON); + + vec3 sumRGB = vec3(0.0); + float sumWeight = 0.0; + + int step = max(u_int0, 1); + float radius2 = float(radius * radius); + + for (int dy = -MAX_RADIUS; dy <= MAX_RADIUS; dy++) { + if (dy < -radius || dy > radius) continue; + if (abs(dy) % step != 0) continue; + + for (int dx = -MAX_RADIUS; dx <= MAX_RADIUS; dx++) { + if (dx < -radius || dx > radius) continue; + if (abs(dx) % step != 0) continue; + + vec2 offset = vec2(float(dx), float(dy)); + float dist2 = dot(offset, offset); + if (dist2 > radius2) continue; + + vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb; + + // Spatial Gaussian + float spatialWeight = exp(dist2 * invSpatial2); + + // Perceptual color distance (weighted RGB) + vec3 diff = sampleRGB - centerRGB; + float colorDist = dot(diff * diff, vec3(0.299, 0.587, 0.114)); + float colorWeight = exp(colorDist * invColor2); + + float w = spatialWeight * colorWeight; + sumRGB += sampleRGB * w; + sumWeight += w; + } + } + + vec3 resultRGB = sumRGB / max(sumWeight, EPSILON); + return vec4(resultRGB, center.a); // preserve center alpha +} + +void main() { + vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0)); + + float radiusF = clamp(u_float0, 0.0, float(MAX_RADIUS)); + int radius = int(radiusF + 0.5); + + if (radius == 0) { + fragColor = texture(u_image0, v_texCoord); + return; + } + + // Edge threshold → color sigma + // Squared curve for better low-end control + float t = clamp(u_float1, 0.0, 100.0) / 100.0; + t *= t; + float sigmaColor = mix(0.01, 0.5, t); + + // Spatial sigma tied to radius + float sigmaSpatial = max(radiusF * 0.75, 0.5); + + fragColor = bilateralFilter( + v_texCoord, + texelSize, + radius, + sigmaSpatial, + sigmaColor + ); +} \ No newline at end of file diff --git a/blueprints/.glsl/Film_Grain_15.frag b/blueprints/.glsl/Film_Grain_15.frag new file mode 100644 index 000000000..21585825b --- /dev/null +++ b/blueprints/.glsl/Film_Grain_15.frag @@ -0,0 +1,124 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform vec2 u_resolution; +uniform float u_float0; // grain amount [0.0 – 1.0] typical: 0.2–0.8 +uniform float u_float1; // grain size [0.3 – 3.0] lower = finer grain +uniform float u_float2; // color amount [0.0 – 1.0] 0 = monochrome, 1 = RGB grain +uniform float u_float3; // luminance bias [0.0 – 1.0] 0 = uniform, 1 = shadows only +uniform int u_int0; // noise mode [0 or 1] 0 = smooth, 1 = grainy + +in vec2 v_texCoord; +layout(location = 0) out vec4 fragColor0; + +// High-quality integer hash (pcg-like) +uint pcg(uint v) { + uint state = v * 747796405u + 2891336453u; + uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +// 2D -> 1D hash input +uint hash2d(uvec2 p) { + return pcg(p.x + pcg(p.y)); +} + +// Hash to float [0, 1] +float hashf(uvec2 p) { + return float(hash2d(p)) / float(0xffffffffu); +} + +// Hash to float with offset (for RGB channels) +float hashf(uvec2 p, uint offset) { + return float(pcg(hash2d(p) + offset)) / float(0xffffffffu); +} + +// Convert uniform [0,1] to roughly Gaussian distribution +// Using simple approximation: average of multiple samples +float toGaussian(uvec2 p) { + float sum = hashf(p, 0u) + hashf(p, 1u) + hashf(p, 2u) + hashf(p, 3u); + return (sum - 2.0) * 0.7; // Centered, scaled +} + +float toGaussian(uvec2 p, uint offset) { + float sum = hashf(p, offset) + hashf(p, offset + 1u) + + hashf(p, offset + 2u) + hashf(p, offset + 3u); + return (sum - 2.0) * 0.7; +} + +// Smooth noise with better interpolation +float smoothNoise(vec2 p) { + vec2 i = floor(p); + vec2 f = fract(p); + + // Quintic interpolation (less banding than cubic) + f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0); + + uvec2 ui = uvec2(i); + float a = toGaussian(ui); + float b = toGaussian(ui + uvec2(1u, 0u)); + float c = toGaussian(ui + uvec2(0u, 1u)); + float d = toGaussian(ui + uvec2(1u, 1u)); + + return mix(mix(a, b, f.x), mix(c, d, f.x), f.y); +} + +float smoothNoise(vec2 p, uint offset) { + vec2 i = floor(p); + vec2 f = fract(p); + + f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0); + + uvec2 ui = uvec2(i); + float a = toGaussian(ui, offset); + float b = toGaussian(ui + uvec2(1u, 0u), offset); + float c = toGaussian(ui + uvec2(0u, 1u), offset); + float d = toGaussian(ui + uvec2(1u, 1u), offset); + + return mix(mix(a, b, f.x), mix(c, d, f.x), f.y); +} + +void main() { + vec4 color = texture(u_image0, v_texCoord); + + // Luminance (Rec.709) + float luma = dot(color.rgb, vec3(0.2126, 0.7152, 0.0722)); + + // Grain UV (resolution-independent) + vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01); + uvec2 grainPixel = uvec2(grainUV); + + float g; + vec3 grainRGB; + + if (u_int0 == 1) { + // Grainy mode: pure hash noise (no interpolation = no banding) + g = toGaussian(grainPixel); + grainRGB = vec3( + toGaussian(grainPixel, 100u), + toGaussian(grainPixel, 200u), + toGaussian(grainPixel, 300u) + ); + } else { + // Smooth mode: interpolated with quintic curve + g = smoothNoise(grainUV); + grainRGB = vec3( + smoothNoise(grainUV, 100u), + smoothNoise(grainUV, 200u), + smoothNoise(grainUV, 300u) + ); + } + + // Luminance weighting (less grain in highlights) + float lumWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0)); + + // Strength + float strength = u_float0 * 0.15; + + // Color vs monochrome grain + vec3 grainColor = mix(vec3(g), grainRGB, clamp(u_float2, 0.0, 1.0)); + + color.rgb += grainColor * strength * lumWeight; + fragColor0 = vec4(clamp(color.rgb, 0.0, 1.0), color.a); +} diff --git a/blueprints/.glsl/Glow_30.frag b/blueprints/.glsl/Glow_30.frag new file mode 100644 index 000000000..0ee152628 --- /dev/null +++ b/blueprints/.glsl/Glow_30.frag @@ -0,0 +1,133 @@ +#version 300 es +precision mediump float; + +uniform sampler2D u_image0; +uniform vec2 u_resolution; +uniform int u_int0; // Blend mode +uniform int u_int1; // Color tint +uniform float u_float0; // Intensity +uniform float u_float1; // Radius +uniform float u_float2; // Threshold + +in vec2 v_texCoord; +out vec4 fragColor; + +const int BLEND_ADD = 0; +const int BLEND_SCREEN = 1; +const int BLEND_SOFT = 2; +const int BLEND_OVERLAY = 3; +const int BLEND_LIGHTEN = 4; + +const float GOLDEN_ANGLE = 2.39996323; +const int MAX_SAMPLES = 48; +const vec3 LUMA = vec3(0.299, 0.587, 0.114); + +float hash(vec2 p) { + p = fract(p * vec2(123.34, 456.21)); + p += dot(p, p + 45.32); + return fract(p.x * p.y); +} + +vec3 hexToRgb(int h) { + return vec3( + float((h >> 16) & 255), + float((h >> 8) & 255), + float(h & 255) + ) * (1.0 / 255.0); +} + +vec3 blend(vec3 base, vec3 glow, int mode) { + if (mode == BLEND_SCREEN) { + return 1.0 - (1.0 - base) * (1.0 - glow); + } + if (mode == BLEND_SOFT) { + return mix( + base - (1.0 - 2.0 * glow) * base * (1.0 - base), + base + (2.0 * glow - 1.0) * (sqrt(base) - base), + step(0.5, glow) + ); + } + if (mode == BLEND_OVERLAY) { + return mix( + 2.0 * base * glow, + 1.0 - 2.0 * (1.0 - base) * (1.0 - glow), + step(0.5, base) + ); + } + if (mode == BLEND_LIGHTEN) { + return max(base, glow); + } + return base + glow; +} + +void main() { + vec4 original = texture(u_image0, v_texCoord); + + float intensity = u_float0 * 0.05; + float radius = u_float1 * u_float1 * 0.012; + + if (intensity < 0.001 || radius < 0.1) { + fragColor = original; + return; + } + + float threshold = 1.0 - u_float2 * 0.01; + float t0 = threshold - 0.15; + float t1 = threshold + 0.15; + + vec2 texelSize = 1.0 / u_resolution; + float radius2 = radius * radius; + + float sampleScale = clamp(radius * 0.75, 0.35, 1.0); + int samples = int(float(MAX_SAMPLES) * sampleScale); + + float noise = hash(gl_FragCoord.xy); + float angleOffset = noise * GOLDEN_ANGLE; + float radiusJitter = 0.85 + noise * 0.3; + + float ca = cos(GOLDEN_ANGLE); + float sa = sin(GOLDEN_ANGLE); + vec2 dir = vec2(cos(angleOffset), sin(angleOffset)); + + vec3 glow = vec3(0.0); + float totalWeight = 0.0; + + // Center tap + float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA)); + glow += original.rgb * centerMask * 2.0; + totalWeight += 2.0; + + for (int i = 1; i < MAX_SAMPLES; i++) { + if (i >= samples) break; + + float fi = float(i); + float dist = sqrt(fi / float(samples)) * radius * radiusJitter; + + vec2 offset = dir * dist * texelSize; + vec3 c = texture(u_image0, v_texCoord + offset).rgb; + float mask = smoothstep(t0, t1, dot(c, LUMA)); + + float w = 1.0 - (dist * dist) / (radius2 * 1.5); + w = max(w, 0.0); + w *= w; + + glow += c * mask * w; + totalWeight += w; + + dir = vec2( + dir.x * ca - dir.y * sa, + dir.x * sa + dir.y * ca + ); + } + + glow *= intensity / max(totalWeight, 0.001); + + if (u_int1 > 0) { + glow *= hexToRgb(u_int1); + } + + vec3 result = blend(original.rgb, glow, u_int0); + result += (noise - 0.5) * (1.0 / 255.0); + + fragColor = vec4(clamp(result, 0.0, 1.0), original.a); +} \ No newline at end of file diff --git a/blueprints/.glsl/Hue_and_Saturation_1.frag b/blueprints/.glsl/Hue_and_Saturation_1.frag new file mode 100644 index 000000000..0fa6810af --- /dev/null +++ b/blueprints/.glsl/Hue_and_Saturation_1.frag @@ -0,0 +1,222 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform int u_int0; // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize +uniform int u_int1; // Color Space: 0=HSL, 1=HSB/HSV +uniform float u_float0; // Hue (-180 to 180) +uniform float u_float1; // Saturation (-100 to 100) +uniform float u_float2; // Lightness/Brightness (-100 to 100) +uniform float u_float3; // Overlap (0 to 100) - feathering between adjacent color ranges + +in vec2 v_texCoord; +out vec4 fragColor; + +// Color range modes +const int MODE_MASTER = 0; +const int MODE_RED = 1; +const int MODE_YELLOW = 2; +const int MODE_GREEN = 3; +const int MODE_CYAN = 4; +const int MODE_BLUE = 5; +const int MODE_MAGENTA = 6; +const int MODE_COLORIZE = 7; + +// Color space modes +const int COLORSPACE_HSL = 0; +const int COLORSPACE_HSB = 1; + +const float EPSILON = 0.0001; + +//============================================================================= +// RGB <-> HSL Conversions +//============================================================================= + +vec3 rgb2hsl(vec3 c) { + float maxC = max(max(c.r, c.g), c.b); + float minC = min(min(c.r, c.g), c.b); + float delta = maxC - minC; + + float h = 0.0; + float s = 0.0; + float l = (maxC + minC) * 0.5; + + if (delta > EPSILON) { + s = l < 0.5 + ? delta / (maxC + minC) + : delta / (2.0 - maxC - minC); + + if (maxC == c.r) { + h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0); + } else if (maxC == c.g) { + h = (c.b - c.r) / delta + 2.0; + } else { + h = (c.r - c.g) / delta + 4.0; + } + h /= 6.0; + } + + return vec3(h, s, l); +} + +float hue2rgb(float p, float q, float t) { + t = fract(t); + if (t < 1.0/6.0) return p + (q - p) * 6.0 * t; + if (t < 0.5) return q; + if (t < 2.0/3.0) return p + (q - p) * (2.0/3.0 - t) * 6.0; + return p; +} + +vec3 hsl2rgb(vec3 hsl) { + if (hsl.y < EPSILON) return vec3(hsl.z); + + float q = hsl.z < 0.5 + ? hsl.z * (1.0 + hsl.y) + : hsl.z + hsl.y - hsl.z * hsl.y; + float p = 2.0 * hsl.z - q; + + return vec3( + hue2rgb(p, q, hsl.x + 1.0/3.0), + hue2rgb(p, q, hsl.x), + hue2rgb(p, q, hsl.x - 1.0/3.0) + ); +} + +vec3 rgb2hsb(vec3 c) { + float maxC = max(max(c.r, c.g), c.b); + float minC = min(min(c.r, c.g), c.b); + float delta = maxC - minC; + + float h = 0.0; + float s = (maxC > EPSILON) ? delta / maxC : 0.0; + float b = maxC; + + if (delta > EPSILON) { + if (maxC == c.r) { + h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0); + } else if (maxC == c.g) { + h = (c.b - c.r) / delta + 2.0; + } else { + h = (c.r - c.g) / delta + 4.0; + } + h /= 6.0; + } + + return vec3(h, s, b); +} + +vec3 hsb2rgb(vec3 hsb) { + vec3 rgb = clamp(abs(mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0); + return hsb.z * mix(vec3(1.0), rgb, hsb.y); +} + +//============================================================================= +// Color Range Weight Calculation +//============================================================================= + +float hueDistance(float a, float b) { + float d = abs(a - b); + return min(d, 1.0 - d); +} + +float getHueWeight(float hue, float center, float overlap) { + float baseWidth = 1.0 / 6.0; + float feather = baseWidth * overlap; + + float d = hueDistance(hue, center); + + float inner = baseWidth * 0.5; + float outer = inner + feather; + + return 1.0 - smoothstep(inner, outer, d); +} + +float getModeWeight(float hue, int mode, float overlap) { + if (mode == MODE_MASTER || mode == MODE_COLORIZE) return 1.0; + + if (mode == MODE_RED) { + return max( + getHueWeight(hue, 0.0, overlap), + getHueWeight(hue, 1.0, overlap) + ); + } + + float center = float(mode - 1) / 6.0; + return getHueWeight(hue, center, overlap); +} + +//============================================================================= +// Adjustment Functions +//============================================================================= + +float adjustLightness(float l, float amount) { + return amount > 0.0 + ? l + (1.0 - l) * amount + : l + l * amount; +} + +float adjustBrightness(float b, float amount) { + return clamp(b + amount, 0.0, 1.0); +} + +float adjustSaturation(float s, float amount) { + return amount > 0.0 + ? s + (1.0 - s) * amount + : s + s * amount; +} + +vec3 colorize(vec3 rgb, float hue, float sat, float light) { + float lum = dot(rgb, vec3(0.299, 0.587, 0.114)); + float l = adjustLightness(lum, light); + + vec3 hsl = vec3(fract(hue), clamp(sat, 0.0, 1.0), clamp(l, 0.0, 1.0)); + return hsl2rgb(hsl); +} + +//============================================================================= +// Main +//============================================================================= + +void main() { + vec4 original = texture(u_image0, v_texCoord); + + float hueShift = u_float0 / 360.0; // -180..180 -> -0.5..0.5 + float satAmount = u_float1 / 100.0; // -100..100 -> -1..1 + float lightAmount= u_float2 / 100.0; // -100..100 -> -1..1 + float overlap = u_float3 / 100.0; // 0..100 -> 0..1 + + vec3 result; + + if (u_int0 == MODE_COLORIZE) { + result = colorize(original.rgb, hueShift, satAmount, lightAmount); + fragColor = vec4(result, original.a); + return; + } + + vec3 hsx = (u_int1 == COLORSPACE_HSL) + ? rgb2hsl(original.rgb) + : rgb2hsb(original.rgb); + + float weight = getModeWeight(hsx.x, u_int0, overlap); + + if (u_int0 != MODE_MASTER && hsx.y < EPSILON) { + weight = 0.0; + } + + if (weight > EPSILON) { + float h = fract(hsx.x + hueShift * weight); + float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0); + float v = (u_int1 == COLORSPACE_HSL) + ? clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0) + : clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0); + + vec3 adjusted = vec3(h, s, v); + result = (u_int1 == COLORSPACE_HSL) + ? hsl2rgb(adjusted) + : hsb2rgb(adjusted); + } else { + result = original.rgb; + } + + fragColor = vec4(result, original.a); +} diff --git a/blueprints/.glsl/Image_Blur_1.frag b/blueprints/.glsl/Image_Blur_1.frag new file mode 100644 index 000000000..83238111d --- /dev/null +++ b/blueprints/.glsl/Image_Blur_1.frag @@ -0,0 +1,111 @@ +#version 300 es +#pragma passes 2 +precision highp float; + +// Blur type constants +const int BLUR_GAUSSIAN = 0; +const int BLUR_BOX = 1; +const int BLUR_RADIAL = 2; + +// Radial blur config +const int RADIAL_SAMPLES = 12; +const float RADIAL_STRENGTH = 0.0003; + +uniform sampler2D u_image0; +uniform vec2 u_resolution; +uniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL) +uniform float u_float0; // Blur radius/amount +uniform int u_pass; // Pass index (0 = horizontal, 1 = vertical) + +in vec2 v_texCoord; +layout(location = 0) out vec4 fragColor0; + +float gaussian(float x, float sigma) { + return exp(-(x * x) / (2.0 * sigma * sigma)); +} + +void main() { + vec2 texelSize = 1.0 / u_resolution; + float radius = max(u_float0, 0.0); + + // Radial (angular) blur - single pass, doesn't use separable + if (u_int0 == BLUR_RADIAL) { + // Only execute on first pass + if (u_pass > 0) { + fragColor0 = texture(u_image0, v_texCoord); + return; + } + + vec2 center = vec2(0.5); + vec2 dir = v_texCoord - center; + float dist = length(dir); + + if (dist < 1e-4) { + fragColor0 = texture(u_image0, v_texCoord); + return; + } + + vec4 sum = vec4(0.0); + float totalWeight = 0.0; + float angleStep = radius * RADIAL_STRENGTH; + + dir /= dist; + + float cosStep = cos(angleStep); + float sinStep = sin(angleStep); + + float negAngle = -float(RADIAL_SAMPLES) * angleStep; + vec2 rotDir = vec2( + dir.x * cos(negAngle) - dir.y * sin(negAngle), + dir.x * sin(negAngle) + dir.y * cos(negAngle) + ); + + for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) { + vec2 uv = center + rotDir * dist; + float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES); + sum += texture(u_image0, uv) * w; + totalWeight += w; + + rotDir = vec2( + rotDir.x * cosStep - rotDir.y * sinStep, + rotDir.x * sinStep + rotDir.y * cosStep + ); + } + + fragColor0 = sum / max(totalWeight, 0.001); + return; + } + + // Separable Gaussian / Box blur + int samples = int(ceil(radius)); + + if (samples == 0) { + fragColor0 = texture(u_image0, v_texCoord); + return; + } + + // Direction: pass 0 = horizontal, pass 1 = vertical + vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0); + + vec4 color = vec4(0.0); + float totalWeight = 0.0; + float sigma = radius / 2.0; + + for (int i = -samples; i <= samples; i++) { + vec2 offset = dir * float(i) * texelSize; + vec4 sample_color = texture(u_image0, v_texCoord + offset); + + float weight; + if (u_int0 == BLUR_GAUSSIAN) { + weight = gaussian(float(i), sigma); + } else { + // BLUR_BOX + weight = 1.0; + } + + color += sample_color * weight; + totalWeight += weight; + } + + fragColor0 = color / totalWeight; +} diff --git a/blueprints/.glsl/Image_Channels_23.frag b/blueprints/.glsl/Image_Channels_23.frag new file mode 100644 index 000000000..76d70af13 --- /dev/null +++ b/blueprints/.glsl/Image_Channels_23.frag @@ -0,0 +1,19 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; + +in vec2 v_texCoord; +layout(location = 0) out vec4 fragColor0; +layout(location = 1) out vec4 fragColor1; +layout(location = 2) out vec4 fragColor2; +layout(location = 3) out vec4 fragColor3; + +void main() { + vec4 color = texture(u_image0, v_texCoord); + // Output each channel as grayscale to separate render targets + fragColor0 = vec4(vec3(color.r), 1.0); // Red channel + fragColor1 = vec4(vec3(color.g), 1.0); // Green channel + fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel + fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel +} diff --git a/blueprints/.glsl/Image_Levels_1.frag b/blueprints/.glsl/Image_Levels_1.frag new file mode 100644 index 000000000..f34ed1d81 --- /dev/null +++ b/blueprints/.glsl/Image_Levels_1.frag @@ -0,0 +1,71 @@ +#version 300 es +precision highp float; + +// Levels Adjustment +// u_int0: channel (0=RGB, 1=R, 2=G, 3=B) default: 0 +// u_float0: input black (0-255) default: 0 +// u_float1: input white (0-255) default: 255 +// u_float2: gamma (0.01-9.99) default: 1.0 +// u_float3: output black (0-255) default: 0 +// u_float4: output white (0-255) default: 255 + +uniform sampler2D u_image0; +uniform int u_int0; +uniform float u_float0; +uniform float u_float1; +uniform float u_float2; +uniform float u_float3; +uniform float u_float4; + +in vec2 v_texCoord; +out vec4 fragColor; + +vec3 applyLevels(vec3 color, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) { + float inRange = max(inWhite - inBlack, 0.0001); + vec3 result = clamp((color - inBlack) / inRange, 0.0, 1.0); + result = pow(result, vec3(1.0 / gamma)); + result = mix(vec3(outBlack), vec3(outWhite), result); + return result; +} + +float applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) { + float inRange = max(inWhite - inBlack, 0.0001); + float result = clamp((value - inBlack) / inRange, 0.0, 1.0); + result = pow(result, 1.0 / gamma); + result = mix(outBlack, outWhite, result); + return result; +} + +void main() { + vec4 texColor = texture(u_image0, v_texCoord); + vec3 color = texColor.rgb; + + float inBlack = u_float0 / 255.0; + float inWhite = u_float1 / 255.0; + float gamma = u_float2; + float outBlack = u_float3 / 255.0; + float outWhite = u_float4 / 255.0; + + vec3 result; + + if (u_int0 == 0) { + result = applyLevels(color, inBlack, inWhite, gamma, outBlack, outWhite); + } + else if (u_int0 == 1) { + result = color; + result.r = applySingleChannel(color.r, inBlack, inWhite, gamma, outBlack, outWhite); + } + else if (u_int0 == 2) { + result = color; + result.g = applySingleChannel(color.g, inBlack, inWhite, gamma, outBlack, outWhite); + } + else if (u_int0 == 3) { + result = color; + result.b = applySingleChannel(color.b, inBlack, inWhite, gamma, outBlack, outWhite); + } + else { + result = color; + } + + fragColor = vec4(result, texColor.a); +} \ No newline at end of file diff --git a/blueprints/.glsl/README.md b/blueprints/.glsl/README.md new file mode 100644 index 000000000..d4084284b --- /dev/null +++ b/blueprints/.glsl/README.md @@ -0,0 +1,28 @@ +# GLSL Shader Sources + +This folder contains the GLSL fragment shaders extracted from blueprint JSON files for easier editing and version control. + +## File Naming Convention + +`{Blueprint_Name}_{node_id}.frag` + +- **Blueprint_Name**: The JSON filename with spaces/special chars replaced by underscores +- **node_id**: The GLSLShader node ID within the subgraph + +## Usage + +```bash +# Extract shaders from blueprint JSONs to this folder +python update_blueprints.py extract + +# Patch edited shaders back into blueprint JSONs +python update_blueprints.py patch +``` + +## Workflow + +1. Run `extract` to pull current shaders from JSONs +2. Edit `.frag` files +3. Run `patch` to update the blueprint JSONs +4. Test +5. Commit both `.frag` files and updated JSONs diff --git a/blueprints/.glsl/Sharpen_23.frag b/blueprints/.glsl/Sharpen_23.frag new file mode 100644 index 000000000..c03f94b66 --- /dev/null +++ b/blueprints/.glsl/Sharpen_23.frag @@ -0,0 +1,28 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform vec2 u_resolution; +uniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0 + +in vec2 v_texCoord; +layout(location = 0) out vec4 fragColor0; + +void main() { + vec2 texel = 1.0 / u_resolution; + + // Sample center and neighbors + vec4 center = texture(u_image0, v_texCoord); + vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y)); + vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y)); + vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0)); + vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0)); + + // Edge enhancement (Laplacian) + vec4 edges = center * 4.0 - top - bottom - left - right; + + // Add edges back scaled by strength + vec4 sharpened = center + edges * u_float0; + + fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a); +} \ No newline at end of file diff --git a/blueprints/.glsl/Unsharp_Mask_26.frag b/blueprints/.glsl/Unsharp_Mask_26.frag new file mode 100644 index 000000000..f5990cb4a --- /dev/null +++ b/blueprints/.glsl/Unsharp_Mask_26.frag @@ -0,0 +1,61 @@ +#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform vec2 u_resolution; +uniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5 +uniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels +uniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen + +in vec2 v_texCoord; +layout(location = 0) out vec4 fragColor0; + +float gaussian(float x, float sigma) { + return exp(-(x * x) / (2.0 * sigma * sigma)); +} + +float getLuminance(vec3 color) { + return dot(color, vec3(0.2126, 0.7152, 0.0722)); +} + +void main() { + vec2 texel = 1.0 / u_resolution; + float radius = max(u_float1, 0.5); + float amount = u_float0; + float threshold = u_float2; + + vec4 original = texture(u_image0, v_texCoord); + + // Gaussian blur for the "unsharp" mask + int samples = int(ceil(radius)); + float sigma = radius / 2.0; + + vec4 blurred = vec4(0.0); + float totalWeight = 0.0; + + for (int x = -samples; x <= samples; x++) { + for (int y = -samples; y <= samples; y++) { + vec2 offset = vec2(float(x), float(y)) * texel; + vec4 sample_color = texture(u_image0, v_texCoord + offset); + + float dist = length(vec2(float(x), float(y))); + float weight = gaussian(dist, sigma); + blurred += sample_color * weight; + totalWeight += weight; + } + } + blurred /= totalWeight; + + // Unsharp mask = original - blurred + vec3 mask = original.rgb - blurred.rgb; + + // Luminance-based threshold with smooth falloff + float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb)); + float thresholdScale = smoothstep(0.0, threshold, lumaDelta); + mask *= thresholdScale; + + // Sharpen: original + mask * amount + vec3 sharpened = original.rgb + mask * amount; + + fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a); +} diff --git a/blueprints/.glsl/update_blueprints.py b/blueprints/.glsl/update_blueprints.py new file mode 100644 index 000000000..c5bd0ed54 --- /dev/null +++ b/blueprints/.glsl/update_blueprints.py @@ -0,0 +1,159 @@ +#!/usr/bin/env python3 +""" +Shader Blueprint Updater + +Syncs GLSL shader files between this folder and blueprint JSON files. + +File naming convention: + {Blueprint Name}_{node_id}.frag + +Usage: + python update_blueprints.py extract # Extract shaders from JSONs to here + python update_blueprints.py patch # Patch shaders back into JSONs + python update_blueprints.py # Same as patch (default) +""" + +import json +import logging +import sys +import re +from pathlib import Path + +logging.basicConfig(level=logging.INFO, format='%(message)s') +logger = logging.getLogger(__name__) + +GLSL_DIR = Path(__file__).parent +BLUEPRINTS_DIR = GLSL_DIR.parent + + +def get_blueprint_files(): + """Get all blueprint JSON files.""" + return sorted(BLUEPRINTS_DIR.glob("*.json")) + + +def sanitize_filename(name): + """Convert blueprint name to safe filename.""" + return re.sub(r'[^\w\-]', '_', name) + + +def extract_shaders(): + """Extract all shaders from blueprint JSONs to this folder.""" + extracted = 0 + for json_path in get_blueprint_files(): + blueprint_name = json_path.stem + + try: + with open(json_path, 'r') as f: + data = json.load(f) + except (json.JSONDecodeError, IOError) as e: + logger.warning("Skipping %s: %s", json_path.name, e) + continue + + # Find GLSLShader nodes in subgraphs + for subgraph in data.get('definitions', {}).get('subgraphs', []): + for node in subgraph.get('nodes', []): + if node.get('type') == 'GLSLShader': + node_id = node.get('id') + widgets = node.get('widgets_values', []) + + # Find shader code (first string that looks like GLSL) + for widget in widgets: + if isinstance(widget, str) and widget.startswith('#version'): + safe_name = sanitize_filename(blueprint_name) + frag_name = f"{safe_name}_{node_id}.frag" + frag_path = GLSL_DIR / frag_name + + with open(frag_path, 'w') as f: + f.write(widget) + + logger.info(" Extracted: %s", frag_name) + extracted += 1 + break + + logger.info("\nExtracted %d shader(s)", extracted) + + +def patch_shaders(): + """Patch shaders from this folder back into blueprint JSONs.""" + # Build lookup: blueprint_name -> [(node_id, shader_code), ...] + shader_updates = {} + + for frag_path in sorted(GLSL_DIR.glob("*.frag")): + # Parse filename: {blueprint_name}_{node_id}.frag + parts = frag_path.stem.rsplit('_', 1) + if len(parts) != 2: + logger.warning("Skipping %s: invalid filename format", frag_path.name) + continue + + blueprint_name, node_id_str = parts + + try: + node_id = int(node_id_str) + except ValueError: + logger.warning("Skipping %s: invalid node_id", frag_path.name) + continue + + with open(frag_path, 'r') as f: + shader_code = f.read() + + if blueprint_name not in shader_updates: + shader_updates[blueprint_name] = [] + shader_updates[blueprint_name].append((node_id, shader_code)) + + # Apply updates to JSON files + patched = 0 + for json_path in get_blueprint_files(): + blueprint_name = sanitize_filename(json_path.stem) + + if blueprint_name not in shader_updates: + continue + + try: + with open(json_path, 'r') as f: + data = json.load(f) + except (json.JSONDecodeError, IOError) as e: + logger.error("Error reading %s: %s", json_path.name, e) + continue + + modified = False + for node_id, shader_code in shader_updates[blueprint_name]: + # Find the node and update + for subgraph in data.get('definitions', {}).get('subgraphs', []): + for node in subgraph.get('nodes', []): + if node.get('id') == node_id and node.get('type') == 'GLSLShader': + widgets = node.get('widgets_values', []) + if len(widgets) > 0 and widgets[0] != shader_code: + widgets[0] = shader_code + modified = True + logger.info(" Patched: %s (node %d)", json_path.name, node_id) + patched += 1 + + if modified: + with open(json_path, 'w') as f: + json.dump(data, f) + + if patched == 0: + logger.info("No changes to apply.") + else: + logger.info("\nPatched %d shader(s)", patched) + + +def main(): + if len(sys.argv) < 2: + command = "patch" + else: + command = sys.argv[1].lower() + + if command == "extract": + logger.info("Extracting shaders from blueprints...") + extract_shaders() + elif command in ("patch", "update", "apply"): + logger.info("Patching shaders into blueprints...") + patch_shaders() + else: + logger.info(__doc__) + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/blueprints/Brightness and Contrast.json b/blueprints/Brightness and Contrast.json new file mode 100644 index 000000000..2c7e60eb1 --- /dev/null +++ b/blueprints/Brightness and Contrast.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 140, "last_link_id": 0, "nodes": [{"id": 140, "type": "916dff42-6166-4d45-b028-04eaf69fbb35", "pos": [500, 1440], "size": [250, 178], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["4", "value"], ["5", "value"]]}, "widgets_values": [], "title": "Brightness and Contrast"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "916dff42-6166-4d45-b028-04eaf69fbb35", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 143, "lastLinkId": 118, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Brightness and Contrast", "inputNode": {"id": -10, "bounding": [360, -176, 120, 60]}, "outputNode": {"id": -20, "bounding": [1410, -176, 120, 60]}, "inputs": [{"id": "a5aae7ea-b511-4045-b5da-94101e269cd7", "name": "images.image0", "type": "IMAGE", "linkIds": [117], "localized_name": "images.image0", "label": "image", "pos": [460, -156]}], "outputs": [{"id": "30b72604-69b3-4944-b253-a9099bbd73a9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [118], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [1430, -156]}], "widgets": [], "nodes": [{"id": 4, "type": "PrimitiveFloat", "pos": [540, -280], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "brightness", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [115]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [50]}, {"id": 5, "type": "PrimitiveFloat", "pos": [540, -170], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "contrast", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [116]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [136, 136, 136]}, {"offset": 0.4, "color": [68, 68, 68]}, {"offset": 0.6, "color": [187, 187, 187]}, {"offset": 0.8, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 143, "type": "GLSLShader", "pos": [840, -280], "size": [400, 212], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 117}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 115}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 116}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [118]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // Brightness slider -100..100\nuniform float u_float1; // Contrast slider -100..100\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst float MID_GRAY = 0.18; // 18% reflectance\n\n// sRGB gamma 2.2 approximation\nvec3 srgbToLinear(vec3 c) {\n return pow(max(c, 0.0), vec3(2.2));\n}\n\nvec3 linearToSrgb(vec3 c) {\n return pow(max(c, 0.0), vec3(1.0/2.2));\n}\n\nfloat mapBrightness(float b) {\n return clamp(b / 100.0, -1.0, 1.0);\n}\n\nfloat mapContrast(float c) {\n return clamp(c / 100.0 + 1.0, 0.0, 2.0);\n}\n\nvoid main() {\n vec4 orig = texture(u_image0, v_texCoord);\n\n float brightness = mapBrightness(u_float0);\n float contrast = mapContrast(u_float1);\n\n vec3 lin = srgbToLinear(orig.rgb);\n\n lin = (lin - MID_GRAY) * contrast + brightness + MID_GRAY;\n\n // Convert back to sRGB\n vec3 result = linearToSrgb(clamp(lin, 0.0, 1.0));\n\n fragColor = vec4(result, orig.a);\n}\n", "from_input"]}], "groups": [], "links": [{"id": 115, "origin_id": 4, "origin_slot": 0, "target_id": 143, "target_slot": 2, "type": "FLOAT"}, {"id": 116, "origin_id": 5, "origin_slot": 0, "target_id": 143, "target_slot": 3, "type": "FLOAT"}, {"id": 117, "origin_id": -10, "origin_slot": 0, "target_id": 143, "target_slot": 0, "type": "IMAGE"}, {"id": 118, "origin_id": 143, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}, "extra": {}} diff --git a/blueprints/Canny to Image (Z-Image-Turbo).json b/blueprints/Canny to Image (Z-Image-Turbo).json new file mode 100644 index 000000000..8b78a834a --- /dev/null +++ b/blueprints/Canny to Image (Z-Image-Turbo).json @@ -0,0 +1 @@ +{"id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", "revision": 0, "last_node_id": 18, "last_link_id": 32, "nodes": [{"id": 18, "type": "c84f7959-3738-422b-ba6e-5808b5e90101", "pos": [300, 3830], "size": [400, 460], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "control image", "name": "image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "canny low threshold", "name": "low_threshold", "type": "FLOAT", "widget": {"name": "low_threshold"}, "link": null}, {"label": "canny high threshold", "name": "high_threshold", "type": "FLOAT", "widget": {"name": "high_threshold"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "low_threshold"], ["-1", "high_threshold"], ["7", "seed"], ["7", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", 0.3, 0.4, null, null, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "c84f7959-3738-422b-ba6e-5808b5e90101", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 18, "lastLinkId": 32, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Canny to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [-280, 4960, 158.880859375, 200]}, "outputNode": {"id": -20, "bounding": [1598.6038576146689, 4936.043696127976, 120, 60]}, "inputs": [{"id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", "name": "image", "type": "IMAGE", "linkIds": [26], "label": "control image", "pos": [-141.119140625, 4980]}, {"id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", "name": "text", "type": "STRING", "linkIds": [16], "label": "prompt", "pos": [-141.119140625, 5000]}, {"id": "6bd34d18-79f6-470f-94df-ca14c84ef3d8", "name": "low_threshold", "type": "FLOAT", "linkIds": [24], "label": "canny low threshold", "pos": [-141.119140625, 5020]}, {"id": "bbced993-057f-4d2d-909c-d791be73d1d2", "name": "high_threshold", "type": "FLOAT", "linkIds": [25], "label": "canny high threshold", "pos": [-141.119140625, 5040]}, {"id": "db7969bf-4b05-48a0-9598-87d3ac85b505", "name": "unet_name", "type": "COMBO", "linkIds": [29], "pos": [-141.119140625, 5060]}, {"id": "925b611c-5edf-406f-8dc5-7fec07d049a7", "name": "clip_name", "type": "COMBO", "linkIds": [30], "pos": [-141.119140625, 5080]}, {"id": "b4cf508b-4753-40d2-8c83-5a424237ee07", "name": "vae_name", "type": "COMBO", "linkIds": [31], "pos": [-141.119140625, 5100]}, {"id": "bd948f38-3a11-4091-99fc-bb2b3511bcd2", "name": "name", "type": "COMBO", "linkIds": [32], "pos": [-141.119140625, 5120]}], "outputs": [{"id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", "name": "IMAGE", "type": "IMAGE", "linkIds": [18], "pos": [1618.6038576146689, 4956.043696127976]}], "widgets": [], "nodes": [{"id": 1, "type": "CLIPLoader", "pos": [228.60376290329597, 4700.188357350136], "size": [270, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 30}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [14]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 2, "type": "UNETLoader", "pos": [228.60376290329597, 4550.1883046176445], "size": [270, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 29}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [9]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 3, "type": "VAELoader", "pos": [228.60376290329597, 4880.18831633181], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 31}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [2, 11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 4, "type": "ModelPatchLoader", "pos": [228.60376290329597, 5010.1884895078], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "name", "name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": 32}], "outputs": [{"localized_name": "MODEL_PATCH", "name": "MODEL_PATCH", "type": "MODEL_PATCH", "links": [10]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelPatchLoader", "models": [{"name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "directory": "model_patches"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}, {"id": 6, "type": "ModelSamplingAuraFlow", "pos": [998.6039930366841, 4490.18831829042], "size": [290, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 3}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [4]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 7, "type": "KSampler", "pos": [998.6039930366841, 4600.188351166619], "size": [300, 460], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 4}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 5}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 6}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 7}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 9, 1, "res_multistep", "simple", 1]}, {"id": 8, "type": "ConditioningZeroOut", "pos": [748.2704434516113, 5044.855005348689], "size": [204.134765625, 26.000000000000004], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 8}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 9, "type": "QwenImageDiffsynthControlnet", "pos": [608.2704174118008, 5204.85499785943], "size": [290, 138], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 9}, {"localized_name": "model_patch", "name": "model_patch", "type": "MODEL_PATCH", "link": 10}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 11}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 22}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [3]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "QwenImageDiffsynthControlnet", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 12, "type": "CLIPTextEncode", "pos": [548.2704310845766, 4544.854974431101], "size": [400, 330], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 14}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 16}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [5, 8]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 5, "type": "VAEDecode", "pos": [1338.6038576146689, 4500.188344983101], "size": [200, 46], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 1}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 2}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 15, "type": "ImageScaleToTotalPixels", "pos": [220, 5220], "size": [270, 106], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 26}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "megapixels", "name": "megapixels", "type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": "resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "ImageScaleToTotalPixels"}, "widgets_values": ["nearest-exact", 1, 1]}, {"id": 11, "type": "GetImageSize", "pos": [540, 5450], "size": [140, 66], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 23}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [12]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [13]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 10, "type": "EmptySD3LatentImage", "pos": [760, 5430], "size": [260, 106], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 12}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 13}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [7]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}, {"id": 14, "type": "Canny", "pos": [220, 5380], "size": [270, 82], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 27}, {"localized_name": "low_threshold", "name": "low_threshold", "type": "FLOAT", "widget": {"name": "low_threshold"}, "link": 24}, {"localized_name": "high_threshold", "name": "high_threshold", "type": "FLOAT", "widget": {"name": "high_threshold"}, "link": 25}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [22, 23, 28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "Canny"}, "widgets_values": [0.3, 0.4]}, {"id": 16, "type": "PreviewImage", "pos": [220, 5520], "size": [260, 270], "flags": {}, "order": 14, "mode": 4, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 28}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "PreviewImage"}, "widgets_values": []}], "groups": [{"id": 1, "title": "Prompt", "bounding": [530, 4460, 440, 630], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Models", "bounding": [210, 4460, 300, 640], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Apple ControlNet", "bounding": [530, 5120, 440, 260], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 1, "origin_id": 7, "origin_slot": 0, "target_id": 5, "target_slot": 0, "type": "LATENT"}, {"id": 2, "origin_id": 3, "origin_slot": 0, "target_id": 5, "target_slot": 1, "type": "VAE"}, {"id": 3, "origin_id": 9, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "MODEL"}, {"id": 4, "origin_id": 6, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 12, "origin_slot": 0, "target_id": 7, "target_slot": 1, "type": "CONDITIONING"}, {"id": 6, "origin_id": 8, "origin_slot": 0, "target_id": 7, "target_slot": 2, "type": "CONDITIONING"}, {"id": 7, "origin_id": 10, "origin_slot": 0, "target_id": 7, "target_slot": 3, "type": "LATENT"}, {"id": 8, "origin_id": 12, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "CONDITIONING"}, {"id": 9, "origin_id": 2, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "MODEL"}, {"id": 10, "origin_id": 4, "origin_slot": 0, "target_id": 9, "target_slot": 1, "type": "MODEL_PATCH"}, {"id": 11, "origin_id": 3, "origin_slot": 0, "target_id": 9, "target_slot": 2, "type": "VAE"}, {"id": 12, "origin_id": 11, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "INT"}, {"id": 13, "origin_id": 11, "origin_slot": 1, "target_id": 10, "target_slot": 1, "type": "INT"}, {"id": 14, "origin_id": 1, "origin_slot": 0, "target_id": 12, "target_slot": 0, "type": "CLIP"}, {"id": 16, "origin_id": -10, "origin_slot": 1, "target_id": 12, "target_slot": 1, "type": "STRING"}, {"id": 18, "origin_id": 5, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 22, "origin_id": 14, "origin_slot": 0, "target_id": 9, "target_slot": 3, "type": "IMAGE"}, {"id": 23, "origin_id": 14, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 24, "origin_id": -10, "origin_slot": 2, "target_id": 14, "target_slot": 1, "type": "FLOAT"}, {"id": 25, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 2, "type": "FLOAT"}, {"id": 26, "origin_id": -10, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 15, "origin_slot": 0, "target_id": 14, "target_slot": 0, "type": "IMAGE"}, {"id": 28, "origin_id": 14, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "IMAGE"}, {"id": 29, "origin_id": -10, "origin_slot": 4, "target_id": 2, "target_slot": 0, "type": "COMBO"}, {"id": 30, "origin_id": -10, "origin_slot": 5, "target_id": 1, "target_slot": 0, "type": "COMBO"}, {"id": 31, "origin_id": -10, "origin_slot": 6, "target_id": 3, "target_slot": 0, "type": "COMBO"}, {"id": 32, "origin_id": -10, "origin_slot": 7, "target_id": 4, "target_slot": 0, "type": "COMBO"}], "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Canny to image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.967267584583181, "offset": [444.759060017523, -3564.372163194443]}}, "version": 0.4} diff --git a/blueprints/Canny to Video (LTX 2.0).json b/blueprints/Canny to Video (LTX 2.0).json new file mode 100644 index 000000000..cd2c4e594 --- /dev/null +++ b/blueprints/Canny to Video (LTX 2.0).json @@ -0,0 +1 @@ +{"id": "02f6166f-32f8-4673-b861-76be1464cba5", "revision": 0, "last_node_id": 155, "last_link_id": 391, "nodes": [{"id": 1, "type": "884e1862-7567-4e72-bd2a-fd4fdfd06320", "pos": [1519.643633934233, 3717.5350173634242], "size": [400, 500], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "canny_images", "name": "image", "type": "IMAGE", "link": null}, {"label": "image_strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"label": "disable_first_frame", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": null}, {"label": "first_frame", "name": "image_1", "type": "IMAGE", "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": null}, {"label": "distlled_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"label": "upscale_model", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "strength"], ["143", "noise_seed"], ["126", "control_after_generate"], ["-1", "bypass"], ["-1", "ckpt_name"], ["-1", "lora_name"], ["-1", "text_encoder"], ["-1", "lora_name_1"], ["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.7.0", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", 1, null, null, false, "ltx-2-19b-dev-fp8.safetensors", "ltx-2-19b-ic-lora-canny-control.safetensors", "gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-distilled-lora-384.safetensors", "ltx-2-spatial-upscaler-x2-1.0.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "884e1862-7567-4e72-bd2a-fd4fdfd06320", "version": 1, "state": {"lastGroupId": 11, "lastNodeId": 155, "lastLinkId": 391, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Canny to Video (LTX 2.0)", "inputNode": {"id": -10, "bounding": [-2180, 4070, 146.8515625, 240]}, "outputNode": {"id": -20, "bounding": [1750, 4090, 120, 60]}, "inputs": [{"id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", "name": "text", "type": "STRING", "linkIds": [345], "pos": [-2053.1484375, 4090]}, {"id": "35a07084-3ecf-482a-a330-b40278770ca3", "name": "image", "type": "IMAGE", "linkIds": [348, 349], "label": "canny_images", "pos": [-2053.1484375, 4110]}, {"id": "59430efe-1090-4e36-8afe-b21ce7f4268b", "name": "strength", "type": "FLOAT", "linkIds": [370, 371], "label": "image_strength", "pos": [-2053.1484375, 4130]}, {"id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", "name": "bypass", "type": "BOOLEAN", "linkIds": [363, 368], "label": "disable_first_frame", "pos": [-2053.1484375, 4150]}, {"id": "bea20802-d654-4287-a8ef-0f834314bcf9", "name": "image_1", "type": "IMAGE", "linkIds": [364, 379], "label": "first_frame", "pos": [-2053.1484375, 4170]}, {"id": "4e2f26b5-9ad6-49a6-8e90-0ed24fc6a423", "name": "ckpt_name", "type": "COMBO", "linkIds": [385, 386, 387], "pos": [-2053.1484375, 4190]}, {"id": "81fdfcf3-92ca-4f8d-b13d-d22758231530", "name": "lora_name", "type": "COMBO", "linkIds": [388], "pos": [-2053.1484375, 4210]}, {"id": "3fa7991e-4419-44a7-9377-1b6125fef355", "name": "text_encoder", "type": "COMBO", "linkIds": [389], "pos": [-2053.1484375, 4230]}, {"id": "b9277d33-2f18-47bb-95ab-666799e8730f", "name": "lora_name_1", "type": "COMBO", "linkIds": [390], "label": "distlled_lora", "pos": [-2053.1484375, 4250]}, {"id": "80b2e9cf-e1a7-462f-ae0d-ffb4ba668a65", "name": "model_name", "type": "COMBO", "linkIds": [391], "label": "upscale_model", "pos": [-2053.1484375, 4270]}], "outputs": [{"id": "4e837941-de2d-4df8-8f94-686e24036897", "name": "VIDEO", "type": "VIDEO", "linkIds": [304], "localized_name": "VIDEO", "pos": [1770, 4110]}], "widgets": [], "nodes": [{"id": 93, "type": "CFGGuider", "pos": [-698, 3670], "size": [270, 106.66666666666667], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 326}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 309}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 311}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 94, "type": "KSamplerSelect", "pos": [-698, 3840], "size": [270, 68.88020833333334], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 99, "type": "ManualSigmas", "pos": [410, 3850], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "STRING", "widget": {"name": "sigmas"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [278]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ManualSigmas", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["0.909375, 0.725, 0.421875, 0.0"]}, {"id": 101, "type": "LTXVConcatAVLatent", "pos": [410, 4100], "size": [270, 110], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 365}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 266}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 108, "type": "CFGGuider", "pos": [410, 3700], "size": [270, 98], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 281}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 282}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [276]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.71", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 111, "type": "LTXVEmptyLatentAudio", "pos": [-1100, 4810], "size": [270, 120], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 383}, {"localized_name": "frames_number", "name": "frames_number", "type": "INT", "widget": {"name": "frames_number"}, "link": 329}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "INT", "widget": {"name": "frame_rate"}, "link": 354}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "Latent", "name": "Latent", "type": "LATENT", "links": [300]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVEmptyLatentAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [97, 25, 1]}, {"id": 123, "type": "SamplerCustomAdvanced", "pos": [-388, 3520], "size": [213.125, 120], "flags": {}, "order": 31, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 260}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 261}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 262}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 263}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 323}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": [272]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 114, "type": "LTXVConditioning", "pos": [-1134, 4140], "size": [270, 86.66666666666667], "flags": {}, "order": 27, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 292}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 293}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "FLOAT", "widget": {"name": "frame_rate"}, "link": 355}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [313]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [314]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 119, "type": "CLIPTextEncode", "pos": [-1164, 3880], "size": [400, 200], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 294}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [293]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles"], "color": "#323", "bgcolor": "#535"}, {"id": 116, "type": "LTXVConcatAVLatent", "pos": [-520, 4700], "size": [187.5, 60], "flags": {}, "order": 29, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 324}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 300}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [322, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 122, "type": "LTXVSeparateAVLatent", "pos": [-394, 3800], "size": [240, 46], "flags": {}, "order": 30, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 272}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [270]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [266]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 124, "type": "CLIPTextEncode", "pos": [-1174.999849798713, 3514.000055195033], "size": [410, 320], "flags": {}, "order": 32, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 295}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 345}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [292]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 98, "type": "KSamplerSelect", "pos": [410, 3980], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [277]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gradient_estimation"]}, {"id": 95, "type": "LTXVScheduler", "pos": [-700, 3980], "size": [270, 170], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 322}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "max_shift", "name": "max_shift", "type": "FLOAT", "widget": {"name": "max_shift"}, "link": null}, {"localized_name": "base_shift", "name": "base_shift", "type": "FLOAT", "widget": {"name": "base_shift"}, "link": null}, {"localized_name": "stretch", "name": "stretch", "type": "BOOLEAN", "widget": {"name": "stretch"}, "link": null}, {"localized_name": "terminal", "name": "terminal", "type": "FLOAT", "widget": {"name": "terminal"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVScheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 2.05, 0.95, true, 0.1]}, {"id": 126, "type": "RandomNoise", "pos": [-698, 3520], "size": [270, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}, {"id": 107, "type": "SamplerCustomAdvanced", "pos": [710, 3570], "size": [212.38333740234376, 106], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 347}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 276}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 277}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 278}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 279}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": []}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": [336]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 143, "type": "RandomNoise", "pos": [410, 3570], "size": [270, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [347]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed"]}, {"id": 139, "type": "LTXVAudioVAEDecode", "pos": [1130, 3840], "size": [240, 46], "flags": {}, "order": 35, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 338}, {"label": "Audio VAE", "localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 384}], "outputs": [{"localized_name": "Audio", "name": "Audio", "type": "AUDIO", "links": [339]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVAudioVAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 106, "type": "CreateVideo", "pos": [1420, 3760], "size": [270, 78], "flags": {}, "order": 20, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 352}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 339}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 356}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [304]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 134, "type": "LoraLoaderModelOnly", "pos": [-1650, 3760], "size": [420, 82], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 325}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 388}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [326, 327]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-ic-lora-canny-control.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Canny-Control/resolve/main/ltx-2-19b-ic-lora-canny-control.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-canny-control.safetensors", 1], "color": "#322", "bgcolor": "#533"}, {"id": 138, "type": "LTXVSeparateAVLatent", "pos": [730, 3730], "size": [193.2916015625, 46], "flags": {}, "order": 34, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 336}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [337, 351]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [338]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 144, "type": "VAEDecodeTiled", "pos": [1120, 3640], "size": [270, 150], "flags": {}, "order": 36, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 351}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 353}, {"localized_name": "tile_size", "name": "tile_size", "type": "INT", "widget": {"name": "tile_size"}, "link": null}, {"localized_name": "overlap", "name": "overlap", "type": "INT", "widget": {"name": "overlap"}, "link": null}, {"localized_name": "temporal_size", "name": "temporal_size", "type": "INT", "widget": {"name": "temporal_size"}, "link": null}, {"localized_name": "temporal_overlap", "name": "temporal_overlap", "type": "INT", "widget": {"name": "temporal_overlap"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "VAEDecodeTiled", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [512, 64, 4096, 8]}, {"id": 113, "type": "VAEDecode", "pos": [1130, 3530], "size": [240, 50], "flags": {}, "order": 26, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 337}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 291}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 110, "type": "GetImageSize", "pos": [-1630, 4450], "size": [260, 80], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 349}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [296]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [297]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": [329, 330]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 145, "type": "PrimitiveInt", "pos": [-1630, 4620], "size": [270, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveInt", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24, "fixed"]}, {"id": 148, "type": "PrimitiveFloat", "pos": [-1630, 4750], "size": [270, 58], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [355, 356]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24]}, {"id": 115, "type": "EmptyLTXVLatentVideo", "pos": [-1100, 4610], "size": [270, 146.66666666666669], "flags": {}, "order": 28, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 296}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 297}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 330}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "EmptyLTXVLatentVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [768, 512, 97, 1]}, {"id": 149, "type": "LTXVImgToVideoInplace", "pos": [-1090, 4400], "size": [270, 152], "flags": {}, "order": 37, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 359}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 364}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 360}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 370}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 363}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [357]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 118, "type": "Reroute", "pos": [-230, 4210], "size": [75, 26], "flags": {}, "order": 13, "mode": 0, "inputs": [{"name": "", "type": "*", "link": 303}], "outputs": [{"name": "", "type": "VAE", "links": [289, 291, 367]}], "properties": {"showOutputText": false, "horizontal": false}}, {"id": 151, "type": "LTXVImgToVideoInplace", "pos": [-20, 4070], "size": [270, 182], "flags": {}, "order": 38, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 367}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 379}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 366}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 371}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 368}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 104, "type": "LTXVCropGuides", "pos": [-10, 3840], "size": [240, 66], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 310}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 312}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 270}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [281]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [282]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [287]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVCropGuides", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 112, "type": "LTXVLatentUpsampler", "pos": [-10, 3960], "size": [260, 66], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 287}, {"localized_name": "upscale_model", "name": "upscale_model", "type": "LATENT_UPSCALE_MODEL", "link": 288}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 289}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [366]}], "title": "spatial", "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVLatentUpsampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 132, "type": "LTXVAddGuide", "pos": [-600, 4420], "size": [270, 209.16666666666669], "flags": {}, "order": 33, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 313}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 314}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 328}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 357}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 348}, {"localized_name": "frame_idx", "name": "frame_idx", "type": "INT", "widget": {"name": "frame_idx"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [309, 310]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [311, 312]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [324]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LTXVAddGuide", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 1]}, {"id": 103, "type": "CheckpointLoaderSimple", "pos": [-1650, 3590], "size": [420, 98], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 385}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [325]}, {"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": []}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [303, 328, 353, 359]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CheckpointLoaderSimple", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 97, "type": "LTXAVTextEncoderLoader", "pos": [-1650, 4040], "size": [420, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "text_encoder", "name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": 389}, {"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 387}, {"localized_name": "device", "name": "device", "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [294, 295]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXAVTextEncoderLoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}, {"name": "gemma_3_12B_it_fp4_mixed.safetensors", "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-dev-fp8.safetensors", "default"]}, {"id": 105, "type": "LoraLoaderModelOnly", "pos": [-70, 3570], "size": [390, 82], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 327}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 390}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-distilled-lora-384.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-distilled-lora-384.safetensors", 1]}, {"id": 100, "type": "LatentUpscaleModelLoader", "pos": [-70, 3700], "size": [390, 60], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 391}], "outputs": [{"localized_name": "LATENT_UPSCALE_MODEL", "name": "LATENT_UPSCALE_MODEL", "type": "LATENT_UPSCALE_MODEL", "links": [288]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LatentUpscaleModelLoader", "models": [{"name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", "directory": "latent_upscale_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-spatial-upscaler-x2-1.0.safetensors"]}, {"id": 154, "type": "MarkdownNote", "pos": [-1660, 4870], "size": [350, 170], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [], "outputs": [], "title": "Frame Rate Note", "properties": {}, "widgets_values": ["Please make sure the frame rate value is the same in both boxes"], "color": "#222", "bgcolor": "#000"}, {"id": 155, "type": "LTXVAudioVAELoader", "pos": [-1640, 3910], "size": [400, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 386}], "outputs": [{"localized_name": "Audio VAE", "name": "Audio VAE", "type": "VAE", "links": [383, 384]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "LTXVAudioVAELoader"}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}], "groups": [{"id": 1, "title": "Model", "bounding": [-1660, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Basic Sampling", "bounding": [-700, 3440, 570, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [-1180, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Latent", "bounding": [-1180, 4290, 1050, 680], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Upscale Sampling(2x)", "bounding": [-100, 3440, 1090, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 6, "title": "Sampler", "bounding": [350, 3480, 620, 750], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Model", "bounding": [-90, 3480, 430, 310], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Frame rate", "bounding": [-1640, 4550, 290, 271.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 326, "origin_id": 134, "origin_slot": 0, "target_id": 93, "target_slot": 0, "type": "MODEL"}, {"id": 309, "origin_id": 132, "origin_slot": 0, "target_id": 93, "target_slot": 1, "type": "CONDITIONING"}, {"id": 311, "origin_id": 132, "origin_slot": 1, "target_id": 93, "target_slot": 2, "type": "CONDITIONING"}, {"id": 266, "origin_id": 122, "origin_slot": 1, "target_id": 101, "target_slot": 1, "type": "LATENT"}, {"id": 280, "origin_id": 105, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "MODEL"}, {"id": 281, "origin_id": 104, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 282, "origin_id": 104, "origin_slot": 1, "target_id": 108, "target_slot": 2, "type": "CONDITIONING"}, {"id": 329, "origin_id": 110, "origin_slot": 2, "target_id": 111, "target_slot": 1, "type": "INT"}, {"id": 260, "origin_id": 126, "origin_slot": 0, "target_id": 123, "target_slot": 0, "type": "NOISE"}, {"id": 261, "origin_id": 93, "origin_slot": 0, "target_id": 123, "target_slot": 1, "type": "GUIDER"}, {"id": 262, "origin_id": 94, "origin_slot": 0, "target_id": 123, "target_slot": 2, "type": "SAMPLER"}, {"id": 263, "origin_id": 95, "origin_slot": 0, "target_id": 123, "target_slot": 3, "type": "SIGMAS"}, {"id": 323, "origin_id": 116, "origin_slot": 0, "target_id": 123, "target_slot": 4, "type": "LATENT"}, {"id": 296, "origin_id": 110, "origin_slot": 0, "target_id": 115, "target_slot": 0, "type": "INT"}, {"id": 297, "origin_id": 110, "origin_slot": 1, "target_id": 115, "target_slot": 1, "type": "INT"}, {"id": 330, "origin_id": 110, "origin_slot": 2, "target_id": 115, "target_slot": 2, "type": "INT"}, {"id": 325, "origin_id": 103, "origin_slot": 0, "target_id": 134, "target_slot": 0, "type": "MODEL"}, {"id": 292, "origin_id": 124, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "CONDITIONING"}, {"id": 293, "origin_id": 119, "origin_slot": 0, "target_id": 114, "target_slot": 1, "type": "CONDITIONING"}, {"id": 294, "origin_id": 97, "origin_slot": 0, "target_id": 119, "target_slot": 0, "type": "CLIP"}, {"id": 324, "origin_id": 132, "origin_slot": 2, "target_id": 116, "target_slot": 0, "type": "LATENT"}, {"id": 300, "origin_id": 111, "origin_slot": 0, "target_id": 116, "target_slot": 1, "type": "LATENT"}, {"id": 313, "origin_id": 114, "origin_slot": 0, "target_id": 132, "target_slot": 0, "type": "CONDITIONING"}, {"id": 314, "origin_id": 114, "origin_slot": 1, "target_id": 132, "target_slot": 1, "type": "CONDITIONING"}, {"id": 328, "origin_id": 103, "origin_slot": 2, "target_id": 132, "target_slot": 2, "type": "VAE"}, {"id": 272, "origin_id": 123, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 336, "origin_id": 107, "origin_slot": 1, "target_id": 138, "target_slot": 0, "type": "LATENT"}, {"id": 339, "origin_id": 139, "origin_slot": 0, "target_id": 106, "target_slot": 1, "type": "AUDIO"}, {"id": 295, "origin_id": 97, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "CLIP"}, {"id": 303, "origin_id": 103, "origin_slot": 2, "target_id": 118, "target_slot": 0, "type": "VAE"}, {"id": 338, "origin_id": 138, "origin_slot": 1, "target_id": 139, "target_slot": 0, "type": "LATENT"}, {"id": 337, "origin_id": 138, "origin_slot": 0, "target_id": 113, "target_slot": 0, "type": "LATENT"}, {"id": 291, "origin_id": 118, "origin_slot": 0, "target_id": 113, "target_slot": 1, "type": "VAE"}, {"id": 276, "origin_id": 108, "origin_slot": 0, "target_id": 107, "target_slot": 1, "type": "GUIDER"}, {"id": 277, "origin_id": 98, "origin_slot": 0, "target_id": 107, "target_slot": 2, "type": "SAMPLER"}, {"id": 278, "origin_id": 99, "origin_slot": 0, "target_id": 107, "target_slot": 3, "type": "SIGMAS"}, {"id": 279, "origin_id": 101, "origin_slot": 0, "target_id": 107, "target_slot": 4, "type": "LATENT"}, {"id": 327, "origin_id": 134, "origin_slot": 0, "target_id": 105, "target_slot": 0, "type": "MODEL"}, {"id": 310, "origin_id": 132, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "CONDITIONING"}, {"id": 312, "origin_id": 132, "origin_slot": 1, "target_id": 104, "target_slot": 1, "type": "CONDITIONING"}, {"id": 270, "origin_id": 122, "origin_slot": 0, "target_id": 104, "target_slot": 2, "type": "LATENT"}, {"id": 287, "origin_id": 104, "origin_slot": 2, "target_id": 112, "target_slot": 0, "type": "LATENT"}, {"id": 288, "origin_id": 100, "origin_slot": 0, "target_id": 112, "target_slot": 1, "type": "LATENT_UPSCALE_MODEL"}, {"id": 289, "origin_id": 118, "origin_slot": 0, "target_id": 112, "target_slot": 2, "type": "VAE"}, {"id": 322, "origin_id": 116, "origin_slot": 0, "target_id": 95, "target_slot": 0, "type": "LATENT"}, {"id": 304, "origin_id": 106, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 124, "target_slot": 1, "type": "STRING"}, {"id": 347, "origin_id": 143, "origin_slot": 0, "target_id": 107, "target_slot": 0, "type": "NOISE"}, {"id": 348, "origin_id": -10, "origin_slot": 1, "target_id": 132, "target_slot": 4, "type": "IMAGE"}, {"id": 349, "origin_id": -10, "origin_slot": 1, "target_id": 110, "target_slot": 0, "type": "IMAGE"}, {"id": 351, "origin_id": 138, "origin_slot": 0, "target_id": 144, "target_slot": 0, "type": "LATENT"}, {"id": 352, "origin_id": 144, "origin_slot": 0, "target_id": 106, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 103, "origin_slot": 2, "target_id": 144, "target_slot": 1, "type": "VAE"}, {"id": 354, "origin_id": 145, "origin_slot": 0, "target_id": 111, "target_slot": 2, "type": "INT"}, {"id": 355, "origin_id": 148, "origin_slot": 0, "target_id": 114, "target_slot": 2, "type": "FLOAT"}, {"id": 356, "origin_id": 148, "origin_slot": 0, "target_id": 106, "target_slot": 2, "type": "FLOAT"}, {"id": 357, "origin_id": 149, "origin_slot": 0, "target_id": 132, "target_slot": 3, "type": "LATENT"}, {"id": 359, "origin_id": 103, "origin_slot": 2, "target_id": 149, "target_slot": 0, "type": "VAE"}, {"id": 360, "origin_id": 115, "origin_slot": 0, "target_id": 149, "target_slot": 2, "type": "LATENT"}, {"id": 363, "origin_id": -10, "origin_slot": 3, "target_id": 149, "target_slot": 4, "type": "BOOLEAN"}, {"id": 364, "origin_id": -10, "origin_slot": 4, "target_id": 149, "target_slot": 1, "type": "IMAGE"}, {"id": 365, "origin_id": 151, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "LATENT"}, {"id": 366, "origin_id": 112, "origin_slot": 0, "target_id": 151, "target_slot": 2, "type": "LATENT"}, {"id": 367, "origin_id": 118, "origin_slot": 0, "target_id": 151, "target_slot": 0, "type": "VAE"}, {"id": 368, "origin_id": -10, "origin_slot": 3, "target_id": 151, "target_slot": 4, "type": "BOOLEAN"}, {"id": 370, "origin_id": -10, "origin_slot": 2, "target_id": 149, "target_slot": 3, "type": "FLOAT"}, {"id": 371, "origin_id": -10, "origin_slot": 2, "target_id": 151, "target_slot": 3, "type": "FLOAT"}, {"id": 379, "origin_id": -10, "origin_slot": 4, "target_id": 151, "target_slot": 1, "type": "IMAGE"}, {"id": 383, "origin_id": 155, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "VAE"}, {"id": 384, "origin_id": 155, "origin_slot": 0, "target_id": 139, "target_slot": 1, "type": "VAE"}, {"id": 385, "origin_id": -10, "origin_slot": 5, "target_id": 103, "target_slot": 0, "type": "COMBO"}, {"id": 386, "origin_id": -10, "origin_slot": 5, "target_id": 155, "target_slot": 0, "type": "COMBO"}, {"id": 387, "origin_id": -10, "origin_slot": 5, "target_id": 97, "target_slot": 1, "type": "COMBO"}, {"id": 388, "origin_id": -10, "origin_slot": 6, "target_id": 134, "target_slot": 1, "type": "COMBO"}, {"id": 389, "origin_id": -10, "origin_slot": 7, "target_id": 97, "target_slot": 0, "type": "COMBO"}, {"id": 390, "origin_id": -10, "origin_slot": 8, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 391, "origin_id": -10, "origin_slot": 9, "target_id": 100, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Canny to video"}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 0.7537190265006444, "offset": [-330.27244430536007, -3324.725077010053]}}, "version": 0.4} diff --git a/blueprints/Chromatic Aberration.json b/blueprints/Chromatic Aberration.json new file mode 100644 index 000000000..5513cc665 --- /dev/null +++ b/blueprints/Chromatic Aberration.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 19, "last_link_id": 0, "nodes": [{"id": 19, "type": "2c5ef154-2bde-496d-bc8b-9dcf42f2913f", "pos": [3710, -2070], "size": [260, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Chromatic Aberration", "properties": {"proxyWidgets": [["17", "choice"], ["18", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "2c5ef154-2bde-496d-bc8b-9dcf42f2913f", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 18, "lastLinkId": 23, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Chromatic Aberration", "inputNode": {"id": -10, "bounding": [3270, -2050, 120, 60]}, "outputNode": {"id": -20, "bounding": [4260, -2050, 120, 60]}, "inputs": [{"id": "3b33ac46-93a6-4b1c-896a-ed6fbd24e59c", "name": "images.image0", "type": "IMAGE", "linkIds": [20], "localized_name": "images.image0", "label": "image", "pos": [3370, -2030]}], "outputs": [{"id": "abe7cd79-a87b-4bd0-8923-d79a57d81a6e", "name": "IMAGE0", "type": "IMAGE", "linkIds": [23], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4280, -2030]}], "widgets": [], "nodes": [{"id": 16, "type": "GLSLShader", "pos": [3810, -2320], "size": [390, 212], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 20}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 22}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 21}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [23]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Mode\nuniform float u_float0; // Amount (0 to 100)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int MODE_LINEAR = 0;\nconst int MODE_RADIAL = 1;\nconst int MODE_BARREL = 2;\nconst int MODE_SWIRL = 3;\nconst int MODE_DIAGONAL = 4;\n\nconst float AMOUNT_SCALE = 0.0005;\nconst float RADIAL_MULT = 4.0;\nconst float BARREL_MULT = 8.0;\nconst float INV_SQRT2 = 0.70710678118;\n\nvoid main() {\n vec2 uv = v_texCoord;\n vec4 original = texture(u_image0, uv);\n\n float amount = u_float0 * AMOUNT_SCALE;\n\n if (amount < 0.000001) {\n fragColor = original;\n return;\n }\n\n // Aspect-corrected coordinates for circular effects\n float aspect = u_resolution.x / u_resolution.y;\n vec2 centered = uv - 0.5;\n vec2 corrected = vec2(centered.x * aspect, centered.y);\n float r = length(corrected);\n vec2 dir = r > 0.0001 ? corrected / r : vec2(0.0);\n vec2 offset = vec2(0.0);\n\n if (u_int0 == MODE_LINEAR) {\n // Horizontal shift (no aspect correction needed)\n offset = vec2(amount, 0.0);\n }\n else if (u_int0 == MODE_RADIAL) {\n // Outward from center, stronger at edges\n offset = dir * r * amount * RADIAL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_BARREL) {\n // Lens distortion simulation (r² falloff)\n offset = dir * r * r * amount * BARREL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_SWIRL) {\n // Perpendicular to radial (rotational aberration)\n vec2 perp = vec2(-dir.y, dir.x);\n offset = perp * r * amount * RADIAL_MULT;\n offset.x /= aspect; // Convert back to UV space\n }\n else if (u_int0 == MODE_DIAGONAL) {\n // 45° offset (no aspect correction needed)\n offset = vec2(amount, amount) * INV_SQRT2;\n }\n \n float red = texture(u_image0, uv + offset).r;\n float green = original.g;\n float blue = texture(u_image0, uv - offset).b;\n \n fragColor = vec4(red, green, blue, original.a);\n}", "from_input"]}, {"id": 18, "type": "PrimitiveFloat", "pos": [3810, -2430], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "amount", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [22]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "step": 1}, "widgets_values": [30]}, {"id": 17, "type": "CustomCombo", "pos": [3520, -2320], "size": [270, 222], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [21]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Linear", 0, "Linear", "Radial", "Barrel", "Swirl", "Diagonal", ""]}], "groups": [], "links": [{"id": 22, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "FLOAT"}, {"id": 21, "origin_id": 17, "origin_slot": 1, "target_id": 16, "target_slot": 4, "type": "INT"}, {"id": 20, "origin_id": -10, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "IMAGE"}, {"id": 23, "origin_id": 16, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} diff --git a/blueprints/Color Adjustment.json b/blueprints/Color Adjustment.json new file mode 100644 index 000000000..c599f7213 --- /dev/null +++ b/blueprints/Color Adjustment.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 14, "last_link_id": 0, "nodes": [{"id": 14, "type": "36677b92-5dd8-47a5-9380-4da982c1894f", "pos": [3610, -2630], "size": [270, 150], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["4", "value"], ["5", "value"], ["7", "value"], ["6", "value"]]}, "widgets_values": [], "title": "Color Adjustment"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "36677b92-5dd8-47a5-9380-4da982c1894f", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 16, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Color Adjustment", "inputNode": {"id": -10, "bounding": [3110, -3560, 120, 60]}, "outputNode": {"id": -20, "bounding": [4070, -3560, 120, 60]}, "inputs": [{"id": "0431d493-5f28-4430-bd00-84733997fc08", "name": "images.image0", "type": "IMAGE", "linkIds": [29], "localized_name": "images.image0", "label": "image", "pos": [3210, -3540]}], "outputs": [{"id": "bee8ea06-a114-4612-8937-939f2c927bdb", "name": "IMAGE0", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4090, -3540]}], "widgets": [], "nodes": [{"id": 15, "type": "GLSLShader", "pos": [3590, -3940], "size": [420, 252], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 29}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 34}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 30}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 31}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 33}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [28]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // temperature (-100 to 100)\nuniform float u_float1; // tint (-100 to 100)\nuniform float u_float2; // vibrance (-100 to 100)\nuniform float u_float3; // saturation (-100 to 100)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst float INPUT_SCALE = 0.01;\nconst float TEMP_TINT_PRIMARY = 0.3;\nconst float TEMP_TINT_SECONDARY = 0.15;\nconst float VIBRANCE_BOOST = 2.0;\nconst float SATURATION_BOOST = 2.0;\nconst float SKIN_PROTECTION = 0.5;\nconst float EPSILON = 0.001;\nconst vec3 LUMA_WEIGHTS = vec3(0.299, 0.587, 0.114);\n\nvoid main() {\n vec4 tex = texture(u_image0, v_texCoord);\n vec3 color = tex.rgb;\n \n // Scale inputs: -100/100 \u2192 -1/1\n float temperature = u_float0 * INPUT_SCALE;\n float tint = u_float1 * INPUT_SCALE;\n float vibrance = u_float2 * INPUT_SCALE;\n float saturation = u_float3 * INPUT_SCALE;\n \n // Temperature (warm/cool): positive = warm, negative = cool\n color.r += temperature * TEMP_TINT_PRIMARY;\n color.b -= temperature * TEMP_TINT_PRIMARY;\n \n // Tint (green/magenta): positive = green, negative = magenta\n color.g += tint * TEMP_TINT_PRIMARY;\n color.r -= tint * TEMP_TINT_SECONDARY;\n color.b -= tint * TEMP_TINT_SECONDARY;\n \n // Single clamp after temperature/tint\n color = clamp(color, 0.0, 1.0);\n \n // Vibrance with skin protection\n if (vibrance != 0.0) {\n float maxC = max(color.r, max(color.g, color.b));\n float minC = min(color.r, min(color.g, color.b));\n float sat = maxC - minC;\n float gray = dot(color, LUMA_WEIGHTS);\n \n if (vibrance < 0.0) {\n // Desaturate: -100 \u2192 gray\n color = mix(vec3(gray), color, 1.0 + vibrance);\n } else {\n // Boost less saturated colors more\n float vibranceAmt = vibrance * (1.0 - sat);\n \n // Branchless skin tone protection\n float isWarmTone = step(color.b, color.g) * step(color.g, color.r);\n float warmth = (color.r - color.b) / max(maxC, EPSILON);\n float skinTone = isWarmTone * warmth * sat * (1.0 - sat);\n vibranceAmt *= (1.0 - skinTone * SKIN_PROTECTION);\n \n color = mix(vec3(gray), color, 1.0 + vibranceAmt * VIBRANCE_BOOST);\n }\n }\n \n // Saturation\n if (saturation != 0.0) {\n float gray = dot(color, LUMA_WEIGHTS);\n float satMix = saturation < 0.0\n ? 1.0 + saturation // -100 \u2192 gray\n : 1.0 + saturation * SATURATION_BOOST; // +100 \u2192 3x boost\n color = mix(vec3(gray), color, satMix);\n }\n \n fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);\n}", "from_input"]}, {"id": 6, "type": "PrimitiveFloat", "pos": [3290, -3610], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "vibrance", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [26, 31]}], "title": "Vibrance", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 7, "type": "PrimitiveFloat", "pos": [3290, -3720], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "saturation", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [33]}], "title": "Saturation", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 5, "type": "PrimitiveFloat", "pos": [3290, -3830], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "tint", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [30]}], "title": "Tint", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 255, 0]}, {"offset": 0.5, "color": [255, 255, 255]}, {"offset": 1, "color": [255, 0, 255]}]}, "widgets_values": [0]}, {"id": 4, "type": "PrimitiveFloat", "pos": [3290, -3940], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "temperature", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [34]}], "title": "Temperature", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": -100, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [68, 136, 255]}, {"offset": 0.5, "color": [255, 255, 255]}, {"offset": 1, "color": [255, 136, 0]}]}, "widgets_values": [100]}], "groups": [], "links": [{"id": 34, "origin_id": 4, "origin_slot": 0, "target_id": 15, "target_slot": 2, "type": "FLOAT"}, {"id": 30, "origin_id": 5, "origin_slot": 0, "target_id": 15, "target_slot": 3, "type": "FLOAT"}, {"id": 31, "origin_id": 6, "origin_slot": 0, "target_id": 15, "target_slot": 4, "type": "FLOAT"}, {"id": 33, "origin_id": 7, "origin_slot": 0, "target_id": 15, "target_slot": 5, "type": "FLOAT"}, {"id": 29, "origin_id": -10, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 28, "origin_id": 15, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} diff --git a/blueprints/Depth to Image (Z-Image-Turbo).json b/blueprints/Depth to Image (Z-Image-Turbo).json new file mode 100644 index 000000000..baffc4fc9 --- /dev/null +++ b/blueprints/Depth to Image (Z-Image-Turbo).json @@ -0,0 +1 @@ +{"id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", "revision": 0, "last_node_id": 76, "last_link_id": 259, "nodes": [{"id": 13, "type": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", "pos": [400, 3630], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "control image", "name": "image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": null}, {"label": "lotus_model", "name": "unet_name_1", "type": "COMBO", "widget": {"name": "unet_name_1"}, "link": null}, {"label": "sd15_vae", "name": "vae_name_1", "type": "COMBO", "widget": {"name": "vae_name_1"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "name"], ["-1", "unet_name_1"], ["-1", "vae_name_1"], ["7", "control_after_generate"], ["7", "seed"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 76, "lastLinkId": 259, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Depth to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [27.60368520069494, 4936.043696127976, 120, 200]}, "outputNode": {"id": -20, "bounding": [1598.6038576146689, 4936.043696127976, 120, 60]}, "inputs": [{"id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", "name": "image", "type": "IMAGE", "linkIds": [25], "label": "control image", "pos": [127.60368520069494, 4956.043696127976]}, {"id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", "name": "text", "type": "STRING", "linkIds": [16], "label": "prompt", "pos": [127.60368520069494, 4976.043696127976]}, {"id": "add4a703-1185-4848-9494-b27dd37ff434", "name": "unet_name", "type": "COMBO", "linkIds": [252], "pos": [127.60368520069494, 4996.043696127976]}, {"id": "03233f9e-df65-4e05-b5c5-34d83129e85e", "name": "clip_name", "type": "COMBO", "linkIds": [253], "pos": [127.60368520069494, 5016.043696127976]}, {"id": "0c643ffb-326d-40ca-8a89-ebc585cf5015", "name": "vae_name", "type": "COMBO", "linkIds": [254], "pos": [127.60368520069494, 5036.043696127976]}, {"id": "409cdebe-632b-410f-a66c-711c2a1527e1", "name": "name", "type": "COMBO", "linkIds": [255], "pos": [127.60368520069494, 5056.043696127976]}, {"id": "80e6915f-5d59-4d6b-a197-d8c565ad2922", "name": "unet_name_1", "type": "COMBO", "linkIds": [258], "label": "lotus_model", "pos": [127.60368520069494, 5076.043696127976]}, {"id": "4207ec84-4409-4816-8444-76062bf6310c", "name": "vae_name_1", "type": "COMBO", "linkIds": [259], "label": "sd15_vae", "pos": [127.60368520069494, 5096.043696127976]}], "outputs": [{"id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", "name": "IMAGE", "type": "IMAGE", "linkIds": [18], "pos": [1618.6038576146689, 4956.043696127976]}], "widgets": [], "nodes": [{"id": 1, "type": "CLIPLoader", "pos": [228.60381716506714, 4700.188262345759], "size": [269.9479166666667, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 253}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [14]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 2, "type": "UNETLoader", "pos": [228.60381716506714, 4550.188402733727], "size": [269.9479166666667, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 252}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [9]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 3, "type": "VAELoader", "pos": [228.60381716506714, 4880.188283008492], "size": [269.9479166666667, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 254}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [2, 11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 4, "type": "ModelPatchLoader", "pos": [228.60381716506714, 5010.1883654774], "size": [269.9479166666667, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "name", "name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": 255}], "outputs": [{"localized_name": "MODEL_PATCH", "name": "MODEL_PATCH", "type": "MODEL_PATCH", "links": [10]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelPatchLoader", "models": [{"name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "directory": "model_patches"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}, {"id": 6, "type": "ModelSamplingAuraFlow", "pos": [998.6041081931173, 4490.1880693746825], "size": [289.97395833333337, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 3}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [4]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 7, "type": "KSampler", "pos": [998.6041081931173, 4600.188363442829], "size": [300, 262], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 4}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 5}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 6}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 7}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 9, 1, "res_multistep", "simple", 1]}, {"id": 8, "type": "ConditioningZeroOut", "pos": [748.2706508086186, 5044.854997097082], "size": [204.134765625, 26], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 8}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 10, "type": "EmptySD3LatentImage", "pos": [1028.2702326451792, 5334.855683329977], "size": [259.9479166666667, 106], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 12}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 13}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [7]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}, {"id": 5, "type": "VAEDecode", "pos": [1338.604012131086, 4500.188453282262], "size": [200, 46], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 1}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 2}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 9, "type": "QwenImageDiffsynthControlnet", "pos": [608.2704996459613, 5204.85528564724], "size": [289.97395833333337, 138], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 9}, {"localized_name": "model_patch", "name": "model_patch", "type": "MODEL_PATCH", "link": 10}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 11}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 248}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [3]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "QwenImageDiffsynthControlnet", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 11, "type": "GetImageSize", "pos": [530, 5440], "size": [140, 66], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 247}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [12]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [13]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 12, "type": "CLIPTextEncode", "pos": [548.2706278500244, 4544.854827124228], "size": [400, 420], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 14}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 16}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [5, 8]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 14, "type": "ImageScaleToTotalPixels", "pos": [90, 5180], "size": [270, 106], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 25}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "megapixels", "name": "megapixels", "type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": "resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [248, 250]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "ImageScaleToTotalPixels"}, "widgets_values": ["lanczos", 1, 1]}, {"id": 15, "type": "PreviewImage", "pos": [90, 5530], "size": [380, 260], "flags": {}, "order": 13, "mode": 4, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 251}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "PreviewImage"}, "widgets_values": []}, {"id": 76, "type": "458bdf3c-4b58-421c-af50-c9c663a4d74c", "pos": [90, 5340], "size": [400, 150], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 250}, {"label": "depth_intensity", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 258}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 259}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [247, 251]}], "properties": {"proxyWidgets": [["-1", "sigma"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [999.0000000000002, "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "groups": [{"id": 1, "title": "Prompt", "bounding": [530, 4470, 440, 630], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Models", "bounding": [210, 4470, 300, 640], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Apple ControlNet", "bounding": [530, 5120, 440, 260], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 1, "origin_id": 7, "origin_slot": 0, "target_id": 5, "target_slot": 0, "type": "LATENT"}, {"id": 2, "origin_id": 3, "origin_slot": 0, "target_id": 5, "target_slot": 1, "type": "VAE"}, {"id": 3, "origin_id": 9, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "MODEL"}, {"id": 4, "origin_id": 6, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 12, "origin_slot": 0, "target_id": 7, "target_slot": 1, "type": "CONDITIONING"}, {"id": 6, "origin_id": 8, "origin_slot": 0, "target_id": 7, "target_slot": 2, "type": "CONDITIONING"}, {"id": 7, "origin_id": 10, "origin_slot": 0, "target_id": 7, "target_slot": 3, "type": "LATENT"}, {"id": 8, "origin_id": 12, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "CONDITIONING"}, {"id": 9, "origin_id": 2, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "MODEL"}, {"id": 10, "origin_id": 4, "origin_slot": 0, "target_id": 9, "target_slot": 1, "type": "MODEL_PATCH"}, {"id": 11, "origin_id": 3, "origin_slot": 0, "target_id": 9, "target_slot": 2, "type": "VAE"}, {"id": 12, "origin_id": 11, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "INT"}, {"id": 13, "origin_id": 11, "origin_slot": 1, "target_id": 10, "target_slot": 1, "type": "INT"}, {"id": 14, "origin_id": 1, "origin_slot": 0, "target_id": 12, "target_slot": 0, "type": "CLIP"}, {"id": 16, "origin_id": -10, "origin_slot": 1, "target_id": 12, "target_slot": 1, "type": "STRING"}, {"id": 18, "origin_id": 5, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 25, "origin_id": -10, "origin_slot": 0, "target_id": 14, "target_slot": 0, "type": "IMAGE"}, {"id": 247, "origin_id": 76, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 248, "origin_id": 14, "origin_slot": 0, "target_id": 9, "target_slot": 3, "type": "IMAGE"}, {"id": 250, "origin_id": 14, "origin_slot": 0, "target_id": 76, "target_slot": 0, "type": "IMAGE"}, {"id": 251, "origin_id": 76, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 252, "origin_id": -10, "origin_slot": 2, "target_id": 2, "target_slot": 0, "type": "COMBO"}, {"id": 253, "origin_id": -10, "origin_slot": 3, "target_id": 1, "target_slot": 0, "type": "COMBO"}, {"id": 254, "origin_id": -10, "origin_slot": 4, "target_id": 3, "target_slot": 0, "type": "COMBO"}, {"id": 255, "origin_id": -10, "origin_slot": 5, "target_id": 4, "target_slot": 0, "type": "COMBO"}, {"id": 258, "origin_id": -10, "origin_slot": 6, "target_id": 76, "target_slot": 2, "type": "COMBO"}, {"id": 259, "origin_id": -10, "origin_slot": 7, "target_id": 76, "target_slot": 3, "type": "COMBO"}], "extra": {"ds": {"scale": 1.3889423076923078, "offset": [22.056074766355096, -3503.3333333333335]}, "frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Depth to image"}, {"id": "458bdf3c-4b58-421c-af50-c9c663a4d74c", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 76, "lastLinkId": 259, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image to Depth Map (Lotus)", "inputNode": {"id": -10, "bounding": [-60, -172.61268043518066, 126.625, 120]}, "outputNode": {"id": -20, "bounding": [1650, -172.61268043518066, 120, 60]}, "inputs": [{"id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", "name": "pixels", "type": "IMAGE", "linkIds": [37], "localized_name": "pixels", "pos": [46.625, -152.61268043518066]}, {"id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", "name": "sigma", "type": "FLOAT", "linkIds": [243], "label": "depth_intensity", "pos": [46.625, -132.61268043518066]}, {"id": "d721b249-fd2a-441b-9a78-2805f04e2644", "name": "unet_name", "type": "COMBO", "linkIds": [256], "pos": [46.625, -112.61268043518066]}, {"id": "0430e2ea-f8b5-4191-9b72-b7d62176f97c", "name": "vae_name", "type": "COMBO", "linkIds": [257], "pos": [46.625, -92.61268043518066]}], "outputs": [{"id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", "name": "IMAGE", "type": "IMAGE", "linkIds": [242], "localized_name": "IMAGE", "pos": [1670, -152.61268043518066]}], "widgets": [], "nodes": [{"id": 8, "type": "VAEDecode", "pos": [1380.0000135211146, -240.0000135211144], "size": [210, 60], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 232}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 240}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEDecode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 10, "type": "UNETLoader", "pos": [135.34178335388546, -290.1947851765315], "size": [305.9244791666667, 97.7734375], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 256}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [31, 241]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "lotus-depth-d-v1-1.safetensors", "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", "directory": "diffusion_models"}], "widget_ue_connectable": {}}, "widgets_values": ["lotus-depth-d-v1-1.safetensors", "default"]}, {"id": 14, "type": "VAELoader", "pos": [134.53144605616137, -165.18194011768782], "size": [305.9244791666667, 68.88020833333334], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 257}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [38, 240]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "vae-ft-mse-840000-ema-pruned.safetensors", "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", "directory": "vae"}], "widget_ue_connectable": {}}, "widgets_values": ["vae-ft-mse-840000-ema-pruned.safetensors"]}, {"id": 16, "type": "SamplerCustomAdvanced", "pos": [990.6585475753939, -319.91444852782104], "size": [355.1953125, 325.98958333333337], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 237}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 27}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 33}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 194}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 201}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "slot_index": 0, "links": [232]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "slot_index": 1, "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SamplerCustomAdvanced", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 18, "type": "DisableNoise", "pos": [730.4769792883567, -320.00005408445816], "size": [210, 40], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "slot_index": 0, "links": [237]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "DisableNoise", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 19, "type": "BasicGuider", "pos": [730.2630921572128, -251.22541185314978], "size": [210, 60], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 241}, {"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 238}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "slot_index": 0, "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicGuider", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 20, "type": "BasicScheduler", "pos": [488.64457755981744, -147.67201223931278], "size": [210, 122.21354166666667], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 31}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [66]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicScheduler", "widget_ue_connectable": {}}, "widgets_values": ["normal", 1, 1]}, {"id": 21, "type": "KSamplerSelect", "pos": [730.2630921572128, -161.22540847287118], "size": [210, 68.88020833333334], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "slot_index": 0, "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSamplerSelect", "widget_ue_connectable": {}}, "widgets_values": ["euler"]}, {"id": 22, "type": "ImageInvert", "pos": [1373.3333333333335, -318.33333333333337], "size": [210, 40], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 35}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [242]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageInvert", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 23, "type": "VAEEncode", "pos": [730.2630921572128, 38.774608428522015], "size": [210, 60], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 37}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 38}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEEncode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 28, "type": "SetFirstSigma", "pos": [730.2630921572128, -61.225357768691524], "size": [210, 66.66666666666667], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 66}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": 243}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SetFirstSigma", "widget_ue_connectable": {}}, "widgets_values": [999.0000000000002]}, {"id": 68, "type": "LotusConditioning", "pos": [489.99998478874613, -229.99996619721344], "size": [210, 40], "flags": {}, "order": 4, "mode": 0, "inputs": [], "outputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "slot_index": 0, "links": [238]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LotusConditioning", "widget_ue_connectable": {}}, "widgets_values": []}], "groups": [{"id": 2, "title": "Models", "bounding": [123.33333333333334, -351.6666666666667, 323.4014831310574, 263.55972005884377], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 232, "origin_id": 16, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 240, "origin_id": 14, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 237, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "NOISE"}, {"id": 27, "origin_id": 19, "origin_slot": 0, "target_id": 16, "target_slot": 1, "type": "GUIDER"}, {"id": 33, "origin_id": 21, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "SAMPLER"}, {"id": 194, "origin_id": 28, "origin_slot": 0, "target_id": 16, "target_slot": 3, "type": "SIGMAS"}, {"id": 201, "origin_id": 23, "origin_slot": 0, "target_id": 16, "target_slot": 4, "type": "LATENT"}, {"id": 241, "origin_id": 10, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 238, "origin_id": 68, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 31, "origin_id": 10, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "MODEL"}, {"id": 35, "origin_id": 8, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "IMAGE"}, {"id": 38, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 1, "type": "VAE"}, {"id": 66, "origin_id": 20, "origin_slot": 0, "target_id": 28, "target_slot": 0, "type": "SIGMAS"}, {"id": 37, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 242, "origin_id": 22, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 243, "origin_id": -10, "origin_slot": 1, "target_id": 28, "target_slot": 1, "type": "FLOAT"}, {"id": 256, "origin_id": -10, "origin_slot": 2, "target_id": 10, "target_slot": 0, "type": "COMBO"}, {"id": 257, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}], "extra": {"ds": {"scale": 1.2354281696404266, "offset": [-114.15605447786857, -754.3368938705543]}, "workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"ds": {"scale": 0.7886233956111374, "offset": [741.6589462093539, -3278.0806447095165]}, "frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "version": 0.4} diff --git a/blueprints/Depth to Video (ltx 2.0).json b/blueprints/Depth to Video (ltx 2.0).json new file mode 100644 index 000000000..9656b6253 --- /dev/null +++ b/blueprints/Depth to Video (ltx 2.0).json @@ -0,0 +1 @@ +{"id": "ec176c82-4db5-4ab9-b5a0-8aa8e5684a81", "revision": 0, "last_node_id": 191, "last_link_id": 433, "nodes": [{"id": 143, "type": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", "pos": [289.99998661973035, 3960.0002084505168], "size": [400, 500], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "image_strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"label": "disable_first_frame", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": null}, {"label": "depth reference video", "name": "video", "type": "VIDEO", "link": null}, {"label": "first frame", "name": "image_2", "type": "IMAGE", "link": null}, {"label": "width", "name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": null}, {"label": "height", "name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": null}, {"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": null}, {"label": "distill_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}, {"label": "lotus_depth_model", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"label": "sd15_vae", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "bypass"], ["-1", "strength"], ["-1", "resize_type.width"], ["-1", "resize_type.height"], ["-1", "length"], ["126", "noise_seed"], ["143", "control_after_generate"], ["-1", "ckpt_name"], ["-1", "lora_name"], ["-1", "text_encoder"], ["-1", "lora_name_1"], ["-1", "model_name"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.7.0", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", false, 1, 1280, 720, 121, null, null, "ltx-2-19b-dev-fp8.safetensors", "ltx-2-19b-ic-lora-depth-control.safetensors", "gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-distilled-lora-384.safetensors", "ltx-2-spatial-upscaler-x2-1.0.safetensors", "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "68857357-cbc2-4c3a-a786-c3a58d43f9b1", "version": 1, "state": {"lastGroupId": 16, "lastNodeId": 191, "lastLinkId": 433, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Depth to Video (LTX 2.0)", "inputNode": {"id": -10, "bounding": [-2730, 4020, 165.30859375, 340]}, "outputNode": {"id": -20, "bounding": [1750, 4090, 120, 60]}, "inputs": [{"id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", "name": "text", "type": "STRING", "linkIds": [345], "label": "prompt", "pos": [-2584.69140625, 4040]}, {"id": "59430efe-1090-4e36-8afe-b21ce7f4268b", "name": "strength", "type": "FLOAT", "linkIds": [370, 371], "label": "image_strength", "pos": [-2584.69140625, 4060]}, {"id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", "name": "bypass", "type": "BOOLEAN", "linkIds": [363, 368], "label": "disable_first_frame", "pos": [-2584.69140625, 4080]}, {"id": "de434962-832a-485c-a016-869b3f2176ca", "name": "video", "type": "VIDEO", "linkIds": [419], "label": "depth reference video", "pos": [-2584.69140625, 4100]}, {"id": "a1189d3d-bbff-4933-875d-cffa58dd4cb0", "name": "image_2", "type": "IMAGE", "linkIds": [410], "label": "first frame", "pos": [-2584.69140625, 4120]}, {"id": "577dae4c-447b-4c84-9973-56381fdbc6a9", "name": "resize_type.width", "type": "INT", "linkIds": [420], "label": "width", "pos": [-2584.69140625, 4140]}, {"id": "fb30c570-128c-46b8-a140-054aff294edc", "name": "resize_type.height", "type": "INT", "linkIds": [421], "label": "height", "pos": [-2584.69140625, 4160]}, {"id": "33d5f598-00ae-4e2d-8eb2-2da23ae5ba46", "name": "length", "type": "INT", "linkIds": [422], "pos": [-2584.69140625, 4180]}, {"id": "68cc58b0-2013-4c3a-81ff-3d1e86232d76", "name": "ckpt_name", "type": "COMBO", "linkIds": [425, 433], "pos": [-2584.69140625, 4200]}, {"id": "0c65a06b-e12a-4298-8d81-69e57a123188", "name": "lora_name", "type": "COMBO", "linkIds": [426], "pos": [-2584.69140625, 4220]}, {"id": "eba96545-b8c6-4fba-b086-ddeeb4a9130d", "name": "text_encoder", "type": "COMBO", "linkIds": [427], "pos": [-2584.69140625, 4240]}, {"id": "848f9d82-3fde-4b95-b226-4b0db7082112", "name": "lora_name_1", "type": "COMBO", "linkIds": [429], "label": "distill_lora", "pos": [-2584.69140625, 4260]}, {"id": "32ace7dd-4da8-416b-b1e3-00652b3e6838", "name": "model_name", "type": "COMBO", "linkIds": [430], "pos": [-2584.69140625, 4280]}, {"id": "d6ad1978-71b6-425b-be13-c8f1e1d798d9", "name": "unet_name", "type": "COMBO", "linkIds": [431], "label": "lotus_depth_model", "pos": [-2584.69140625, 4300]}, {"id": "b0545a5d-65e8-4baa-a7be-d5f3d2b8b6e3", "name": "vae_name", "type": "COMBO", "linkIds": [432], "label": "sd15_vae", "pos": [-2584.69140625, 4320]}], "outputs": [{"id": "4e837941-de2d-4df8-8f94-686e24036897", "name": "VIDEO", "type": "VIDEO", "linkIds": [304], "localized_name": "VIDEO", "pos": [1770, 4110]}], "widgets": [], "nodes": [{"id": 93, "type": "CFGGuider", "pos": [-697.9999467324425, 3670.0001318308678], "size": [270, 106.66666666666667], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 326}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 309}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 311}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 94, "type": "KSamplerSelect", "pos": [-697.9999467324425, 3840.0000630985346], "size": [270, 68.88020833333334], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 99, "type": "ManualSigmas", "pos": [409.9999946478922, 3850.0001667604133], "size": [270, 70], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "STRING", "widget": {"name": "sigmas"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [278]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ManualSigmas", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["0.909375, 0.725, 0.421875, 0.0"]}, {"id": 101, "type": "LTXVConcatAVLatent", "pos": [409.9999946478922, 4100.000194929402], "size": [270, 110], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 365}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 266}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 108, "type": "CFGGuider", "pos": [409.9999946478922, 3700.00007661965], "size": [270, 106.66666666666667], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 281}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 282}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [276]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.71", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 111, "type": "LTXVEmptyLatentAudio", "pos": [-1100.000003380279, 4810.000230985708], "size": [270, 120], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 285}, {"localized_name": "frames_number", "name": "frames_number", "type": "INT", "widget": {"name": "frames_number"}, "link": 329}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "INT", "widget": {"name": "frame_rate"}, "link": 354}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "Latent", "name": "Latent", "type": "LATENT", "links": [300]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVEmptyLatentAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [97, 25, 1]}, {"id": 123, "type": "SamplerCustomAdvanced", "pos": [-387.99998321128277, 3520.0000416901034], "size": [213.125, 120], "flags": {}, "order": 30, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 260}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 261}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 262}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 263}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 323}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": [272]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 114, "type": "LTXVConditioning", "pos": [-1134.000099492868, 4140.000243380063], "size": [270, 86.66666666666667], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 292}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 293}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "FLOAT", "widget": {"name": "frame_rate"}, "link": 355}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [313]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [314]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 119, "type": "CLIPTextEncode", "pos": [-1164.0000442816504, 3880.0001115491955], "size": [400, 200], "flags": {}, "order": 28, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 294}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [293]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles"], "color": "#323", "bgcolor": "#535"}, {"id": 116, "type": "LTXVConcatAVLatent", "pos": [-519.9999874648, 4700.000189295605], "size": [187.5, 60], "flags": {}, "order": 26, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 324}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 300}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [322, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 122, "type": "LTXVSeparateAVLatent", "pos": [-393.9999813239605, 3800.0000146478747], "size": [240, 60], "flags": {}, "order": 29, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 272}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [270]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [266]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 124, "type": "CLIPTextEncode", "pos": [-1174.9999569014471, 3514.0002724504593], "size": [410, 320], "flags": {}, "order": 31, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 295}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 345}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [292]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 98, "type": "KSamplerSelect", "pos": [409.9999946478922, 3980.00004957742], "size": [270, 68.88020833333334], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [277]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gradient_estimation"]}, {"id": 95, "type": "LTXVScheduler", "pos": [-699.9999766197394, 3980.00004957742], "size": [270, 170], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 322}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "max_shift", "name": "max_shift", "type": "FLOAT", "widget": {"name": "max_shift"}, "link": null}, {"localized_name": "base_shift", "name": "base_shift", "type": "FLOAT", "widget": {"name": "base_shift"}, "link": null}, {"localized_name": "stretch", "name": "stretch", "type": "BOOLEAN", "widget": {"name": "stretch"}, "link": null}, {"localized_name": "terminal", "name": "terminal", "type": "FLOAT", "widget": {"name": "terminal"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVScheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 2.05, 0.95, true, 0.1]}, {"id": 126, "type": "RandomNoise", "pos": [-697.9999467324425, 3520.0000416901034], "size": [270, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed"]}, {"id": 107, "type": "SamplerCustomAdvanced", "pos": [709.9999918309934, 3570.000193802643], "size": [212.3828125, 120], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 347}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 276}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 277}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 278}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 279}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": []}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": [336]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 143, "type": "RandomNoise", "pos": [409.9999946478922, 3570.000193802643], "size": [270, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [347]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}, {"id": 139, "type": "LTXVAudioVAEDecode", "pos": [1129.9999512676497, 3840.0000630985346], "size": [240, 60], "flags": {}, "order": 35, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 338}, {"label": "Audio VAE", "localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 340}], "outputs": [{"localized_name": "Audio", "name": "Audio", "type": "AUDIO", "links": [339]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVAudioVAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 134, "type": "LoraLoaderModelOnly", "pos": [-1650.0000287323687, 3760.0003323940673], "size": [420, 95.546875], "flags": {}, "order": 33, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 325}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 426}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [326, 327]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-ic-lora-depth-control.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Depth-Control/resolve/main/ltx-2-19b-ic-lora-depth-control.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-depth-control.safetensors", 1], "color": "#322", "bgcolor": "#533"}, {"id": 138, "type": "LTXVSeparateAVLatent", "pos": [730.0000160563236, 3730.0000214084316], "size": [193.2916015625, 60], "flags": {}, "order": 34, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 336}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [337, 351]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [338]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 144, "type": "VAEDecodeTiled", "pos": [1119.9999391549845, 3640.000187042085], "size": [270, 150], "flags": {}, "order": 36, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 351}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 353}, {"localized_name": "tile_size", "name": "tile_size", "type": "INT", "widget": {"name": "tile_size"}, "link": null}, {"localized_name": "overlap", "name": "overlap", "type": "INT", "widget": {"name": "overlap"}, "link": null}, {"localized_name": "temporal_size", "name": "temporal_size", "type": "INT", "widget": {"name": "temporal_size"}, "link": null}, {"localized_name": "temporal_overlap", "name": "temporal_overlap", "type": "INT", "widget": {"name": "temporal_overlap"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "VAEDecodeTiled", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [512, 64, 4096, 8]}, {"id": 113, "type": "VAEDecode", "pos": [1129.9999512676497, 3530.000145351982], "size": [240, 60], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 337}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 291}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 145, "type": "PrimitiveInt", "pos": [-1630.0000045070383, 4620.0000923942835], "size": [270, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveInt", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24, "fixed"]}, {"id": 148, "type": "PrimitiveFloat", "pos": [-1630.0000045070383, 4749.99997521129], "size": [270, 66.66666666666667], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [355, 356]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24]}, {"id": 115, "type": "EmptyLTXVLatentVideo", "pos": [-1100.000003380279, 4609.999988732406], "size": [270, 146.66666666666669], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 296}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 297}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 330}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "EmptyLTXVLatentVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [768, 512, 97, 1]}, {"id": 149, "type": "LTXVImgToVideoInplace", "pos": [-1089.9999912676137, 4400.000009014077], "size": [270, 151.9921875], "flags": {}, "order": 37, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 359}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 417}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 360}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 370}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 363}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [357]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 118, "type": "Reroute", "pos": [-229.99999095071237, 4210.000236619506], "size": [75, 26], "flags": {}, "order": 27, "mode": 0, "inputs": [{"name": "", "type": "*", "link": 303}], "outputs": [{"name": "", "type": "VAE", "links": [289, 291, 367]}], "properties": {"showOutputText": false, "horizontal": false}}, {"id": 151, "type": "LTXVImgToVideoInplace", "pos": [-19.999999788732577, 4070.0002501406198], "size": [270, 181.9921875], "flags": {}, "order": 38, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 367}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 410}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 366}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 371}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 368}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 104, "type": "LTXVCropGuides", "pos": [-9.999999119719098, 3840.0000630985346], "size": [240, 80], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 310}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 312}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 270}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [281]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [282]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [287]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVCropGuides", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 112, "type": "LTXVLatentUpsampler", "pos": [-9.999999119719098, 3960.0002084505168], "size": [260, 80], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 287}, {"localized_name": "upscale_model", "name": "upscale_model", "type": "LATENT_UPSCALE_MODEL", "link": 288}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 289}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [366]}], "title": "spatial", "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVLatentUpsampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 132, "type": "LTXVAddGuide", "pos": [-599.9999928169079, 4420.000216337834], "size": [270, 209.16666666666669], "flags": {}, "order": 32, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 313}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 314}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 328}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 357}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 418}, {"localized_name": "frame_idx", "name": "frame_idx", "type": "INT", "widget": {"name": "frame_idx"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [309, 310]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [311, 312]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [324]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LTXVAddGuide", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 1]}, {"id": 96, "type": "LTXVAudioVAELoader", "pos": [-1650.0000287323687, 3910.000056337978], "size": [420, 68.88020833333334], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 377}], "outputs": [{"localized_name": "Audio VAE", "name": "Audio VAE", "type": "VAE", "links": [285, 340]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVAudioVAELoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 103, "type": "CheckpointLoaderSimple", "pos": [-1650.0000287323687, 3590.0000349295465], "size": [420, 108.88020833333334], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 425}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [325]}, {"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": []}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [303, 328, 353, 359]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CheckpointLoaderSimple", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 105, "type": "LoraLoaderModelOnly", "pos": [-69.99999741197416, 3570.000193802643], "size": [390, 95.546875], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 327}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 429}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-distilled-lora-384.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-distilled-lora-384.safetensors", 1]}, {"id": 100, "type": "LatentUpscaleModelLoader", "pos": [-69.99999741197416, 3700.00007661965], "size": [390, 68.88020833333334], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 430}], "outputs": [{"localized_name": "LATENT_UPSCALE_MODEL", "name": "LATENT_UPSCALE_MODEL", "type": "LATENT_UPSCALE_MODEL", "links": [288]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LatentUpscaleModelLoader", "models": [{"name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", "directory": "latent_upscale_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-spatial-upscaler-x2-1.0.safetensors"]}, {"id": 110, "type": "GetImageSize", "pos": [-1630.0000045070383, 4450.000161126616], "size": [260, 80], "flags": {}, "order": 20, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 416}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [296]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [297]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": [329, 330]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 106, "type": "CreateVideo", "pos": [1419.9999363380857, 3760.0003323940673], "size": [270, 86.66666666666667], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 352}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 339}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 356}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [304]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 187, "type": "ImageFromBatch", "pos": [-2310.000095774562, 3689.999972957771], "size": [260, 93.33333333333334], "flags": {}, "order": 39, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 412}, {"localized_name": "batch_index", "name": "batch_index", "type": "INT", "widget": {"name": "batch_index"}, "link": null}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 422}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [415]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "ImageFromBatch", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 121]}, {"id": 191, "type": "ResizeImageMaskNode", "pos": [-2320.0000163380137, 3850.0001667604133], "size": [284.375, 154], "flags": {}, "order": 43, "mode": 0, "inputs": [{"localized_name": "input", "name": "input", "type": "IMAGE,MASK", "link": 415}, {"localized_name": "resize_type", "name": "resize_type", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "resize_type"}, "link": null}, {"localized_name": "width", "name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": 420}, {"localized_name": "height", "name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": 421}, {"localized_name": "crop", "name": "resize_type.crop", "type": "COMBO", "widget": {"name": "resize_type.crop"}, "link": null}, {"localized_name": "scale_method", "name": "scale_method", "type": "COMBO", "widget": {"name": "scale_method"}, "link": null}], "outputs": [{"localized_name": "resized", "name": "resized", "type": "IMAGE", "links": [413]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "ResizeImageMaskNode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["scale dimensions", 1280, 720, "center", "lanczos"]}, {"id": 188, "type": "GetVideoComponents", "pos": [-2320.0000163380137, 3520.0000416901034], "size": [280, 80], "flags": {"collapsed": false}, "order": 40, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 419}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [412]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": []}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "GetVideoComponents", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 189, "type": "ImageScaleBy", "pos": [-1990.0000743661303, 3670.0001318308678], "size": [280, 125.546875], "flags": {}, "order": 41, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 413}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "scale_by", "name": "scale_by", "type": "FLOAT", "widget": {"name": "scale_by"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [414]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ImageScaleBy", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["lanczos", 0.5]}, {"id": 154, "type": "MarkdownNote", "pos": [-1659.9999492958204, 4870.000120563272], "size": [350, 170], "flags": {"collapsed": false}, "order": 7, "mode": 0, "inputs": [], "outputs": [], "title": "Frame Rate Note", "properties": {}, "widgets_values": ["Please make sure the frame rate value is the same in both boxes"], "color": "#222", "bgcolor": "#000"}, {"id": 190, "type": "38b60539-50a7-42f9-a5fe-bdeca26272e2", "pos": [-1999.9999949295823, 3910.000056337978], "size": [310, 106], "flags": {}, "order": 42, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 414}, {"label": "depth_intensity", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 431}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 432}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [416, 417, 418]}], "properties": {"proxyWidgets": [["-1", "sigma"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [999.0000000000002, "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"], "color": "#322", "bgcolor": "#533"}, {"id": 97, "type": "LTXAVTextEncoderLoader", "pos": [-1650.0000287323687, 4040.0003053518376], "size": [420, 124.44010416666667], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "text_encoder", "name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": 427}, {"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 433}, {"localized_name": "device", "name": "device", "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [294, 295]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXAVTextEncoderLoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}, {"name": "gemma_3_12B_it_fp4_mixed.safetensors", "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gemma_3_12B_it_fp4_mixed.safetensors", "ltx-2-19b-dev-fp8.safetensors", "default"]}], "groups": [{"id": 1, "title": "Model", "bounding": [-1660, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Basic Sampling", "bounding": [-700, 3440, 570, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [-1180, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Latent", "bounding": [-1180, 4290, 1050, 680], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Upscale Sampling(2x)", "bounding": [-100, 3440, 1090, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 6, "title": "Sampler", "bounding": [350, 3480, 620, 750], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Model", "bounding": [-90, 3480, 430, 310], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Frame rate", "bounding": [-1640, 4550, 290, 271.6], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 16, "title": "Video Preprocess", "bounding": [-2330, 3450, 650, 567.6], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 15, "title": "video length", "bounding": [-2320, 3620, 290, 180], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 326, "origin_id": 134, "origin_slot": 0, "target_id": 93, "target_slot": 0, "type": "MODEL"}, {"id": 309, "origin_id": 132, "origin_slot": 0, "target_id": 93, "target_slot": 1, "type": "CONDITIONING"}, {"id": 311, "origin_id": 132, "origin_slot": 1, "target_id": 93, "target_slot": 2, "type": "CONDITIONING"}, {"id": 266, "origin_id": 122, "origin_slot": 1, "target_id": 101, "target_slot": 1, "type": "LATENT"}, {"id": 280, "origin_id": 105, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "MODEL"}, {"id": 281, "origin_id": 104, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 282, "origin_id": 104, "origin_slot": 1, "target_id": 108, "target_slot": 2, "type": "CONDITIONING"}, {"id": 285, "origin_id": 96, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "VAE"}, {"id": 329, "origin_id": 110, "origin_slot": 2, "target_id": 111, "target_slot": 1, "type": "INT"}, {"id": 260, "origin_id": 126, "origin_slot": 0, "target_id": 123, "target_slot": 0, "type": "NOISE"}, {"id": 261, "origin_id": 93, "origin_slot": 0, "target_id": 123, "target_slot": 1, "type": "GUIDER"}, {"id": 262, "origin_id": 94, "origin_slot": 0, "target_id": 123, "target_slot": 2, "type": "SAMPLER"}, {"id": 263, "origin_id": 95, "origin_slot": 0, "target_id": 123, "target_slot": 3, "type": "SIGMAS"}, {"id": 323, "origin_id": 116, "origin_slot": 0, "target_id": 123, "target_slot": 4, "type": "LATENT"}, {"id": 296, "origin_id": 110, "origin_slot": 0, "target_id": 115, "target_slot": 0, "type": "INT"}, {"id": 297, "origin_id": 110, "origin_slot": 1, "target_id": 115, "target_slot": 1, "type": "INT"}, {"id": 330, "origin_id": 110, "origin_slot": 2, "target_id": 115, "target_slot": 2, "type": "INT"}, {"id": 325, "origin_id": 103, "origin_slot": 0, "target_id": 134, "target_slot": 0, "type": "MODEL"}, {"id": 292, "origin_id": 124, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "CONDITIONING"}, {"id": 293, "origin_id": 119, "origin_slot": 0, "target_id": 114, "target_slot": 1, "type": "CONDITIONING"}, {"id": 294, "origin_id": 97, "origin_slot": 0, "target_id": 119, "target_slot": 0, "type": "CLIP"}, {"id": 324, "origin_id": 132, "origin_slot": 2, "target_id": 116, "target_slot": 0, "type": "LATENT"}, {"id": 300, "origin_id": 111, "origin_slot": 0, "target_id": 116, "target_slot": 1, "type": "LATENT"}, {"id": 313, "origin_id": 114, "origin_slot": 0, "target_id": 132, "target_slot": 0, "type": "CONDITIONING"}, {"id": 314, "origin_id": 114, "origin_slot": 1, "target_id": 132, "target_slot": 1, "type": "CONDITIONING"}, {"id": 328, "origin_id": 103, "origin_slot": 2, "target_id": 132, "target_slot": 2, "type": "VAE"}, {"id": 272, "origin_id": 123, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 336, "origin_id": 107, "origin_slot": 1, "target_id": 138, "target_slot": 0, "type": "LATENT"}, {"id": 339, "origin_id": 139, "origin_slot": 0, "target_id": 106, "target_slot": 1, "type": "AUDIO"}, {"id": 295, "origin_id": 97, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "CLIP"}, {"id": 303, "origin_id": 103, "origin_slot": 2, "target_id": 118, "target_slot": 0, "type": "VAE"}, {"id": 338, "origin_id": 138, "origin_slot": 1, "target_id": 139, "target_slot": 0, "type": "LATENT"}, {"id": 340, "origin_id": 96, "origin_slot": 0, "target_id": 139, "target_slot": 1, "type": "VAE"}, {"id": 337, "origin_id": 138, "origin_slot": 0, "target_id": 113, "target_slot": 0, "type": "LATENT"}, {"id": 291, "origin_id": 118, "origin_slot": 0, "target_id": 113, "target_slot": 1, "type": "VAE"}, {"id": 276, "origin_id": 108, "origin_slot": 0, "target_id": 107, "target_slot": 1, "type": "GUIDER"}, {"id": 277, "origin_id": 98, "origin_slot": 0, "target_id": 107, "target_slot": 2, "type": "SAMPLER"}, {"id": 278, "origin_id": 99, "origin_slot": 0, "target_id": 107, "target_slot": 3, "type": "SIGMAS"}, {"id": 279, "origin_id": 101, "origin_slot": 0, "target_id": 107, "target_slot": 4, "type": "LATENT"}, {"id": 327, "origin_id": 134, "origin_slot": 0, "target_id": 105, "target_slot": 0, "type": "MODEL"}, {"id": 310, "origin_id": 132, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "CONDITIONING"}, {"id": 312, "origin_id": 132, "origin_slot": 1, "target_id": 104, "target_slot": 1, "type": "CONDITIONING"}, {"id": 270, "origin_id": 122, "origin_slot": 0, "target_id": 104, "target_slot": 2, "type": "LATENT"}, {"id": 287, "origin_id": 104, "origin_slot": 2, "target_id": 112, "target_slot": 0, "type": "LATENT"}, {"id": 288, "origin_id": 100, "origin_slot": 0, "target_id": 112, "target_slot": 1, "type": "LATENT_UPSCALE_MODEL"}, {"id": 289, "origin_id": 118, "origin_slot": 0, "target_id": 112, "target_slot": 2, "type": "VAE"}, {"id": 322, "origin_id": 116, "origin_slot": 0, "target_id": 95, "target_slot": 0, "type": "LATENT"}, {"id": 304, "origin_id": 106, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 124, "target_slot": 1, "type": "STRING"}, {"id": 347, "origin_id": 143, "origin_slot": 0, "target_id": 107, "target_slot": 0, "type": "NOISE"}, {"id": 351, "origin_id": 138, "origin_slot": 0, "target_id": 144, "target_slot": 0, "type": "LATENT"}, {"id": 352, "origin_id": 144, "origin_slot": 0, "target_id": 106, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 103, "origin_slot": 2, "target_id": 144, "target_slot": 1, "type": "VAE"}, {"id": 354, "origin_id": 145, "origin_slot": 0, "target_id": 111, "target_slot": 2, "type": "INT"}, {"id": 355, "origin_id": 148, "origin_slot": 0, "target_id": 114, "target_slot": 2, "type": "FLOAT"}, {"id": 356, "origin_id": 148, "origin_slot": 0, "target_id": 106, "target_slot": 2, "type": "FLOAT"}, {"id": 357, "origin_id": 149, "origin_slot": 0, "target_id": 132, "target_slot": 3, "type": "LATENT"}, {"id": 359, "origin_id": 103, "origin_slot": 2, "target_id": 149, "target_slot": 0, "type": "VAE"}, {"id": 360, "origin_id": 115, "origin_slot": 0, "target_id": 149, "target_slot": 2, "type": "LATENT"}, {"id": 363, "origin_id": -10, "origin_slot": 2, "target_id": 149, "target_slot": 4, "type": "BOOLEAN"}, {"id": 365, "origin_id": 151, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "LATENT"}, {"id": 366, "origin_id": 112, "origin_slot": 0, "target_id": 151, "target_slot": 2, "type": "LATENT"}, {"id": 367, "origin_id": 118, "origin_slot": 0, "target_id": 151, "target_slot": 0, "type": "VAE"}, {"id": 368, "origin_id": -10, "origin_slot": 2, "target_id": 151, "target_slot": 4, "type": "BOOLEAN"}, {"id": 370, "origin_id": -10, "origin_slot": 1, "target_id": 149, "target_slot": 3, "type": "FLOAT"}, {"id": 371, "origin_id": -10, "origin_slot": 1, "target_id": 151, "target_slot": 3, "type": "FLOAT"}, {"id": 377, "origin_id": -10, "origin_slot": 6, "target_id": 96, "target_slot": 0, "type": "COMBO"}, {"id": 410, "origin_id": -10, "origin_slot": 4, "target_id": 151, "target_slot": 1, "type": "IMAGE"}, {"id": 412, "origin_id": 188, "origin_slot": 0, "target_id": 187, "target_slot": 0, "type": "IMAGE"}, {"id": 413, "origin_id": 191, "origin_slot": 0, "target_id": 189, "target_slot": 0, "type": "IMAGE"}, {"id": 414, "origin_id": 189, "origin_slot": 0, "target_id": 190, "target_slot": 0, "type": "IMAGE"}, {"id": 415, "origin_id": 187, "origin_slot": 0, "target_id": 191, "target_slot": 0, "type": "IMAGE"}, {"id": 416, "origin_id": 190, "origin_slot": 0, "target_id": 110, "target_slot": 0, "type": "IMAGE"}, {"id": 417, "origin_id": 190, "origin_slot": 0, "target_id": 149, "target_slot": 1, "type": "IMAGE"}, {"id": 418, "origin_id": 190, "origin_slot": 0, "target_id": 132, "target_slot": 4, "type": "IMAGE"}, {"id": 419, "origin_id": -10, "origin_slot": 3, "target_id": 188, "target_slot": 0, "type": "VIDEO"}, {"id": 420, "origin_id": -10, "origin_slot": 5, "target_id": 191, "target_slot": 2, "type": "INT"}, {"id": 421, "origin_id": -10, "origin_slot": 6, "target_id": 191, "target_slot": 3, "type": "INT"}, {"id": 422, "origin_id": -10, "origin_slot": 7, "target_id": 187, "target_slot": 2, "type": "INT"}, {"id": 425, "origin_id": -10, "origin_slot": 8, "target_id": 103, "target_slot": 0, "type": "COMBO"}, {"id": 426, "origin_id": -10, "origin_slot": 9, "target_id": 134, "target_slot": 1, "type": "COMBO"}, {"id": 427, "origin_id": -10, "origin_slot": 10, "target_id": 97, "target_slot": 0, "type": "COMBO"}, {"id": 429, "origin_id": -10, "origin_slot": 11, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 430, "origin_id": -10, "origin_slot": 12, "target_id": 100, "target_slot": 0, "type": "COMBO"}, {"id": 431, "origin_id": -10, "origin_slot": 13, "target_id": 190, "target_slot": 2, "type": "COMBO"}, {"id": 432, "origin_id": -10, "origin_slot": 14, "target_id": 190, "target_slot": 3, "type": "COMBO"}, {"id": 433, "origin_id": -10, "origin_slot": 8, "target_id": 97, "target_slot": 1, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Depth to video"}, {"id": "38b60539-50a7-42f9-a5fe-bdeca26272e2", "version": 1, "state": {"lastGroupId": 16, "lastNodeId": 191, "lastLinkId": 433, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image to Depth Map (Lotus)", "inputNode": {"id": -10, "bounding": [-60, -172.61268043518066, 126.625, 120]}, "outputNode": {"id": -20, "bounding": [1650, -172.61268043518066, 120, 60]}, "inputs": [{"id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", "name": "pixels", "type": "IMAGE", "linkIds": [37], "localized_name": "pixels", "pos": [46.625, -152.61268043518066]}, {"id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", "name": "sigma", "type": "FLOAT", "linkIds": [243], "label": "depth_intensity", "pos": [46.625, -132.61268043518066]}, {"id": "374bfecc-34bb-47f9-82b6-cbe9383f8756", "name": "unet_name", "type": "COMBO", "linkIds": [423], "pos": [46.625, -112.61268043518066]}, {"id": "bb8707a1-46c3-44be-a15a-0adc908d871d", "name": "vae_name", "type": "COMBO", "linkIds": [424], "pos": [46.625, -92.61268043518066]}], "outputs": [{"id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", "name": "IMAGE", "type": "IMAGE", "linkIds": [242], "localized_name": "IMAGE", "pos": [1670, -152.61268043518066]}], "widgets": [], "nodes": [{"id": 8, "type": "VAEDecode", "pos": [1380, -240], "size": [210, 46], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 232}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 240}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 10, "type": "UNETLoader", "pos": [135.34181213378906, -290.1947937011719], "size": [305.93701171875, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 423}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [31, 241]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "lotus-depth-d-v1-1.safetensors", "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["lotus-depth-d-v1-1.safetensors", "default"]}, {"id": 14, "type": "VAELoader", "pos": [134.531494140625, -165.18197631835938], "size": [305.93701171875, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 424}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [38, 240]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "vae-ft-mse-840000-ema-pruned.safetensors", "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["vae-ft-mse-840000-ema-pruned.safetensors"]}, {"id": 16, "type": "SamplerCustomAdvanced", "pos": [990.6585693359375, -319.9144287109375], "size": [355.20001220703125, 326], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 237}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 27}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 33}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 194}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 201}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "slot_index": 0, "links": [232]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "slot_index": 1, "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 18, "type": "DisableNoise", "pos": [730.47705078125, -320], "size": [210, 26], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "slot_index": 0, "links": [237]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "DisableNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 19, "type": "BasicGuider", "pos": [730.2631225585938, -251.22537231445312], "size": [210, 46], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 241}, {"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 238}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "slot_index": 0, "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 20, "type": "BasicScheduler", "pos": [488.64459228515625, -147.67201232910156], "size": [210, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 31}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [66]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicScheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["normal", 1, 1]}, {"id": 21, "type": "KSamplerSelect", "pos": [730.2631225585938, -161.22537231445312], "size": [210, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "slot_index": 0, "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["euler"]}, {"id": 22, "type": "ImageInvert", "pos": [1380, -310], "size": [210, 26], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 35}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [242]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageInvert", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 23, "type": "VAEEncode", "pos": [730.2631225585938, 38.77463912963867], "size": [210, 46], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 37}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 38}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 28, "type": "SetFirstSigma", "pos": [730.2631225585938, -61.22536087036133], "size": [210, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 66}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": 243}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SetFirstSigma", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [999.0000000000002]}, {"id": 68, "type": "LotusConditioning", "pos": [490, -230], "size": [210, 26], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "slot_index": 0, "links": [238]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LotusConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}], "groups": [{"id": 1, "title": "Load Models", "bounding": [120, -370, 335, 281.6000061035156], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 232, "origin_id": 16, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 240, "origin_id": 14, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 237, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "NOISE"}, {"id": 27, "origin_id": 19, "origin_slot": 0, "target_id": 16, "target_slot": 1, "type": "GUIDER"}, {"id": 33, "origin_id": 21, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "SAMPLER"}, {"id": 194, "origin_id": 28, "origin_slot": 0, "target_id": 16, "target_slot": 3, "type": "SIGMAS"}, {"id": 201, "origin_id": 23, "origin_slot": 0, "target_id": 16, "target_slot": 4, "type": "LATENT"}, {"id": 241, "origin_id": 10, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 238, "origin_id": 68, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 31, "origin_id": 10, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "MODEL"}, {"id": 35, "origin_id": 8, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "IMAGE"}, {"id": 38, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 1, "type": "VAE"}, {"id": 66, "origin_id": 20, "origin_slot": 0, "target_id": 28, "target_slot": 0, "type": "SIGMAS"}, {"id": 37, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 242, "origin_id": 22, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 243, "origin_id": -10, "origin_slot": 1, "target_id": 28, "target_slot": 1, "type": "FLOAT"}, {"id": 423, "origin_id": -10, "origin_slot": 2, "target_id": 10, "target_slot": 0, "type": "COMBO"}, {"id": 424, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"ds": {"scale": 1.313181818181818, "offset": [271.9196871428176, -3845.0123774536323]}, "workflowRendererVersion": "LG"}, "version": 0.4} diff --git a/blueprints/Edge-Preserving Blur.json b/blueprints/Edge-Preserving Blur.json new file mode 100644 index 000000000..4f2416e9b --- /dev/null +++ b/blueprints/Edge-Preserving Blur.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 136, "last_link_id": 0, "nodes": [{"id": 136, "type": "c6dc0f88-416b-4db1-bed1-442d793de5ad", "pos": [669.0822222222221, 835.5507407407408], "size": [210, 106], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["130", "value"], ["131", "value"], ["133", "value"]]}, "widgets_values": [], "title": "Edge-Preserving Blur"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "c6dc0f88-416b-4db1-bed1-442d793de5ad", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 138, "lastLinkId": 109, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Edge-Preserving Blur", "inputNode": {"id": -10, "bounding": [1750, -620, 120, 60]}, "outputNode": {"id": -20, "bounding": [2700, -620, 120, 60]}, "inputs": [{"id": "06a6d0ad-25d7-4784-8c72-7fc8e7110a22", "name": "images.image0", "type": "IMAGE", "linkIds": [106], "localized_name": "images.image0", "label": "image", "pos": [1850, -600]}], "outputs": [{"id": "3ae9f5d7-be63-4c9f-9893-6f848defa377", "name": "IMAGE0", "type": "IMAGE", "linkIds": [99], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [2720, -600]}], "widgets": [], "nodes": [{"id": 128, "type": "GLSLShader", "pos": [2220, -860], "size": [420, 252], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 106}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 100}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 101}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 107}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": 103}, {"label": "u_int2", "localized_name": "ints.u_int2", "name": "ints.u_int2", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [99]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform float u_float0; // Blur radius (0–20, default ~5)\nuniform float u_float1; // Edge threshold (0–100, default ~30)\nuniform int u_int0; // Step size (0/1 = every pixel, 2+ = skip pixels)\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int MAX_RADIUS = 20;\nconst float EPSILON = 0.0001;\n\n// Perceptual luminance\nfloat getLuminance(vec3 rgb) {\n return dot(rgb, vec3(0.299, 0.587, 0.114));\n}\n\nvec4 bilateralFilter(vec2 uv, vec2 texelSize, int radius,\n float sigmaSpatial, float sigmaColor)\n{\n vec4 center = texture(u_image0, uv);\n vec3 centerRGB = center.rgb;\n\n float invSpatial2 = -0.5 / (sigmaSpatial * sigmaSpatial);\n float invColor2 = -0.5 / (sigmaColor * sigmaColor + EPSILON);\n\n vec3 sumRGB = vec3(0.0);\n float sumWeight = 0.0;\n\n int step = max(u_int0, 1);\n float radius2 = float(radius * radius);\n\n for (int dy = -MAX_RADIUS; dy <= MAX_RADIUS; dy++) {\n if (dy < -radius || dy > radius) continue;\n if (abs(dy) % step != 0) continue;\n\n for (int dx = -MAX_RADIUS; dx <= MAX_RADIUS; dx++) {\n if (dx < -radius || dx > radius) continue;\n if (abs(dx) % step != 0) continue;\n\n vec2 offset = vec2(float(dx), float(dy));\n float dist2 = dot(offset, offset);\n if (dist2 > radius2) continue;\n\n vec3 sampleRGB = texture(u_image0, uv + offset * texelSize).rgb;\n\n // Spatial Gaussian\n float spatialWeight = exp(dist2 * invSpatial2);\n\n // Perceptual color distance (weighted RGB)\n vec3 diff = sampleRGB - centerRGB;\n float colorDist = dot(diff * diff, vec3(0.299, 0.587, 0.114));\n float colorWeight = exp(colorDist * invColor2);\n\n float w = spatialWeight * colorWeight;\n sumRGB += sampleRGB * w;\n sumWeight += w;\n }\n }\n\n vec3 resultRGB = sumRGB / max(sumWeight, EPSILON);\n return vec4(resultRGB, center.a); // preserve center alpha\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / vec2(textureSize(u_image0, 0));\n\n float radiusF = clamp(u_float0, 0.0, float(MAX_RADIUS));\n int radius = int(radiusF + 0.5);\n\n if (radius == 0) {\n fragColor = texture(u_image0, v_texCoord);\n return;\n }\n\n // Edge threshold → color sigma\n // Squared curve for better low-end control\n float t = clamp(u_float1, 0.0, 100.0) / 100.0;\n t *= t;\n float sigmaColor = mix(0.01, 0.5, t);\n\n // Spatial sigma tied to radius\n float sigmaSpatial = max(radiusF * 0.75, 0.5);\n\n fragColor = bilateralFilter(\n v_texCoord,\n texelSize,\n radius,\n sigmaSpatial,\n sigmaColor\n );\n}", "from_input"]}, {"id": 130, "type": "PrimitiveFloat", "pos": [1930, -860], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "blur_radius", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [100]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 20, "step": 0.5, "precision": 1}, "widgets_values": [20]}, {"id": 131, "type": "PrimitiveFloat", "pos": [1930, -760], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "edge_threshold", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [101]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "step": 1}, "widgets_values": [50]}, {"id": 133, "type": "PrimitiveInt", "pos": [1930, -660], "size": [270, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "step_size", "localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [103, 107]}], "properties": {"Node name for S&R": "PrimitiveInt", "min": 0}, "widgets_values": [1, "fixed"]}], "groups": [], "links": [{"id": 100, "origin_id": 130, "origin_slot": 0, "target_id": 128, "target_slot": 2, "type": "FLOAT"}, {"id": 101, "origin_id": 131, "origin_slot": 0, "target_id": 128, "target_slot": 3, "type": "FLOAT"}, {"id": 107, "origin_id": 133, "origin_slot": 0, "target_id": 128, "target_slot": 5, "type": "INT"}, {"id": 103, "origin_id": 133, "origin_slot": 0, "target_id": 128, "target_slot": 6, "type": "INT"}, {"id": 106, "origin_id": -10, "origin_slot": 0, "target_id": 128, "target_slot": 0, "type": "IMAGE"}, {"id": 99, "origin_id": 128, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Blur"}]}, "extra": {}} diff --git a/blueprints/Film Grain.json b/blueprints/Film Grain.json new file mode 100644 index 000000000..b7ebe2a36 --- /dev/null +++ b/blueprints/Film Grain.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 22, "last_link_id": 0, "nodes": [{"id": 22, "type": "3324cf54-bcff-405f-a4bf-c5122c72fe56", "pos": [4800, -1180], "size": [250, 154], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Film Grain", "properties": {"proxyWidgets": [["17", "value"], ["18", "value"], ["19", "value"], ["20", "value"], ["21", "choice"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "3324cf54-bcff-405f-a4bf-c5122c72fe56", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 21, "lastLinkId": 30, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Film Grain", "inputNode": {"id": -10, "bounding": [4096.671470760602, -948.2184031393472, 120, 60]}, "outputNode": {"id": -20, "bounding": [4900, -948.2184031393472, 120, 60]}, "inputs": [{"id": "062968ea-da25-47e7-a180-d913c267f148", "name": "images.image0", "type": "IMAGE", "linkIds": [22], "localized_name": "images.image0", "label": "image", "pos": [4196.671470760602, -928.2184031393472]}], "outputs": [{"id": "43247d06-a39f-4733-9828-c39400fe02a4", "name": "IMAGE0", "type": "IMAGE", "linkIds": [23], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4920, -928.2184031393472]}], "widgets": [], "nodes": [{"id": 15, "type": "GLSLShader", "pos": [4510, -1180], "size": [330, 272], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 22}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 26}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 27}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 28}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 29}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 30}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [23]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // grain amount [0.0 – 1.0] typical: 0.2–0.8\nuniform float u_float1; // grain size [0.3 – 3.0] lower = finer grain\nuniform float u_float2; // color amount [0.0 – 1.0] 0 = monochrome, 1 = RGB grain\nuniform float u_float3; // luminance bias [0.0 – 1.0] 0 = uniform, 1 = shadows only\nuniform int u_int0; // noise mode [0 or 1] 0 = smooth, 1 = grainy\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\n// High-quality integer hash (pcg-like)\nuint pcg(uint v) {\n uint state = v * 747796405u + 2891336453u;\n uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;\n return (word >> 22u) ^ word;\n}\n\n// 2D -> 1D hash input\nuint hash2d(uvec2 p) {\n return pcg(p.x + pcg(p.y));\n}\n\n// Hash to float [0, 1]\nfloat hashf(uvec2 p) {\n return float(hash2d(p)) / float(0xffffffffu);\n}\n\n// Hash to float with offset (for RGB channels)\nfloat hashf(uvec2 p, uint offset) {\n return float(pcg(hash2d(p) + offset)) / float(0xffffffffu);\n}\n\n// Convert uniform [0,1] to roughly Gaussian distribution\n// Using simple approximation: average of multiple samples\nfloat toGaussian(uvec2 p) {\n float sum = hashf(p, 0u) + hashf(p, 1u) + hashf(p, 2u) + hashf(p, 3u);\n return (sum - 2.0) * 0.7; // Centered, scaled\n}\n\nfloat toGaussian(uvec2 p, uint offset) {\n float sum = hashf(p, offset) + hashf(p, offset + 1u) \n + hashf(p, offset + 2u) + hashf(p, offset + 3u);\n return (sum - 2.0) * 0.7;\n}\n\n// Smooth noise with better interpolation\nfloat smoothNoise(vec2 p) {\n vec2 i = floor(p);\n vec2 f = fract(p);\n \n // Quintic interpolation (less banding than cubic)\n f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);\n \n uvec2 ui = uvec2(i);\n float a = toGaussian(ui);\n float b = toGaussian(ui + uvec2(1u, 0u));\n float c = toGaussian(ui + uvec2(0u, 1u));\n float d = toGaussian(ui + uvec2(1u, 1u));\n \n return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);\n}\n\nfloat smoothNoise(vec2 p, uint offset) {\n vec2 i = floor(p);\n vec2 f = fract(p);\n \n f = f * f * f * (f * (f * 6.0 - 15.0) + 10.0);\n \n uvec2 ui = uvec2(i);\n float a = toGaussian(ui, offset);\n float b = toGaussian(ui + uvec2(1u, 0u), offset);\n float c = toGaussian(ui + uvec2(0u, 1u), offset);\n float d = toGaussian(ui + uvec2(1u, 1u), offset);\n \n return mix(mix(a, b, f.x), mix(c, d, f.x), f.y);\n}\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n \n // Luminance (Rec.709)\n float luma = dot(color.rgb, vec3(0.2126, 0.7152, 0.0722));\n \n // Grain UV (resolution-independent)\n vec2 grainUV = v_texCoord * u_resolution / max(u_float1, 0.01);\n uvec2 grainPixel = uvec2(grainUV);\n \n float g;\n vec3 grainRGB;\n \n if (u_int0 == 1) {\n // Grainy mode: pure hash noise (no interpolation = no banding)\n g = toGaussian(grainPixel);\n grainRGB = vec3(\n toGaussian(grainPixel, 100u),\n toGaussian(grainPixel, 200u),\n toGaussian(grainPixel, 300u)\n );\n } else {\n // Smooth mode: interpolated with quintic curve\n g = smoothNoise(grainUV);\n grainRGB = vec3(\n smoothNoise(grainUV, 100u),\n smoothNoise(grainUV, 200u),\n smoothNoise(grainUV, 300u)\n );\n }\n \n // Luminance weighting (less grain in highlights)\n float lumWeight = mix(1.0, 1.0 - luma, clamp(u_float3, 0.0, 1.0));\n \n // Strength\n float strength = u_float0 * 0.15;\n \n // Color vs monochrome grain\n vec3 grainColor = mix(vec3(g), grainRGB, clamp(u_float2, 0.0, 1.0));\n \n color.rgb += grainColor * strength * lumWeight;\n fragColor0 = vec4(clamp(color.rgb, 0.0, 1.0), color.a);\n}\n", "from_input"]}, {"id": 21, "type": "CustomCombo", "pos": [4280, -780], "size": [210, 153.8888931274414], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "grain_mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [30]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Smooth", 0, "Smooth", "Grainy", ""]}, {"id": 17, "type": "PrimitiveFloat", "pos": [4276.671470760602, -1180.3256994061358], "size": [210, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "grain_amount", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [26]}], "title": "Grain amount", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "step": 0.05, "precision": 2}, "widgets_values": [0.25]}, {"id": 18, "type": "PrimitiveFloat", "pos": [4280, -1080], "size": [210, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "grain_size", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [27]}], "title": "Grain size", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0.05, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.1]}, {"id": 19, "type": "PrimitiveFloat", "pos": [4280, -980], "size": [210, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "color_amount", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [28]}], "title": "Color amount", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "precision": 2, "step": 0.05}, "widgets_values": [0]}, {"id": 20, "type": "PrimitiveFloat", "pos": [4280, -880], "size": [210, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "shadow_focus", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [29]}], "title": "Luminance bias", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "precision": 2, "step": 0.05}, "widgets_values": [0]}], "groups": [], "links": [{"id": 26, "origin_id": 17, "origin_slot": 0, "target_id": 15, "target_slot": 2, "type": "FLOAT"}, {"id": 27, "origin_id": 18, "origin_slot": 0, "target_id": 15, "target_slot": 3, "type": "FLOAT"}, {"id": 28, "origin_id": 19, "origin_slot": 0, "target_id": 15, "target_slot": 4, "type": "FLOAT"}, {"id": 29, "origin_id": 20, "origin_slot": 0, "target_id": 15, "target_slot": 5, "type": "FLOAT"}, {"id": 30, "origin_id": 21, "origin_slot": 1, "target_id": 15, "target_slot": 7, "type": "INT"}, {"id": 22, "origin_id": -10, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "IMAGE"}, {"id": 23, "origin_id": 15, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} diff --git a/blueprints/Glow.json b/blueprints/Glow.json new file mode 100644 index 000000000..590445c06 --- /dev/null +++ b/blueprints/Glow.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 37, "last_link_id": 0, "nodes": [{"id": 37, "type": "0a99445a-aaf8-4a7f-aec3-d7d710ae1495", "pos": [2160, -360], "size": [260, 154], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["34", "value"], ["35", "value"], ["33", "value"], ["31", "choice"], ["32", "color"]]}, "widgets_values": [], "title": "Glow"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "0a99445a-aaf8-4a7f-aec3-d7d710ae1495", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 36, "lastLinkId": 53, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Glow", "inputNode": {"id": -10, "bounding": [2110, -165, 120, 60]}, "outputNode": {"id": -20, "bounding": [3170, -165, 120, 60]}, "inputs": [{"id": "ffc7cf94-be90-4d56-a3b8-d0514d61c015", "name": "images.image0", "type": "IMAGE", "linkIds": [45], "localized_name": "images.image0", "label": "image", "pos": [2210, -145]}], "outputs": [{"id": "04986101-50be-4762-8957-8e2a5e460bbb", "name": "IMAGE0", "type": "IMAGE", "linkIds": [53], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [3190, -145]}], "widgets": [], "nodes": [{"id": 30, "type": "GLSLShader", "pos": [2590, -520], "size": [520, 272], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 45}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 51}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 50}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 52}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 46}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": 47}, {"label": "u_int2", "localized_name": "ints.u_int2", "name": "ints.u_int2", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [53]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision mediump float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Blend mode\nuniform int u_int1; // Color tint\nuniform float u_float0; // Intensity\nuniform float u_float1; // Radius\nuniform float u_float2; // Threshold\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nconst int BLEND_ADD = 0;\nconst int BLEND_SCREEN = 1;\nconst int BLEND_SOFT = 2;\nconst int BLEND_OVERLAY = 3;\nconst int BLEND_LIGHTEN = 4;\n\nconst float GOLDEN_ANGLE = 2.39996323;\nconst int MAX_SAMPLES = 48;\nconst vec3 LUMA = vec3(0.299, 0.587, 0.114);\n\nfloat hash(vec2 p) {\n p = fract(p * vec2(123.34, 456.21));\n p += dot(p, p + 45.32);\n return fract(p.x * p.y);\n}\n\nvec3 hexToRgb(int h) {\n return vec3(\n float((h >> 16) & 255),\n float((h >> 8) & 255),\n float(h & 255)\n ) * (1.0 / 255.0);\n}\n\nvec3 blend(vec3 base, vec3 glow, int mode) {\n if (mode == BLEND_SCREEN) {\n return 1.0 - (1.0 - base) * (1.0 - glow);\n }\n if (mode == BLEND_SOFT) {\n return mix(\n base - (1.0 - 2.0 * glow) * base * (1.0 - base),\n base + (2.0 * glow - 1.0) * (sqrt(base) - base),\n step(0.5, glow)\n );\n }\n if (mode == BLEND_OVERLAY) {\n return mix(\n 2.0 * base * glow,\n 1.0 - 2.0 * (1.0 - base) * (1.0 - glow),\n step(0.5, base)\n );\n }\n if (mode == BLEND_LIGHTEN) {\n return max(base, glow);\n }\n return base + glow;\n}\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n \n float intensity = u_float0 * 0.05;\n float radius = u_float1 * u_float1 * 0.012;\n \n if (intensity < 0.001 || radius < 0.1) {\n fragColor = original;\n return;\n }\n \n float threshold = 1.0 - u_float2 * 0.01;\n float t0 = threshold - 0.15;\n float t1 = threshold + 0.15;\n \n vec2 texelSize = 1.0 / u_resolution;\n float radius2 = radius * radius;\n \n float sampleScale = clamp(radius * 0.75, 0.35, 1.0);\n int samples = int(float(MAX_SAMPLES) * sampleScale);\n \n float noise = hash(gl_FragCoord.xy);\n float angleOffset = noise * GOLDEN_ANGLE;\n float radiusJitter = 0.85 + noise * 0.3;\n \n float ca = cos(GOLDEN_ANGLE);\n float sa = sin(GOLDEN_ANGLE);\n vec2 dir = vec2(cos(angleOffset), sin(angleOffset));\n \n vec3 glow = vec3(0.0);\n float totalWeight = 0.0;\n \n // Center tap\n float centerMask = smoothstep(t0, t1, dot(original.rgb, LUMA));\n glow += original.rgb * centerMask * 2.0;\n totalWeight += 2.0;\n \n for (int i = 1; i < MAX_SAMPLES; i++) {\n if (i >= samples) break;\n \n float fi = float(i);\n float dist = sqrt(fi / float(samples)) * radius * radiusJitter;\n \n vec2 offset = dir * dist * texelSize;\n vec3 c = texture(u_image0, v_texCoord + offset).rgb;\n float mask = smoothstep(t0, t1, dot(c, LUMA));\n \n float w = 1.0 - (dist * dist) / (radius2 * 1.5);\n w = max(w, 0.0);\n w *= w;\n \n glow += c * mask * w;\n totalWeight += w;\n \n dir = vec2(\n dir.x * ca - dir.y * sa,\n dir.x * sa + dir.y * ca\n );\n }\n \n glow *= intensity / max(totalWeight, 0.001);\n \n if (u_int1 > 0) {\n glow *= hexToRgb(u_int1);\n }\n \n vec3 result = blend(original.rgb, glow, u_int0);\n result += (noise - 0.5) * (1.0 / 255.0);\n \n fragColor = vec4(clamp(result, 0.0, 1.0), original.a);\n}", "from_input"]}, {"id": 34, "type": "PrimitiveFloat", "pos": [2290, -510], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "intensity", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [51]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [30]}, {"id": 35, "type": "PrimitiveFloat", "pos": [2290, -410], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "radius", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [50]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [25]}, {"id": 33, "type": "PrimitiveFloat", "pos": [2290, -310], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "threshold", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [52]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [100]}, {"id": 32, "type": "ColorToRGBInt", "pos": [2290, -210], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "color_tint", "localized_name": "color", "name": "color", "type": "COLOR", "widget": {"name": "color"}, "link": null}], "outputs": [{"localized_name": "rgb_int", "name": "rgb_int", "type": "INT", "links": [47]}], "properties": {"Node name for S&R": "ColorToRGBInt"}, "widgets_values": ["#45edf5"]}, {"id": 31, "type": "CustomCombo", "pos": [2290, -110], "size": [270, 222], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "blend_mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [46]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["add", 0, "add", "screen", "soft", "overlay", "lighten", ""]}], "groups": [], "links": [{"id": 51, "origin_id": 34, "origin_slot": 0, "target_id": 30, "target_slot": 2, "type": "FLOAT"}, {"id": 50, "origin_id": 35, "origin_slot": 0, "target_id": 30, "target_slot": 3, "type": "FLOAT"}, {"id": 52, "origin_id": 33, "origin_slot": 0, "target_id": 30, "target_slot": 4, "type": "FLOAT"}, {"id": 46, "origin_id": 31, "origin_slot": 1, "target_id": 30, "target_slot": 6, "type": "INT"}, {"id": 47, "origin_id": 32, "origin_slot": 0, "target_id": 30, "target_slot": 7, "type": "INT"}, {"id": 45, "origin_id": -10, "origin_slot": 0, "target_id": 30, "target_slot": 0, "type": "IMAGE"}, {"id": 53, "origin_id": 30, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} diff --git a/blueprints/Hue and Saturation.json b/blueprints/Hue and Saturation.json new file mode 100644 index 000000000..04846c51d --- /dev/null +++ b/blueprints/Hue and Saturation.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 11, "last_link_id": 0, "nodes": [{"id": 11, "type": "c64f83e9-aa5d-4031-89f1-0704e39299fe", "pos": [870, -220], "size": [250, 178], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Hue and Saturation", "properties": {"proxyWidgets": [["2", "choice"], ["4", "value"], ["5", "value"], ["6", "value"], ["7", "value"], ["3", "choice"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "c64f83e9-aa5d-4031-89f1-0704e39299fe", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 10, "lastLinkId": 11, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Hue and Saturation", "inputNode": {"id": -10, "bounding": [360, -176, 120, 60]}, "outputNode": {"id": -20, "bounding": [1410, -176, 120, 60]}, "inputs": [{"id": "a5aae7ea-b511-4045-b5da-94101e269cd7", "name": "images.image0", "type": "IMAGE", "linkIds": [10], "localized_name": "images.image0", "label": "image", "pos": [460, -156]}], "outputs": [{"id": "30b72604-69b3-4944-b253-a9099bbd73a9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [8], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [1430, -156]}], "widgets": [], "nodes": [{"id": 3, "type": "CustomCombo", "pos": [540, -240], "size": [270, 150], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "color_space", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [2]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["HSL", 0, "HSL", "HSB/HSV", ""]}, {"id": 2, "type": "CustomCombo", "pos": [540, -580], "size": [270, 294], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "mode", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [1]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Master", 0, "Master", "Reds", "Yellows", "Greens", "Cyans", "Blues", "Magentas", "Colorize", ""]}, {"id": 7, "type": "PrimitiveFloat", "pos": [540, 260], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "overlap", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [6]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 100, "precision": 1, "step": 1}, "widgets_values": [50]}, {"id": 6, "type": "PrimitiveFloat", "pos": [540, 160], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "brightness", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [5]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": -100, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 5, "type": "PrimitiveFloat", "pos": [540, 60], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "saturation", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [4]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": -100, "max": 100, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 4, "type": "PrimitiveFloat", "pos": [540, -40], "size": [270, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "hue", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [3]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": -180, "max": 180, "precision": 1, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [255, 0, 0]}, {"offset": 0.16666666666666666, "color": [255, 255, 0]}, {"offset": 0.3333333333333333, "color": [0, 255, 0]}, {"offset": 0.5, "color": [0, 255, 255]}, {"offset": 0.6666666666666666, "color": [0, 0, 255]}, {"offset": 0.8333333333333334, "color": [255, 0, 255]}, {"offset": 1, "color": [255, 0, 0]}]}, "widgets_values": [0]}, {"id": 1, "type": "GLSLShader", "pos": [880, -300], "size": [470, 292], "flags": {}, "order": 6, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 10}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 3}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 4}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 5}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 6}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 1}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": 2}, {"label": "u_int2", "localized_name": "ints.u_int2", "name": "ints.u_int2", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [8]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform int u_int0; // Mode: 0=Master, 1=Reds, 2=Yellows, 3=Greens, 4=Cyans, 5=Blues, 6=Magentas, 7=Colorize\nuniform int u_int1; // Color Space: 0=HSL, 1=HSB/HSV\nuniform float u_float0; // Hue (-180 to 180)\nuniform float u_float1; // Saturation (-100 to 100)\nuniform float u_float2; // Lightness/Brightness (-100 to 100)\nuniform float u_float3; // Overlap (0 to 100) - feathering between adjacent color ranges\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\n// Color range modes\nconst int MODE_MASTER = 0;\nconst int MODE_RED = 1;\nconst int MODE_YELLOW = 2;\nconst int MODE_GREEN = 3;\nconst int MODE_CYAN = 4;\nconst int MODE_BLUE = 5;\nconst int MODE_MAGENTA = 6;\nconst int MODE_COLORIZE = 7;\n\n// Color space modes\nconst int COLORSPACE_HSL = 0;\nconst int COLORSPACE_HSB = 1;\n\nconst float EPSILON = 0.0001;\n\n//=============================================================================\n// RGB <-> HSL Conversions\n//=============================================================================\n\nvec3 rgb2hsl(vec3 c) {\n float maxC = max(max(c.r, c.g), c.b);\n float minC = min(min(c.r, c.g), c.b);\n float delta = maxC - minC;\n\n float h = 0.0;\n float s = 0.0;\n float l = (maxC + minC) * 0.5;\n\n if (delta > EPSILON) {\n s = l < 0.5\n ? delta / (maxC + minC)\n : delta / (2.0 - maxC - minC);\n\n if (maxC == c.r) {\n h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / delta + 2.0;\n } else {\n h = (c.r - c.g) / delta + 4.0;\n }\n h /= 6.0;\n }\n\n return vec3(h, s, l);\n}\n\nfloat hue2rgb(float p, float q, float t) {\n t = fract(t);\n if (t < 1.0/6.0) return p + (q - p) * 6.0 * t;\n if (t < 0.5) return q;\n if (t < 2.0/3.0) return p + (q - p) * (2.0/3.0 - t) * 6.0;\n return p;\n}\n\nvec3 hsl2rgb(vec3 hsl) {\n if (hsl.y < EPSILON) return vec3(hsl.z);\n\n float q = hsl.z < 0.5\n ? hsl.z * (1.0 + hsl.y)\n : hsl.z + hsl.y - hsl.z * hsl.y;\n float p = 2.0 * hsl.z - q;\n\n return vec3(\n hue2rgb(p, q, hsl.x + 1.0/3.0),\n hue2rgb(p, q, hsl.x),\n hue2rgb(p, q, hsl.x - 1.0/3.0)\n );\n}\n\nvec3 rgb2hsb(vec3 c) {\n float maxC = max(max(c.r, c.g), c.b);\n float minC = min(min(c.r, c.g), c.b);\n float delta = maxC - minC;\n\n float h = 0.0;\n float s = (maxC > EPSILON) ? delta / maxC : 0.0;\n float b = maxC;\n\n if (delta > EPSILON) {\n if (maxC == c.r) {\n h = (c.g - c.b) / delta + (c.g < c.b ? 6.0 : 0.0);\n } else if (maxC == c.g) {\n h = (c.b - c.r) / delta + 2.0;\n } else {\n h = (c.r - c.g) / delta + 4.0;\n }\n h /= 6.0;\n }\n\n return vec3(h, s, b);\n}\n\nvec3 hsb2rgb(vec3 hsb) {\n vec3 rgb = clamp(abs(mod(hsb.x * 6.0 + vec3(0.0, 4.0, 2.0), 6.0) - 3.0) - 1.0, 0.0, 1.0);\n return hsb.z * mix(vec3(1.0), rgb, hsb.y);\n}\n\n//=============================================================================\n// Color Range Weight Calculation\n//=============================================================================\n\nfloat hueDistance(float a, float b) {\n float d = abs(a - b);\n return min(d, 1.0 - d);\n}\n\nfloat getHueWeight(float hue, float center, float overlap) {\n float baseWidth = 1.0 / 6.0;\n float feather = baseWidth * overlap;\n\n float d = hueDistance(hue, center);\n\n float inner = baseWidth * 0.5;\n float outer = inner + feather;\n\n return 1.0 - smoothstep(inner, outer, d);\n}\n\nfloat getModeWeight(float hue, int mode, float overlap) {\n if (mode == MODE_MASTER || mode == MODE_COLORIZE) return 1.0;\n\n if (mode == MODE_RED) {\n return max(\n getHueWeight(hue, 0.0, overlap),\n getHueWeight(hue, 1.0, overlap)\n );\n }\n\n float center = float(mode - 1) / 6.0;\n return getHueWeight(hue, center, overlap);\n}\n\n//=============================================================================\n// Adjustment Functions\n//=============================================================================\n\nfloat adjustLightness(float l, float amount) {\n return amount > 0.0\n ? l + (1.0 - l) * amount\n : l + l * amount;\n}\n\nfloat adjustBrightness(float b, float amount) {\n return clamp(b + amount, 0.0, 1.0);\n}\n\nfloat adjustSaturation(float s, float amount) {\n return amount > 0.0\n ? s + (1.0 - s) * amount\n : s + s * amount;\n}\n\nvec3 colorize(vec3 rgb, float hue, float sat, float light) {\n float lum = dot(rgb, vec3(0.299, 0.587, 0.114));\n float l = adjustLightness(lum, light);\n\n vec3 hsl = vec3(fract(hue), clamp(sat, 0.0, 1.0), clamp(l, 0.0, 1.0));\n return hsl2rgb(hsl);\n}\n\n//=============================================================================\n// Main\n//=============================================================================\n\nvoid main() {\n vec4 original = texture(u_image0, v_texCoord);\n\n float hueShift = u_float0 / 360.0; // -180..180 -> -0.5..0.5\n float satAmount = u_float1 / 100.0; // -100..100 -> -1..1\n float lightAmount= u_float2 / 100.0; // -100..100 -> -1..1\n float overlap = u_float3 / 100.0; // 0..100 -> 0..1\n\n vec3 result;\n\n if (u_int0 == MODE_COLORIZE) {\n result = colorize(original.rgb, hueShift, satAmount, lightAmount);\n fragColor = vec4(result, original.a);\n return;\n }\n\n vec3 hsx = (u_int1 == COLORSPACE_HSL)\n ? rgb2hsl(original.rgb)\n : rgb2hsb(original.rgb);\n\n float weight = getModeWeight(hsx.x, u_int0, overlap);\n\n if (u_int0 != MODE_MASTER && hsx.y < EPSILON) {\n weight = 0.0;\n }\n\n if (weight > EPSILON) {\n float h = fract(hsx.x + hueShift * weight);\n float s = clamp(adjustSaturation(hsx.y, satAmount * weight), 0.0, 1.0);\n float v = (u_int1 == COLORSPACE_HSL)\n ? clamp(adjustLightness(hsx.z, lightAmount * weight), 0.0, 1.0)\n : clamp(adjustBrightness(hsx.z, lightAmount * weight), 0.0, 1.0);\n\n vec3 adjusted = vec3(h, s, v);\n result = (u_int1 == COLORSPACE_HSL)\n ? hsl2rgb(adjusted)\n : hsb2rgb(adjusted);\n } else {\n result = original.rgb;\n }\n\n fragColor = vec4(result, original.a);\n}\n", "from_input"]}], "groups": [], "links": [{"id": 3, "origin_id": 4, "origin_slot": 0, "target_id": 1, "target_slot": 2, "type": "FLOAT"}, {"id": 4, "origin_id": 5, "origin_slot": 0, "target_id": 1, "target_slot": 3, "type": "FLOAT"}, {"id": 5, "origin_id": 6, "origin_slot": 0, "target_id": 1, "target_slot": 4, "type": "FLOAT"}, {"id": 6, "origin_id": 7, "origin_slot": 0, "target_id": 1, "target_slot": 5, "type": "FLOAT"}, {"id": 1, "origin_id": 2, "origin_slot": 1, "target_id": 1, "target_slot": 7, "type": "INT"}, {"id": 2, "origin_id": 3, "origin_slot": 1, "target_id": 1, "target_slot": 8, "type": "INT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 8, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} diff --git a/blueprints/Image Blur.json b/blueprints/Image Blur.json new file mode 100644 index 000000000..4b9e74255 --- /dev/null +++ b/blueprints/Image Blur.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 8, "last_link_id": 0, "nodes": [{"id": 8, "type": "198632a3-ee76-4aab-9ce7-a69c624eaff9", "pos": [4470, -1840], "size": [210, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "blurred_image", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["12", "choice"], ["10", "value"]]}, "widgets_values": [], "title": "Image Blur"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "198632a3-ee76-4aab-9ce7-a69c624eaff9", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 12, "lastLinkId": 11, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Blur", "inputNode": {"id": -10, "bounding": [3540, -2445, 120, 60]}, "outputNode": {"id": -20, "bounding": [4620, -2445, 121.11666870117188, 60]}, "inputs": [{"id": "7ff2a402-6b11-45e8-a92a-7158d216520a", "name": "images.image0", "type": "IMAGE", "linkIds": [9], "localized_name": "images.image0", "label": "image", "pos": [3640, -2425]}], "outputs": [{"id": "80a8e19e-ffd9-44a5-90f2-710815a5b063", "name": "IMAGE0", "type": "IMAGE", "linkIds": [3], "localized_name": "IMAGE0", "label": "blurred_image", "pos": [4640, -2425]}], "widgets": [], "nodes": [{"id": 12, "type": "CustomCombo", "pos": [3720, -2620], "size": [270, 174], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "blur_type", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [11]}], "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["Gaussian", 0, "Gaussian", "Box", "Radial", ""]}, {"id": 10, "type": "PrimitiveFloat", "pos": [4020, -2780], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [10]}], "properties": {"Node name for S&R": "PrimitiveFloat", "max": 100, "min": 0}, "widgets_values": [20]}, {"id": 1, "type": "GLSLShader", "pos": [4020, -2670], "size": [430, 212], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 9}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 10}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 11}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [3]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\n#pragma passes 2\nprecision highp float;\n\n// Blur type constants\nconst int BLUR_GAUSSIAN = 0;\nconst int BLUR_BOX = 1;\nconst int BLUR_RADIAL = 2;\n\n// Radial blur config\nconst int RADIAL_SAMPLES = 12;\nconst float RADIAL_STRENGTH = 0.0003;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform int u_int0; // Blur type (BLUR_GAUSSIAN, BLUR_BOX, BLUR_RADIAL)\nuniform float u_float0; // Blur radius/amount\nuniform int u_pass; // Pass index (0 = horizontal, 1 = vertical)\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nvoid main() {\n vec2 texelSize = 1.0 / u_resolution;\n float radius = max(u_float0, 0.0);\n\n // Radial (angular) blur - single pass, doesn't use separable\n if (u_int0 == BLUR_RADIAL) {\n // Only execute on first pass\n if (u_pass > 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec2 center = vec2(0.5);\n vec2 dir = v_texCoord - center;\n float dist = length(dir);\n\n if (dist < 1e-4) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n vec4 sum = vec4(0.0);\n float totalWeight = 0.0;\n float angleStep = radius * RADIAL_STRENGTH;\n\n dir /= dist;\n\n float cosStep = cos(angleStep);\n float sinStep = sin(angleStep);\n\n float negAngle = -float(RADIAL_SAMPLES) * angleStep;\n vec2 rotDir = vec2(\n dir.x * cos(negAngle) - dir.y * sin(negAngle),\n dir.x * sin(negAngle) + dir.y * cos(negAngle)\n );\n\n for (int i = -RADIAL_SAMPLES; i <= RADIAL_SAMPLES; i++) {\n vec2 uv = center + rotDir * dist;\n float w = 1.0 - abs(float(i)) / float(RADIAL_SAMPLES);\n sum += texture(u_image0, uv) * w;\n totalWeight += w;\n\n rotDir = vec2(\n rotDir.x * cosStep - rotDir.y * sinStep,\n rotDir.x * sinStep + rotDir.y * cosStep\n );\n }\n\n fragColor0 = sum / max(totalWeight, 0.001);\n return;\n }\n\n // Separable Gaussian / Box blur\n int samples = int(ceil(radius));\n\n if (samples == 0) {\n fragColor0 = texture(u_image0, v_texCoord);\n return;\n }\n\n // Direction: pass 0 = horizontal, pass 1 = vertical\n vec2 dir = (u_pass == 0) ? vec2(1.0, 0.0) : vec2(0.0, 1.0);\n\n vec4 color = vec4(0.0);\n float totalWeight = 0.0;\n float sigma = radius / 2.0;\n\n for (int i = -samples; i <= samples; i++) {\n vec2 offset = dir * float(i) * texelSize;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float weight;\n if (u_int0 == BLUR_GAUSSIAN) {\n weight = gaussian(float(i), sigma);\n } else {\n // BLUR_BOX\n weight = 1.0;\n }\n\n color += sample_color * weight;\n totalWeight += weight;\n }\n\n fragColor0 = color / totalWeight;\n}\n", "from_input"]}], "groups": [], "links": [{"id": 10, "origin_id": 10, "origin_slot": 0, "target_id": 1, "target_slot": 2, "type": "FLOAT"}, {"id": 11, "origin_id": 12, "origin_slot": 1, "target_id": 1, "target_slot": 4, "type": "INT"}, {"id": 9, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 3, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Blur"}]}} diff --git a/blueprints/Image Captioning (gemini).json b/blueprints/Image Captioning (gemini).json new file mode 100644 index 000000000..89ebac802 --- /dev/null +++ b/blueprints/Image Captioning (gemini).json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 231, "last_link_id": 0, "nodes": [{"id": 231, "type": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11", "pos": [23.13283014087665, 1034.468391137315], "size": [280, 260], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": null}, {"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": []}], "properties": {"proxyWidgets": [["-1", "prompt"], ["-1", "model"], ["1", "seed"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["Describe this image", "gemini-2.5-pro"], "title": "Image Captioning(Gemini)"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "e3e78497-720e-45a2-b4fb-c7bfdb80dd11", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 16, "lastLinkId": 16, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Captioning(Gemini)", "inputNode": {"id": -10, "bounding": [-6870, 2530, 120, 100]}, "outputNode": {"id": -20, "bounding": [-6240, 2530, 120, 60]}, "inputs": [{"id": "97cb8fa5-0514-4e05-b206-46fa6d7b5589", "name": "images", "type": "IMAGE", "linkIds": [1], "localized_name": "images", "shape": 7, "pos": [-6770, 2550]}, {"id": "d8cbd7eb-636a-4d7b-8ff6-b22f1755e26c", "name": "prompt", "type": "STRING", "linkIds": [15], "pos": [-6770, 2570]}, {"id": "b034e26a-d114-4604-aec2-32783e86aa6b", "name": "model", "type": "COMBO", "linkIds": [16], "pos": [-6770, 2590]}], "outputs": [{"id": "e12c6e80-5210-4328-a581-bc8924c53070", "name": "STRING", "type": "STRING", "linkIds": [6], "localized_name": "STRING", "pos": [-6220, 2550]}], "widgets": [], "nodes": [{"id": 1, "type": "GeminiNode", "pos": [-6690, 2360], "size": [390, 430], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 1}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 15}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": 16}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["Describe this image", "gemini-2.5-pro", 511865409297955, "randomize", "- Role: AI Image Analysis and Description Specialist\n- Background: The user requires a prompt that enables AI to analyze images and generate detailed descriptions which can be used as drawing prompts to create similar images. This is essential for tasks like content creation, design inspiration, and artistic exploration.\n- Profile: As an AI Image Analysis and Description Specialist, you possess extensive knowledge in computer vision, image processing, and natural language generation. You are adept at interpreting visual data and translating it into descriptive text that can guide the creation of new images.\n- Skills: Proficiency in image recognition, feature extraction, descriptive language generation, and understanding of artistic elements such as composition, color, and texture.\n- Goals: To analyze the provided image, generate a comprehensive and detailed description that captures the key visual elements, and ensure this description can effectively serve as a drawing prompt for creating similar images.\n- Constrains: The description must be clear, concise, and specific enough to guide the creation of a similar image. It should avoid ambiguity and focus on the most salient features of the image. The output should only contain the drawing prompt.\n- OutputFormat: A detailed text description of the image, highlighting key visual elements such as objects, colors, composition, and any unique features.\n- Workflow:\n 1. Analyze the image to identify key visual elements including objects, colors, and composition.\n 2. Generate a detailed description that captures the essence of the image, ensuring it is specific and actionable.\n 3. Refine the description to ensure clarity and conciseness, making it suitable for use as a drawing prompt."], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 1, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 6, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "*"}, {"id": 15, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 4, "type": "STRING"}, {"id": 16, "origin_id": -10, "origin_slot": 2, "target_id": 1, "target_slot": 5, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Image Captioning"}]}} diff --git a/blueprints/Image Channels.json b/blueprints/Image Channels.json new file mode 100644 index 000000000..cb3488883 --- /dev/null +++ b/blueprints/Image Channels.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", "label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}} diff --git a/blueprints/Image Edit (Flux.2 Klein 4B).json b/blueprints/Image Edit (Flux.2 Klein 4B).json new file mode 100644 index 000000000..c87c7e122 --- /dev/null +++ b/blueprints/Image Edit (Flux.2 Klein 4B).json @@ -0,0 +1 @@ +{"id": "6686cb78-8003-4289-b969-929755e9a84d", "revision": 0, "last_node_id": 81, "last_link_id": 179, "nodes": [{"id": 75, "type": "7b34ab90-36f9-45ba-a665-71d418f0df18", "pos": [311.66672468419983, 830], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "image", "type": "IMAGE", "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["73", "noise_seed"], ["73", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.8.2", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", null, null, "flux-2-klein-base-4b-fp8.safetensors", "qwen_3_4b.safetensors", "flux2-vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "7b34ab90-36f9-45ba-a665-71d418f0df18", "version": 1, "state": {"lastGroupId": 4, "lastNodeId": 81, "lastLinkId": 179, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Edit (Flux.2 Klein 4B)", "inputNode": {"id": -10, "bounding": [-576.3333463986639, 559.0277780034634, 120, 140]}, "outputNode": {"id": -20, "bounding": [1373.6666536013363, 549.0277780034634, 120, 60]}, "inputs": [{"id": "7061147a-fb75-450d-8e97-c8be594a8e16", "name": "text", "type": "STRING", "linkIds": [162], "label": "prompt", "pos": [-476.33334639866393, 579.0277780034634]}, {"id": "68629112-b7b0-41ce-8912-23adad00d3db", "name": "image", "type": "IMAGE", "linkIds": [175], "pos": [-476.33334639866393, 599.0277780034634]}, {"id": "006f0b42-cb11-4484-8b7e-c34a9fb12824", "name": "unet_name", "type": "COMBO", "linkIds": [177], "pos": [-476.33334639866393, 619.0277780034634]}, {"id": "0083499c-8e83-4974-a587-ba6e89e36acc", "name": "clip_name", "type": "COMBO", "linkIds": [178], "pos": [-476.33334639866393, 639.0277780034634]}, {"id": "7c95e27c-7920-43d5-a0ac-c6570653f5da", "name": "vae_name", "type": "COMBO", "linkIds": [179], "pos": [-476.33334639866393, 659.0277780034634]}], "outputs": [{"id": "c5e7966d-07ed-4c9a-ad89-9d378a41ea7b", "name": "IMAGE", "type": "IMAGE", "linkIds": [153], "localized_name": "IMAGE", "pos": [1393.6666536013363, 569.0277780034634]}], "widgets": [], "nodes": [{"id": 61, "type": "KSamplerSelect", "pos": [560, 460], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [144]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 62, "type": "Flux2Scheduler", "pos": [560, 560], "size": [270, 106], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 171}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 173}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [145]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "Flux2Scheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 1024, 1024]}, {"id": 63, "type": "CFGGuider", "pos": [560, 320], "size": [270, 98], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 139}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 167}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 168}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [143]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [5]}, {"id": 65, "type": "VAEDecode", "pos": [1093.6666007601261, 154.02777277882814], "size": [220, 46], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 147}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 148}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [153]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 70, "type": "UNETLoader", "pos": [-386.3333318901398, 203.8611174586574], "size": [364.42708333333337, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 177}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [139]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "UNETLoader", "models": [{"name": "flux-2-klein-base-4b-fp8.safetensors", "url": "https://huggingface.co/black-forest-labs/FLUX.2-klein-base-4b-fp8/resolve/main/flux-2-klein-base-4b-fp8.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["flux-2-klein-base-4b-fp8.safetensors", "default"]}, {"id": 71, "type": "CLIPLoader", "pos": [-386.3333318901398, 353.8611341117752], "size": [364.42708333333337, 106], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 178}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [151, 152]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "flux2", "default"]}, {"id": 74, "type": "CLIPTextEncode", "pos": [43.666666014853874, 204.02777159555063], "size": [430, 230], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 151}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 162}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [165]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 67, "type": "CLIPTextEncode", "pos": [43.666666014853874, 534.0277718670993], "size": [430, 88], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 152}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [166]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#322", "bgcolor": "#533"}, {"id": 72, "type": "VAELoader", "pos": [-386.3333318901398, 523.8611624133522], "size": [364.42708333333337, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 179}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [148, 176]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "VAELoader", "models": [{"name": "flux2-vae.safetensors", "url": "https://huggingface.co/Comfy-Org/flux2-dev/resolve/main/split_files/vae/flux2-vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["flux2-vae.safetensors"]}, {"id": 66, "type": "EmptyFlux2LatentImage", "pos": [570, 740], "size": [270, 106], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 172}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 174}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [146]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "EmptyFlux2LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}, {"id": 80, "type": "ImageScaleToTotalPixels", "pos": [-391.6666683297289, 715.194415255584], "size": [270, 106], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 175}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "megapixels", "name": "megapixels", "type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": "resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [169, 170]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "ImageScaleToTotalPixels", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["nearest-exact", 1, 1]}, {"id": 79, "type": "6007e698-2ebd-4917-84d8-299b35d7b7ab", "pos": [238.33332484215495, 835.1944447404384], "size": [240, 86], "flags": {}, "order": 12, "mode": 0, "inputs": [{"label": "positive", "name": "conditioning", "type": "CONDITIONING", "link": 165}, {"label": "negative", "name": "conditioning_1", "type": "CONDITIONING", "link": 166}, {"name": "pixels", "type": "IMAGE", "link": 169}, {"name": "vae", "type": "VAE", "link": 176}], "outputs": [{"label": "positive", "name": "CONDITIONING", "type": "CONDITIONING", "links": [167]}, {"label": "negative", "name": "CONDITIONING_1", "type": "CONDITIONING", "links": [168]}], "properties": {"proxyWidgets": [], "cnr_id": "comfy-core", "ver": "0.8.2", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 81, "type": "GetImageSize", "pos": [310, 720], "size": [187.5, 66], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 170}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [171, 172]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [173, 174]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 64, "type": "SamplerCustomAdvanced", "pos": [860, 220], "size": [212.3638671875, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 142}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 143}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 144}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 145}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 146}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": [147]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 73, "type": "RandomNoise", "pos": [560, 200], "size": [270, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [142]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}], "groups": [{"id": 1, "title": "Models", "bounding": [-390, 120, 380, 550], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Prompt", "bounding": [30, 120, 470, 550], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Sampler", "bounding": [540, 120, 532.3638671875, 550], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 139, "origin_id": 70, "origin_slot": 0, "target_id": 63, "target_slot": 0, "type": "MODEL"}, {"id": 142, "origin_id": 73, "origin_slot": 0, "target_id": 64, "target_slot": 0, "type": "NOISE"}, {"id": 143, "origin_id": 63, "origin_slot": 0, "target_id": 64, "target_slot": 1, "type": "GUIDER"}, {"id": 144, "origin_id": 61, "origin_slot": 0, "target_id": 64, "target_slot": 2, "type": "SAMPLER"}, {"id": 145, "origin_id": 62, "origin_slot": 0, "target_id": 64, "target_slot": 3, "type": "SIGMAS"}, {"id": 146, "origin_id": 66, "origin_slot": 0, "target_id": 64, "target_slot": 4, "type": "LATENT"}, {"id": 147, "origin_id": 64, "origin_slot": 0, "target_id": 65, "target_slot": 0, "type": "LATENT"}, {"id": 148, "origin_id": 72, "origin_slot": 0, "target_id": 65, "target_slot": 1, "type": "VAE"}, {"id": 152, "origin_id": 71, "origin_slot": 0, "target_id": 67, "target_slot": 0, "type": "CLIP"}, {"id": 151, "origin_id": 71, "origin_slot": 0, "target_id": 74, "target_slot": 0, "type": "CLIP"}, {"id": 153, "origin_id": 65, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 162, "origin_id": -10, "origin_slot": 0, "target_id": 74, "target_slot": 1, "type": "STRING"}, {"id": 165, "origin_id": 74, "origin_slot": 0, "target_id": 79, "target_slot": 0, "type": "CONDITIONING"}, {"id": 166, "origin_id": 67, "origin_slot": 0, "target_id": 79, "target_slot": 1, "type": "CONDITIONING"}, {"id": 167, "origin_id": 79, "origin_slot": 0, "target_id": 63, "target_slot": 1, "type": "CONDITIONING"}, {"id": 168, "origin_id": 79, "origin_slot": 1, "target_id": 63, "target_slot": 2, "type": "CONDITIONING"}, {"id": 169, "origin_id": 80, "origin_slot": 0, "target_id": 79, "target_slot": 2, "type": "IMAGE"}, {"id": 170, "origin_id": 80, "origin_slot": 0, "target_id": 81, "target_slot": 0, "type": "IMAGE"}, {"id": 171, "origin_id": 81, "origin_slot": 0, "target_id": 62, "target_slot": 1, "type": "INT"}, {"id": 172, "origin_id": 81, "origin_slot": 0, "target_id": 66, "target_slot": 0, "type": "INT"}, {"id": 173, "origin_id": 81, "origin_slot": 1, "target_id": 62, "target_slot": 2, "type": "INT"}, {"id": 174, "origin_id": 81, "origin_slot": 1, "target_id": 66, "target_slot": 1, "type": "INT"}, {"id": 175, "origin_id": -10, "origin_slot": 1, "target_id": 80, "target_slot": 0, "type": "IMAGE"}, {"id": 176, "origin_id": 72, "origin_slot": 0, "target_id": 79, "target_slot": 3, "type": "VAE"}, {"id": 177, "origin_id": -10, "origin_slot": 2, "target_id": 70, "target_slot": 0, "type": "COMBO"}, {"id": 178, "origin_id": -10, "origin_slot": 3, "target_id": 71, "target_slot": 0, "type": "COMBO"}, {"id": 179, "origin_id": -10, "origin_slot": 4, "target_id": 72, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Edit image"}, {"id": "6007e698-2ebd-4917-84d8-299b35d7b7ab", "version": 1, "state": {"lastGroupId": 4, "lastNodeId": 81, "lastLinkId": 179, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Reference Conditioning", "inputNode": {"id": -10, "bounding": [-270, 990, 120, 120]}, "outputNode": {"id": -20, "bounding": [580, 970, 120, 80]}, "inputs": [{"id": "5c9a0f5e-8cee-4947-90bc-330de782043a", "name": "conditioning", "type": "CONDITIONING", "linkIds": [165], "label": "positive", "pos": [-170, 1010]}, {"id": "61826d46-4c21-4ad6-801c-3e3fa94115e2", "name": "conditioning_1", "type": "CONDITIONING", "linkIds": [166], "label": "negative", "pos": [-170, 1030]}, {"id": "345bf085-5939-47ff-9767-8f8f239a719c", "name": "pixels", "type": "IMAGE", "linkIds": [167], "pos": [-170, 1050]}, {"id": "f4594e34-e2f5-4f1e-b1fa-a1dc2aeb0a90", "name": "vae", "type": "VAE", "linkIds": [168], "pos": [-170, 1070]}], "outputs": [{"id": "b3357c0e-6428-4055-9cd3-3595f0896fa8", "name": "CONDITIONING", "type": "CONDITIONING", "linkIds": [169], "label": "positive", "pos": [600, 990]}, {"id": "01519713-2ed1-4694-a387-79f44e088e89", "name": "CONDITIONING_1", "type": "CONDITIONING", "linkIds": [170], "label": "negative", "pos": [600, 1010]}], "widgets": [], "nodes": [{"id": 76, "type": "ReferenceLatent", "pos": [170, 1050], "size": [204.134765625, 46], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 166}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 163}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [170]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "ReferenceLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 78, "type": "VAEEncode", "pos": [-90, 1150], "size": [190, 46], "flags": {"collapsed": false}, "order": 2, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 167}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 168}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [163, 164]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "VAEEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 77, "type": "ReferenceLatent", "pos": [170, 940], "size": [210, 46], "flags": {"collapsed": false}, "order": 1, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 165}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 164}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [169]}], "properties": {"cnr_id": "comfy-core", "ver": "0.8.2", "Node name for S&R": "ReferenceLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}], "groups": [], "links": [{"id": 163, "origin_id": 78, "origin_slot": 0, "target_id": 76, "target_slot": 1, "type": "LATENT"}, {"id": 164, "origin_id": 78, "origin_slot": 0, "target_id": 77, "target_slot": 1, "type": "LATENT"}, {"id": 165, "origin_id": -10, "origin_slot": 0, "target_id": 77, "target_slot": 0, "type": "CONDITIONING"}, {"id": 166, "origin_id": -10, "origin_slot": 1, "target_id": 76, "target_slot": 0, "type": "CONDITIONING"}, {"id": 167, "origin_id": -10, "origin_slot": 2, "target_id": 78, "target_slot": 0, "type": "IMAGE"}, {"id": 168, "origin_id": -10, "origin_slot": 3, "target_id": 78, "target_slot": 1, "type": "VAE"}, {"id": 169, "origin_id": 77, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "CONDITIONING"}, {"id": 170, "origin_id": 76, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "CONDITIONING"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 1.1478862047043865, "offset": [302.91933883258804, -648.9802050882657]}}, "version": 0.4} diff --git a/blueprints/Image Edit (Qwen 2511).json b/blueprints/Image Edit (Qwen 2511).json new file mode 100644 index 000000000..33e85333b --- /dev/null +++ b/blueprints/Image Edit (Qwen 2511).json @@ -0,0 +1 @@ +{"id": "d84b7d1a-a73f-4e31-bd16-983ac0cf5f1b", "revision": 0, "last_node_id": 17, "last_link_id": 32, "nodes": [{"id": 17, "type": "9fa6af8b-8c99-4446-8681-bccf8ba4ea54", "pos": [183.33334355513557, -120.00000702649223], "size": [383.0729166666667, 381.10677083333337], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image 1", "name": "image1", "type": "IMAGE", "link": null}, {"label": "image 2 (optional)", "name": "image2", "type": "IMAGE", "link": null}, {"label": "image 3 (optional)", "name": "image3", "type": "IMAGE", "link": null}, {"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "prompt"], ["15", "seed"], ["15", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", null, null, "qwen_image_edit_2511_bf16.safetensors", "qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "9fa6af8b-8c99-4446-8681-bccf8ba4ea54", "version": 1, "state": {"lastGroupId": 2, "lastNodeId": 17, "lastLinkId": 32, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Edit (Qwen 2511)", "inputNode": {"id": -10, "bounding": [-412.6162343565087, 327.2321295314722, 142.59765625, 180]}, "outputNode": {"id": -20, "bounding": [1631.0466138212807, 305.6854343585077, 120, 60]}, "inputs": [{"id": "6e401a3f-21a6-4552-8ee4-179c313c1910", "name": "image1", "type": "IMAGE", "linkIds": [25], "label": "image 1", "pos": [-290.0185781065087, 347.2321295314722]}, {"id": "a0a6307b-62b8-481e-bb17-d332eceadbe4", "name": "image2", "type": "IMAGE", "linkIds": [21, 26], "label": "image 2 (optional)", "pos": [-290.0185781065087, 367.2321295314722]}, {"id": "232fe944-fc3f-43dd-bb34-112d0360cb5f", "name": "image3", "type": "IMAGE", "linkIds": [22, 27], "label": "image 3 (optional)", "pos": [-290.0185781065087, 387.2321295314722]}, {"id": "9b8ed2f4-5875-4f59-b4c1-5ab79a412f4e", "name": "prompt", "type": "STRING", "linkIds": [23], "pos": [-290.0185781065087, 407.2321295314722]}, {"id": "403a6bd0-f170-4cfb-b72e-cd7fa1dbcd06", "name": "unet_name", "type": "COMBO", "linkIds": [30], "pos": [-290.0185781065087, 427.2321295314722]}, {"id": "86a53531-2fab-47da-9525-858c80737044", "name": "clip_name", "type": "COMBO", "linkIds": [31], "pos": [-290.0185781065087, 447.2321295314722]}, {"id": "499f39e9-d698-41dc-b126-b7ea6024cf5d", "name": "vae_name", "type": "COMBO", "linkIds": [32], "pos": [-290.0185781065087, 467.2321295314722]}], "outputs": [{"id": "f2ccd1fa-428e-4127-89a6-760906013172", "name": "IMAGE", "type": "IMAGE", "linkIds": [24], "pos": [1651.0466138212807, 325.6854343585077]}], "widgets": [], "nodes": [{"id": 2, "type": "ModelSamplingAuraFlow", "pos": [791.0465113899395, -54.3145423152618], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 29}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [4]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3.1]}, {"id": 3, "type": "VAELoader", "pos": [-174.9530552190643, 462.6706561999898], "size": [396.1328125, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 32}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [6, 10, 12, 15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "VAELoader", "models": [{"name": "qwen_image_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_vae.safetensors"]}, {"id": 4, "type": "UNETLoader", "pos": [-174.9530552190643, -23.329297689188216], "size": [396.1328125, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 30}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [29]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "UNETLoader", "models": [{"name": "qwen_image_edit_2511_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Edit_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_edit_2511_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_edit_2511_bf16.safetensors", "default"]}, {"id": 5, "type": "FluxKontextMultiReferenceLatentMethod", "pos": [781.0466382725523, 315.68545764091465], "size": [309.66145833333337, 58], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 2}, {"localized_name": "reference_latents_method", "name": "reference_latents_method", "type": "COMBO", "widget": {"name": "reference_latents_method"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "FluxKontextMultiReferenceLatentMethod", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["index_timestep_zero"], "color": "#222", "bgcolor": "#000"}, {"id": 6, "type": "FluxKontextMultiReferenceLatentMethod", "pos": [781.0466382725523, 185.68543791920104], "size": [309.66145833333337, 58], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 3}, {"localized_name": "reference_latents_method", "name": "reference_latents_method", "type": "COMBO", "widget": {"name": "reference_latents_method"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [17]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "FluxKontextMultiReferenceLatentMethod", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["index_timestep_zero"], "color": "#222", "bgcolor": "#000"}, {"id": 7, "type": "CFGNorm", "pos": [791.0465113899395, 55.68545297239743], "size": [270, 58], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 4}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "patched_model", "name": "patched_model", "type": "MODEL", "links": [16]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CFGNorm", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 8, "type": "MarkdownNote", "pos": [1111.0466241355298, 555.6854726502876], "size": [270, 195.10416666666669], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [], "title": "KSampler settings", "properties": {}, "widgets_values": ["You can test and find the best setting by yourself. The following table is for reference.\n| | Qwen | Comfy | lightning LoRA |\n|--------|---------|------------|---------------------------|\n| Steps | 40 | 20 | 4 |\n| CFG | 4.0 | 4.0 | 1.0 |\n\nBy default, we use 20 steps as we just don't want it to take too long. Try 40 if you want a better result, but it will take longer."], "color": "#222", "bgcolor": "#000"}, {"id": 9, "type": "TextEncodeQwenImageEditPlus", "pos": [301.0466082538065, 305.6854454238875], "size": [420, 170], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 5}, {"localized_name": "vae", "name": "vae", "shape": 7, "type": "VAE", "link": 6}, {"localized_name": "image1", "name": "image1", "shape": 7, "type": "IMAGE", "link": 28}, {"localized_name": "image2", "name": "image2", "shape": 7, "type": "IMAGE", "link": 21}, {"localized_name": "image3", "name": "image3", "shape": 7, "type": "IMAGE", "link": 22}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [2]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "TextEncodeQwenImageEditPlus", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#322", "bgcolor": "#533"}, {"id": 10, "type": "Note", "pos": [801.0465236069665, 435.6854651456011], "size": [280, 88], "flags": {}, "order": 1, "mode": 0, "inputs": [], "outputs": [], "properties": {}, "widgets_values": ["The \"Edit Model Reference Method\" nodes above are not needed if you use Comfy files, but may be needed if you use repackaged ones from other people."], "color": "#432", "bgcolor": "#653"}, {"id": 13, "type": "TextEncodeQwenImageEditPlus", "pos": [301.0466082538065, -14.314562996972978], "size": [426.6276041666667, 215.55989583333334], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 11}, {"localized_name": "vae", "name": "vae", "shape": 7, "type": "VAE", "link": 12}, {"localized_name": "image1", "name": "image1", "shape": 7, "type": "IMAGE", "link": 13}, {"localized_name": "image2", "name": "image2", "shape": 7, "type": "IMAGE", "link": 26}, {"localized_name": "image3", "name": "image3", "shape": 7, "type": "IMAGE", "link": 27}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 23}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [3]}], "title": "TextEncodeQwenImageEditPlus (Positive)", "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "TextEncodeQwenImageEditPlus", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 14, "type": "VAEEncode", "pos": [511.0465866120977, 645.6854435038923], "size": [187.5, 46], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 14}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 15}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [19]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "VAEEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 15, "type": "KSampler", "pos": [1101.0466119185025, -54.3145423152618], "size": [280, 510], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 16}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 17}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 18}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 19}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [9]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 40, 4, "euler", "simple", 1]}, {"id": 12, "type": "VAEDecode", "pos": [1431.0464586818402, -44.31456487314459], "size": [187.5, 46], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 9}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 10}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [24]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 16, "type": "FluxKontextImageScale", "pos": [-170, 630], "size": [194.9458984375, 26], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 25}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [7, 13, 14, 28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "FluxKontextImageScale", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 1, "type": "CLIPLoader", "pos": [-170, 200], "size": [396.1328125, 106], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 31}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [5, 11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}], "groups": [{"id": 1, "title": "Models", "bounding": [-180, -90, 416.1419982910156, 630.0299011230469], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Prompt", "bounding": [250, -90, 510, 630], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 2, "origin_id": 9, "origin_slot": 0, "target_id": 5, "target_slot": 0, "type": "CONDITIONING"}, {"id": 3, "origin_id": 13, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CONDITIONING"}, {"id": 4, "origin_id": 2, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 1, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "CLIP"}, {"id": 6, "origin_id": 3, "origin_slot": 0, "target_id": 9, "target_slot": 1, "type": "VAE"}, {"id": 9, "origin_id": 15, "origin_slot": 0, "target_id": 12, "target_slot": 0, "type": "LATENT"}, {"id": 10, "origin_id": 3, "origin_slot": 0, "target_id": 12, "target_slot": 1, "type": "VAE"}, {"id": 11, "origin_id": 1, "origin_slot": 0, "target_id": 13, "target_slot": 0, "type": "CLIP"}, {"id": 12, "origin_id": 3, "origin_slot": 0, "target_id": 13, "target_slot": 1, "type": "VAE"}, {"id": 13, "origin_id": 16, "origin_slot": 0, "target_id": 13, "target_slot": 2, "type": "IMAGE"}, {"id": 14, "origin_id": 16, "origin_slot": 0, "target_id": 14, "target_slot": 0, "type": "IMAGE"}, {"id": 15, "origin_id": 3, "origin_slot": 0, "target_id": 14, "target_slot": 1, "type": "VAE"}, {"id": 16, "origin_id": 7, "origin_slot": 0, "target_id": 15, "target_slot": 0, "type": "MODEL"}, {"id": 17, "origin_id": 6, "origin_slot": 0, "target_id": 15, "target_slot": 1, "type": "CONDITIONING"}, {"id": 18, "origin_id": 5, "origin_slot": 0, "target_id": 15, "target_slot": 2, "type": "CONDITIONING"}, {"id": 19, "origin_id": 14, "origin_slot": 0, "target_id": 15, "target_slot": 3, "type": "LATENT"}, {"id": 21, "origin_id": -10, "origin_slot": 1, "target_id": 9, "target_slot": 3, "type": "IMAGE"}, {"id": 22, "origin_id": -10, "origin_slot": 2, "target_id": 9, "target_slot": 4, "type": "IMAGE"}, {"id": 23, "origin_id": -10, "origin_slot": 3, "target_id": 13, "target_slot": 5, "type": "STRING"}, {"id": 24, "origin_id": 12, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 25, "origin_id": -10, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": -10, "origin_slot": 1, "target_id": 13, "target_slot": 3, "type": "IMAGE"}, {"id": 27, "origin_id": -10, "origin_slot": 2, "target_id": 13, "target_slot": 4, "type": "IMAGE"}, {"id": 28, "origin_id": 16, "origin_slot": 0, "target_id": 9, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 4, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": "MODEL"}, {"id": 30, "origin_id": -10, "origin_slot": 4, "target_id": 4, "target_slot": 0, "type": "COMBO"}, {"id": 31, "origin_id": -10, "origin_slot": 5, "target_id": 1, "target_slot": 0, "type": "COMBO"}, {"id": 32, "origin_id": -10, "origin_slot": 6, "target_id": 3, "target_slot": 0, "type": "COMBO"}], "extra": {"frontendVersion": "1.37.11", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Edit image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.11", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.8597138248970195, "offset": [716.4750075519744, 479.19752576099086]}}, "version": 0.4} diff --git a/blueprints/Image Inpainting (Qwen-image).json b/blueprints/Image Inpainting (Qwen-image).json new file mode 100644 index 000000000..5f8ef81f9 --- /dev/null +++ b/blueprints/Image Inpainting (Qwen-image).json @@ -0,0 +1 @@ +{"id": "84318cde-a839-41d4-8632-df6d7c50ffc5", "revision": 0, "last_node_id": 256, "last_link_id": 403, "nodes": [{"id": 256, "type": "c93d5779-7bfe-4511-98e2-6a665ed0dff2", "pos": [2271.698367680439, -460.52399024524993], "size": [420, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": null}, {"name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "control_net_name"], ["3", "seed"], ["3", "control_after_generate"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["", "qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image_vae.safetensors", "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "c93d5779-7bfe-4511-98e2-6a665ed0dff2", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 256, "lastLinkId": 403, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Inpainting (Qwen-image)", "inputNode": {"id": -10, "bounding": [-860, 530, 140.587890625, 160]}, "outputNode": {"id": -20, "bounding": [1290, 530, 120, 60]}, "inputs": [{"id": "61dc027a-a7fc-4c40-8aa4-fd4a6e36d00f", "name": "image", "type": "IMAGE", "linkIds": [399], "localized_name": "image", "pos": [-739.412109375, 550]}, {"id": "28f4cf42-1c6d-49b8-abce-53ef9c628907", "name": "mask", "type": "MASK", "linkIds": [205], "localized_name": "mask", "pos": [-739.412109375, 570]}, {"id": "f082f9ab-9a31-4d99-b4fd-4900453a30a8", "name": "text", "type": "STRING", "linkIds": [394], "pos": [-739.412109375, 590]}, {"id": "9e692477-812a-4054-b780-471228a9821c", "name": "clip_name", "type": "COMBO", "linkIds": [401], "pos": [-739.412109375, 610]}, {"id": "dfbf7eac-1f92-4636-9ead-6a1c2595c5e2", "name": "vae_name", "type": "COMBO", "linkIds": [402], "pos": [-739.412109375, 630]}, {"id": "cfaf4549-e61b-4a88-a514-24894142433a", "name": "control_net_name", "type": "COMBO", "linkIds": [403], "pos": [-739.412109375, 650]}], "outputs": [{"id": "45b4d67e-3d8f-4936-9599-607a23161a3c", "name": "IMAGE", "type": "IMAGE", "linkIds": [400], "pos": [1310, 550]}], "widgets": [], "nodes": [{"id": 38, "type": "CLIPLoader", "pos": [-90, 70], "size": [380, 106], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 401}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [74, 75]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}]}, "widgets_values": ["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}, {"id": 37, "type": "UNETLoader", "pos": [-90, -60], "size": [380, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [145]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "UNETLoader", "models": [{"name": "qwen_image_fp8_e4m3fn.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["qwen_image_fp8_e4m3fn.safetensors", "default"]}, {"id": 7, "type": "CLIPTextEncode", "pos": [330, 320], "size": [460, 140], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 75}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [191]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [" "], "color": "#223", "bgcolor": "#335"}, {"id": 84, "type": "ControlNetLoader", "pos": [-90, 340], "size": [380, 58], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "control_net_name", "name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": 403}], "outputs": [{"localized_name": "CONTROL_NET", "name": "CONTROL_NET", "type": "CONTROL_NET", "links": [192]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ControlNetLoader", "models": [{"name": "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-InstantX-ControlNets/resolve/main/split_files/controlnet/Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "directory": "controlnet"}]}, "widgets_values": ["Qwen-Image-InstantX-ControlNet-Inpainting.safetensors"]}, {"id": 39, "type": "VAELoader", "pos": [-90, 230], "size": [380, 58], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 402}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [76, 144, 193]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAELoader", "models": [{"name": "qwen_image_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", "directory": "vae"}]}, "widgets_values": ["qwen_image_vae.safetensors"]}, {"id": 66, "type": "ModelSamplingAuraFlow", "pos": [860, -100], "size": [310, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 149}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [156]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelSamplingAuraFlow"}, "widgets_values": [3.1000000000000005]}, {"id": 108, "type": "ControlNetInpaintingAliMamaApply", "pos": [430, 560], "size": [317.0093688964844, 206], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 190}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 191}, {"localized_name": "control_net", "name": "control_net", "type": "CONTROL_NET", "link": 192}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 193}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 397}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": 220}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"localized_name": "start_percent", "name": "start_percent", "type": "FLOAT", "widget": {"name": "start_percent"}, "link": null}, {"localized_name": "end_percent", "name": "end_percent", "type": "FLOAT", "widget": {"name": "end_percent"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [188]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [189]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ControlNetInpaintingAliMamaApply"}, "widgets_values": [1, 0, 1]}, {"id": 86, "type": "Note", "pos": [860, 500], "size": [307.4002380371094, 127.38092803955078], "flags": {}, "order": 1, "mode": 0, "inputs": [], "outputs": [], "properties": {}, "widgets_values": ["Set cfg to 1.0 for a speed boost at the cost of consistency. Samplers like res_multistep work pretty well at cfg 1.0\n\nThe official number of steps is 50 but I think that's too much. Even just 10 steps seems to work."], "color": "#432", "bgcolor": "#653"}, {"id": 76, "type": "VAEEncode", "pos": [430, 830], "size": [140, 46], "flags": {"collapsed": true}, "order": 11, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 396}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 144}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [208]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEEncode"}, "widgets_values": []}, {"id": 122, "type": "SetLatentNoiseMask", "pos": [430, 890], "size": [230, 50], "flags": {"collapsed": true}, "order": 15, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 208}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": 219}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [210]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "SetLatentNoiseMask"}, "widgets_values": []}, {"id": 223, "type": "MarkdownNote", "pos": [860, 670], "size": [300, 160], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [], "title": "Note: KSampler settings", "properties": {}, "widgets_values": ["You can test and find the best setting by yourself. The following table is for reference.\n| Parameters | Qwen Team | Comfy Original | with 4steps LoRA |\n|--------|---------|------------|---------------------------|\n| Steps | 50 | 20 | 4 |\n| CFG | 4.0 | 2.5 | 1.0 |"], "color": "#432", "bgcolor": "#653"}, {"id": 80, "type": "LoraLoaderModelOnly", "pos": [350, -70], "size": [430, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 145}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [149]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "Qwen-Image-Lightning-4steps-V1.0.safetensors", "url": "https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V1.0.safetensors", "directory": "loras"}]}, "widgets_values": ["Qwen-Image-Lightning-4steps-V1.0.safetensors", 1]}, {"id": 6, "type": "CLIPTextEncode", "pos": [330, 110], "size": [460, 164.31304931640625], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 74}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 394}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [190]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 121, "type": "56a1f603-fbd2-40ed-94ef-c9ecbd96aca8", "pos": [430, 950], "size": [330, 100], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 205}, {"name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": null}, {"name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [215, 219, 220]}], "properties": {"proxyWidgets": [["-1", "expand"], ["-1", "blur_radius"]], "cnr_id": "comfy-core", "ver": "0.3.59"}, "widgets_values": [0, 1]}, {"id": 3, "type": "KSampler", "pos": [860, 20], "size": [310, 430], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 156}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 188}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 189}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 210}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [128]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "KSampler"}, "widgets_values": [0, "randomize", 4, 1, "euler", "simple", 1]}, {"id": 224, "type": "FluxKontextImageScale", "pos": [10, 1090], "size": [194.9458984375, 26], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 399}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [396, 397]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "FluxKontextImageScale"}, "widgets_values": []}, {"id": 8, "type": "VAEDecode", "pos": [900, 880], "size": [250, 46], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 128}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 76}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [110, 400]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEDecode"}, "widgets_values": []}, {"id": 124, "type": "MaskPreview", "pos": [440, 1100], "size": [320, 340], "flags": {}, "order": 16, "mode": 4, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 215}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskPreview"}, "widgets_values": []}], "groups": [{"id": 1, "title": "Step 1 - Upload models", "bounding": [-100, -140, 400, 610], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 4, "title": "Step 3 - Prompt", "bounding": [320, 40, 490, 430], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "4 steps lightning LoRA", "bounding": [320, -140, 490, 160], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 14, "title": "Inpainting", "bounding": [-110, -180, 1340, 1650], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 75, "origin_id": 38, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "CLIP"}, {"id": 149, "origin_id": 80, "origin_slot": 0, "target_id": 66, "target_slot": 0, "type": "MODEL"}, {"id": 190, "origin_id": 6, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "CONDITIONING"}, {"id": 191, "origin_id": 7, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 192, "origin_id": 84, "origin_slot": 0, "target_id": 108, "target_slot": 2, "type": "CONTROL_NET"}, {"id": 193, "origin_id": 39, "origin_slot": 0, "target_id": 108, "target_slot": 3, "type": "VAE"}, {"id": 220, "origin_id": 121, "origin_slot": 0, "target_id": 108, "target_slot": 5, "type": "MASK"}, {"id": 144, "origin_id": 39, "origin_slot": 0, "target_id": 76, "target_slot": 1, "type": "VAE"}, {"id": 208, "origin_id": 76, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 219, "origin_id": 121, "origin_slot": 0, "target_id": 122, "target_slot": 1, "type": "MASK"}, {"id": 215, "origin_id": 121, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "MASK"}, {"id": 128, "origin_id": 3, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 76, "origin_id": 39, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 74, "origin_id": 38, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP"}, {"id": 145, "origin_id": 37, "origin_slot": 0, "target_id": 80, "target_slot": 0, "type": "MODEL"}, {"id": 156, "origin_id": 66, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 188, "origin_id": 108, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 189, "origin_id": 108, "origin_slot": 1, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 210, "origin_id": 122, "origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 205, "origin_id": -10, "origin_slot": 1, "target_id": 121, "target_slot": 0, "type": "MASK"}, {"id": 110, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 394, "origin_id": -10, "origin_slot": 2, "target_id": 6, "target_slot": 1, "type": "STRING"}, {"id": 396, "origin_id": 224, "origin_slot": 0, "target_id": 76, "target_slot": 0, "type": "IMAGE"}, {"id": 397, "origin_id": 224, "origin_slot": 0, "target_id": 108, "target_slot": 4, "type": "IMAGE"}, {"id": 399, "origin_id": -10, "origin_slot": 0, "target_id": 224, "target_slot": 0, "type": "IMAGE"}, {"id": 400, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 401, "origin_id": -10, "origin_slot": 3, "target_id": 38, "target_slot": 0, "type": "COMBO"}, {"id": 402, "origin_id": -10, "origin_slot": 4, "target_id": 39, "target_slot": 0, "type": "COMBO"}, {"id": 403, "origin_id": -10, "origin_slot": 5, "target_id": 84, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Inpaint image"}, {"id": "56a1f603-fbd2-40ed-94ef-c9ecbd96aca8", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 256, "lastLinkId": 403, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Grow and Blur Mask", "inputNode": {"id": -10, "bounding": [290, 3536, 120, 100]}, "outputNode": {"id": -20, "bounding": [1130, 3536, 120, 60]}, "inputs": [{"id": "3ac60d5e-8f9d-4663-9b24-b3a15a3e9e20", "name": "mask", "type": "MASK", "linkIds": [279], "localized_name": "mask", "pos": [390, 3556]}, {"id": "d1ab0cf5-7062-41ac-9f4b-8c660fc4a714", "name": "expand", "type": "INT", "linkIds": [379], "pos": [390, 3576]}, {"id": "1a787af5-da9f-44c5-9f5a-3f71609ca0ef", "name": "blur_radius", "type": "INT", "linkIds": [380], "pos": [390, 3596]}], "outputs": [{"id": "1f97f683-13d3-4871-876d-678fca850d89", "name": "MASK", "type": "MASK", "linkIds": [378], "localized_name": "MASK", "pos": [1150, 3556]}], "widgets": [], "nodes": [{"id": 253, "type": "ImageToMask", "pos": [800, 3630], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 377}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [378]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageToMask"}, "widgets_values": ["red"]}, {"id": 251, "type": "MaskToImage", "pos": [780, 3470], "size": [260, 70], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 372}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [373]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 199, "type": "GrowMask", "pos": [470, 3460], "size": [270, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 279}, {"localized_name": "expand", "name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": 379}, {"localized_name": "tapered_corners", "name": "tapered_corners", "type": "BOOLEAN", "widget": {"name": "tapered_corners"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [372]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "GrowMask"}, "widgets_values": [0, true]}, {"id": 252, "type": "ImageBlur", "pos": [480, 3620], "size": [270, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 373}, {"localized_name": "blur_radius", "name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": 380}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [377]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageBlur"}, "widgets_values": [1, 1]}], "groups": [], "links": [{"id": 373, "origin_id": 251, "origin_slot": 0, "target_id": 252, "target_slot": 0, "type": "IMAGE"}, {"id": 377, "origin_id": 252, "origin_slot": 0, "target_id": 253, "target_slot": 0, "type": "IMAGE"}, {"id": 372, "origin_id": 199, "origin_slot": 0, "target_id": 251, "target_slot": 0, "type": "MASK"}, {"id": 279, "origin_id": -10, "origin_slot": 0, "target_id": 199, "target_slot": 0, "type": "MASK"}, {"id": 378, "origin_id": 253, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MASK"}, {"id": 379, "origin_id": -10, "origin_slot": 1, "target_id": 199, "target_slot": 1, "type": "INT"}, {"id": 380, "origin_id": -10, "origin_slot": 2, "target_id": 252, "target_slot": 1, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"ds": {"scale": 1.088930769230769, "offset": [-1576.5829757292656, 657.608356702113]}, "workflowRendererVersion": "LG"}, "version": 0.4} diff --git a/blueprints/Image Levels.json b/blueprints/Image Levels.json new file mode 100644 index 000000000..f028662bd --- /dev/null +++ b/blueprints/Image Levels.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 139, "last_link_id": 0, "nodes": [{"id": 139, "type": "75bf8a72-aad8-4f3e-83ee-380e70248240", "pos": [620, 900], "size": [240, 178], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["5", "choice"], ["3", "value"], ["6", "value"], ["7", "value"], ["8", "value"], ["9", "value"]]}, "widgets_values": [], "title": "Image Levels"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "75bf8a72-aad8-4f3e-83ee-380e70248240", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 144, "lastLinkId": 118, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Levels", "inputNode": {"id": -10, "bounding": [3840, -3430, 120, 60]}, "outputNode": {"id": -20, "bounding": [4950, -3430, 120, 60]}, "inputs": [{"id": "b53e5012-fa47-400f-a324-28c74854ccae", "name": "images.image0", "type": "IMAGE", "linkIds": [1], "localized_name": "images.image0", "label": "image", "pos": [3940, -3410]}], "outputs": [{"id": "de7f2ffa-155f-41fd-b054-aa4d91ef49ca", "name": "IMAGE0", "type": "IMAGE", "linkIds": [8], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [4970, -3410]}], "widgets": [], "nodes": [{"id": 5, "type": "CustomCombo", "pos": [4020, -3350], "size": [270, 198], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "channel", "localized_name": "choice", "name": "choice", "type": "COMBO", "widget": {"name": "choice"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": null}, {"localized_name": "INDEX", "name": "INDEX", "type": "INT", "links": [3]}], "title": "Channel", "properties": {"Node name for S&R": "CustomCombo"}, "widgets_values": ["RGB", 0, "RGB", "R", "G", "B", ""]}, {"id": 8, "type": "PrimitiveFloat", "pos": [4020, -3550], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "output_black", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [6]}], "title": "Output Black", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 255, "min": 0, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 3, "type": "PrimitiveFloat", "pos": [4020, -3850], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "input_black", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [2]}], "title": "Input Black", "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 255, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [0]}, {"id": 6, "type": "PrimitiveFloat", "pos": [4020, -3750], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "input_white", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [4]}], "title": "Input White", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 255, "min": 0, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [255]}, {"id": 7, "type": "PrimitiveFloat", "pos": [4020, -3650], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "gamma", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [5]}], "title": "Gamma", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 10, "min": 0, "step": 0.01, "precision": 2, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 0.5, "color": [128, 128, 128]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [1]}, {"id": 9, "type": "PrimitiveFloat", "pos": [4020, -3450], "size": [270, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "output_white", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [7]}], "title": "Output White", "properties": {"Node name for S&R": "PrimitiveFloat", "max": 255, "min": 0, "step": 1, "display": "gradientslider", "gradient_stops": [{"offset": 0, "color": [0, 0, 0]}, {"offset": 1, "color": [255, 255, 255]}]}, "widgets_values": [255]}, {"id": 1, "type": "GLSLShader", "pos": [4310, -3850], "size": [580, 272], "flags": {}, "order": 6, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 1}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 2}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 4}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 5}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": 6}, {"label": "u_float4", "localized_name": "floats.u_float4", "name": "floats.u_float4", "shape": 7, "type": "FLOAT", "link": 7}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": 3}, {"label": "u_int1", "localized_name": "ints.u_int1", "name": "ints.u_int1", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [8]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\n// Levels Adjustment\n// u_int0: channel (0=RGB, 1=R, 2=G, 3=B) default: 0\n// u_float0: input black (0-255) default: 0\n// u_float1: input white (0-255) default: 255\n// u_float2: gamma (0.01-9.99) default: 1.0\n// u_float3: output black (0-255) default: 0\n// u_float4: output white (0-255) default: 255\n\nuniform sampler2D u_image0;\nuniform int u_int0;\nuniform float u_float0;\nuniform float u_float1;\nuniform float u_float2;\nuniform float u_float3;\nuniform float u_float4;\n\nin vec2 v_texCoord;\nout vec4 fragColor;\n\nvec3 applyLevels(vec3 color, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {\n float inRange = max(inWhite - inBlack, 0.0001);\n vec3 result = clamp((color - inBlack) / inRange, 0.0, 1.0);\n result = pow(result, vec3(1.0 / gamma));\n result = mix(vec3(outBlack), vec3(outWhite), result);\n return result;\n}\n\nfloat applySingleChannel(float value, float inBlack, float inWhite, float gamma, float outBlack, float outWhite) {\n float inRange = max(inWhite - inBlack, 0.0001);\n float result = clamp((value - inBlack) / inRange, 0.0, 1.0);\n result = pow(result, 1.0 / gamma);\n result = mix(outBlack, outWhite, result);\n return result;\n}\n\nvoid main() {\n vec4 texColor = texture(u_image0, v_texCoord);\n vec3 color = texColor.rgb;\n \n float inBlack = u_float0 / 255.0;\n float inWhite = u_float1 / 255.0;\n float gamma = u_float2;\n float outBlack = u_float3 / 255.0;\n float outWhite = u_float4 / 255.0;\n \n vec3 result;\n \n if (u_int0 == 0) {\n result = applyLevels(color, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 1) {\n result = color;\n result.r = applySingleChannel(color.r, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 2) {\n result = color;\n result.g = applySingleChannel(color.g, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else if (u_int0 == 3) {\n result = color;\n result.b = applySingleChannel(color.b, inBlack, inWhite, gamma, outBlack, outWhite);\n }\n else {\n result = color;\n }\n \n fragColor = vec4(result, texColor.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 2, "origin_id": 3, "origin_slot": 0, "target_id": 1, "target_slot": 2, "type": "FLOAT"}, {"id": 4, "origin_id": 6, "origin_slot": 0, "target_id": 1, "target_slot": 3, "type": "FLOAT"}, {"id": 5, "origin_id": 7, "origin_slot": 0, "target_id": 1, "target_slot": 4, "type": "FLOAT"}, {"id": 6, "origin_id": 8, "origin_slot": 0, "target_id": 1, "target_slot": 5, "type": "FLOAT"}, {"id": 7, "origin_id": 9, "origin_slot": 0, "target_id": 1, "target_slot": 6, "type": "FLOAT"}, {"id": 3, "origin_id": 5, "origin_slot": 1, "target_id": 1, "target_slot": 7, "type": "INT"}, {"id": 1, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 0, "type": "IMAGE"}, {"id": 8, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}, "extra": {}} diff --git a/blueprints/Image Outpainting (Qwen-Image).json b/blueprints/Image Outpainting (Qwen-Image).json new file mode 100644 index 000000000..f36e0bd77 --- /dev/null +++ b/blueprints/Image Outpainting (Qwen-Image).json @@ -0,0 +1 @@ +{"id": "8f79c27f-bec4-412e-9b82-7c5b3b778ecf", "revision": 0, "last_node_id": 255, "last_link_id": 401, "nodes": [{"id": 224, "type": "fbf07656-8ff8-4299-a3fc-7378e0f4a004", "pos": [3200, 740], "size": [400, 460], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "left", "type": "INT", "widget": {"name": "left"}, "link": null}, {"name": "top", "type": "INT", "widget": {"name": "top"}, "link": null}, {"name": "right", "type": "INT", "widget": {"name": "right"}, "link": null}, {"name": "bottom", "type": "INT", "widget": {"name": "bottom"}, "link": null}, {"name": "feathering", "type": "INT", "widget": {"name": "feathering"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["182", "text"], ["-1", "left"], ["-1", "top"], ["-1", "right"], ["-1", "bottom"], ["-1", "feathering"], ["190", "seed"], ["190", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "control_net_name"], ["-1", "lora_name"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": [null, 0, 0, 0, 0, 0, null, null, "qwen_image_fp8_e4m3fn.safetensors", "qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image_vae.safetensors", "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "Qwen-Image-Lightning-4steps-V1.0.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "fbf07656-8ff8-4299-a3fc-7378e0f4a004", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 255, "lastLinkId": 401, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Outpainting (Qwen-Image)", "inputNode": {"id": -10, "bounding": [1940, 610, 140.587890625, 260]}, "outputNode": {"id": -20, "bounding": [4240, 765, 120, 60]}, "inputs": [{"id": "466b9998-797f-4c6f-92e9-39120712c1a9", "name": "image", "type": "IMAGE", "linkIds": [351], "localized_name": "image", "pos": [2060.587890625, 630]}, {"id": "c5befee8-d6c4-493e-8ae1-e09d46268d10", "name": "left", "type": "INT", "linkIds": [392], "pos": [2060.587890625, 650]}, {"id": "c0b028a1-fcc0-4a54-9bdf-fa9e76992c40", "name": "top", "type": "INT", "linkIds": [393], "pos": [2060.587890625, 670]}, {"id": "22e43278-694c-410f-9043-f88b8dfdca28", "name": "right", "type": "INT", "linkIds": [394], "pos": [2060.587890625, 690]}, {"id": "f19fec20-a43d-4562-a0f8-bd6955091c1b", "name": "bottom", "type": "INT", "linkIds": [395], "pos": [2060.587890625, 710]}, {"id": "ba832b36-2199-4e1e-a28d-5f2e8acc99a3", "name": "feathering", "type": "INT", "linkIds": [396], "pos": [2060.587890625, 730]}, {"id": "437d6324-2d3c-4c50-ac21-1ea9aab57f4e", "name": "unet_name", "type": "COMBO", "linkIds": [397], "pos": [2060.587890625, 750]}, {"id": "4d58dde7-4402-45d5-ade9-9c41e99e0757", "name": "clip_name", "type": "COMBO", "linkIds": [398], "pos": [2060.587890625, 770]}, {"id": "a7558cc4-d4c4-4b4a-b2a3-0d7229a8ff65", "name": "vae_name", "type": "COMBO", "linkIds": [399], "pos": [2060.587890625, 790]}, {"id": "7d8ffb86-2ff3-49fc-8e96-94d3e530f154", "name": "control_net_name", "type": "COMBO", "linkIds": [400], "pos": [2060.587890625, 810]}, {"id": "a81e0fa5-5984-47ae-bb4f-108a2b92d373", "name": "lora_name", "type": "COMBO", "linkIds": [401], "pos": [2060.587890625, 830]}], "outputs": [{"id": "506ced76-78be-4eb2-ae70-eaa708a4cb98", "name": "IMAGE", "type": "IMAGE", "linkIds": [314], "localized_name": "IMAGE", "pos": [4260, 785]}], "widgets": [], "nodes": [{"id": 174, "type": "CLIPLoader", "pos": [2430, 60], "size": [380, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 398}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [296, 305]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}]}, "widgets_values": ["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}, {"id": 175, "type": "UNETLoader", "pos": [2430, -70], "size": [380, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 397}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [306]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "UNETLoader", "models": [{"name": "qwen_image_fp8_e4m3fn.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_fp8_e4m3fn.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["qwen_image_fp8_e4m3fn.safetensors", "default"]}, {"id": 177, "type": "ControlNetLoader", "pos": [2430, 330], "size": [380, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "control_net_name", "name": "control_net_name", "type": "COMBO", "widget": {"name": "control_net_name"}, "link": 400}], "outputs": [{"localized_name": "CONTROL_NET", "name": "CONTROL_NET", "type": "CONTROL_NET", "links": [301]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ControlNetLoader", "models": [{"name": "Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-InstantX-ControlNets/resolve/main/split_files/controlnet/Qwen-Image-InstantX-ControlNet-Inpainting.safetensors", "directory": "controlnet"}]}, "widgets_values": ["Qwen-Image-InstantX-ControlNet-Inpainting.safetensors"]}, {"id": 180, "type": "ModelSamplingAuraFlow", "pos": [3400, -110], "size": [310, 58], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 298}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [308]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelSamplingAuraFlow"}, "widgets_values": [3.1000000000000005]}, {"id": 185, "type": "LoraLoaderModelOnly", "pos": [2870, -80], "size": [430, 82], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 306}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 401}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [298]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "Qwen-Image-Lightning-4steps-V1.0.safetensors", "url": "https://huggingface.co/lightx2v/Qwen-Image-Lightning/resolve/main/Qwen-Image-Lightning-4steps-V1.0.safetensors", "directory": "loras"}]}, "widgets_values": ["Qwen-Image-Lightning-4steps-V1.0.safetensors", 1]}, {"id": 190, "type": "KSampler", "pos": [3400, 10], "size": [310, 474], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 308}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 386}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 387}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 358}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [312]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "KSampler"}, "widgets_values": [375729975350303, "randomize", 4, 1, "euler", "simple", 1]}, {"id": 220, "type": "f93c215e-c393-460e-9534-ed2c3d8a652e", "pos": [2480, 1450], "size": [330, 100], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 377}, {"name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": null}, {"name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [374, 375, 376]}], "properties": {"proxyWidgets": [["-1", "expand"], ["-1", "blur_radius"]], "cnr_id": "comfy-core", "ver": "0.3.59"}, "widgets_values": [20, 31]}, {"id": 195, "type": "VAEEncode", "pos": [2950, 820], "size": [140, 46], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 371}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 317}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [358]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEEncode"}, "widgets_values": []}, {"id": 181, "type": "ControlNetInpaintingAliMamaApply", "pos": [2940, 560], "size": [317.0093688964844, 206], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 299}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 300}, {"localized_name": "control_net", "name": "control_net", "type": "CONTROL_NET", "link": 301}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 384}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 385}, {"localized_name": "mask", "name": "mask", "type": "MASK", "link": 375}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"localized_name": "start_percent", "name": "start_percent", "type": "FLOAT", "widget": {"name": "start_percent"}, "link": null}, {"localized_name": "end_percent", "name": "end_percent", "type": "FLOAT", "widget": {"name": "end_percent"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [386]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [387]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ControlNetInpaintingAliMamaApply"}, "widgets_values": [1, 0, 1]}, {"id": 178, "type": "VAELoader", "pos": [2430, 220], "size": [380, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 399}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [313, 317, 384]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAELoader", "models": [{"name": "qwen_image_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image_ComfyUI/resolve/main/split_files/vae/qwen_image_vae.safetensors", "directory": "vae"}]}, "widgets_values": ["qwen_image_vae.safetensors"]}, {"id": 182, "type": "CLIPTextEncode", "pos": [2850, 100], "size": [460, 164.31304931640625], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 305}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [299]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 176, "type": "CLIPTextEncode", "pos": [2850, 310], "size": [460, 140], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 296}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [300]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#223", "bgcolor": "#335"}, {"id": 191, "type": "VAEDecode", "pos": [3440, 580], "size": [250, 46], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 312}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 313}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [314, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "VAEDecode"}, "widgets_values": []}, {"id": 219, "type": "2a4b2cc0-db37-4302-a067-da392f38f06b", "pos": [2480, 1260], "size": [280, 80], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 365}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 366}, {"name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [377]}, {"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [369, 370, 371, 385]}], "properties": {"proxyWidgets": [["-1", "value"]], "cnr_id": "comfy-core", "ver": "0.3.65"}, "widgets_values": [1536]}, {"id": 207, "type": "MaskPreview", "pos": [3430, 1270], "size": [340, 430], "flags": {}, "order": 15, "mode": 4, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 376}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskPreview"}, "widgets_values": []}, {"id": 203, "type": "PreviewImage", "pos": [2990, 1270], "size": [310, 430], "flags": {}, "order": 14, "mode": 4, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 370}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "PreviewImage"}, "widgets_values": []}, {"id": 200, "type": "ImageCompositeMasked", "pos": [3850, 1280], "size": [250, 150], "flags": {}, "order": 12, "mode": 4, "inputs": [{"localized_name": "destination", "name": "destination", "type": "IMAGE", "link": 369}, {"localized_name": "source", "name": "source", "type": "IMAGE", "link": 323}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": 374}, {"localized_name": "x", "name": "x", "type": "INT", "widget": {"name": "x"}, "link": null}, {"localized_name": "y", "name": "y", "type": "INT", "widget": {"name": "y"}, "link": null}, {"localized_name": "resize_source", "name": "resize_source", "type": "BOOLEAN", "widget": {"name": "resize_source"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageCompositeMasked"}, "widgets_values": [0, 0, false]}, {"id": 202, "type": "ImagePadForOutpaint", "pos": [2490, 1030], "size": [270, 174], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 351}, {"localized_name": "left", "name": "left", "type": "INT", "widget": {"name": "left"}, "link": 392}, {"localized_name": "top", "name": "top", "type": "INT", "widget": {"name": "top"}, "link": 393}, {"localized_name": "right", "name": "right", "type": "INT", "widget": {"name": "right"}, "link": 394}, {"localized_name": "bottom", "name": "bottom", "type": "INT", "widget": {"name": "bottom"}, "link": 395}, {"localized_name": "feathering", "name": "feathering", "type": "INT", "widget": {"name": "feathering"}, "link": 396}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [366]}, {"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImagePadForOutpaint"}, "widgets_values": [0, 0, 0, 0, 0]}], "groups": [{"id": 12, "title": "For outpainting Ctrl-B to enable", "bounding": [2410, -190, 1770, 1970], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Step 1 - Upload models", "bounding": [2420, -150, 400, 610], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Step 3 - Prompt", "bounding": [2840, 30, 490, 430], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 10, "title": "4 steps lightning LoRA", "bounding": [2840, -150, 490, 160], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Ctrl-B to enable it", "bounding": [2420, 940, 430, 460], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 298, "origin_id": 185, "origin_slot": 0, "target_id": 180, "target_slot": 0, "type": "MODEL"}, {"id": 306, "origin_id": 175, "origin_slot": 0, "target_id": 185, "target_slot": 0, "type": "MODEL"}, {"id": 308, "origin_id": 180, "origin_slot": 0, "target_id": 190, "target_slot": 0, "type": "MODEL"}, {"id": 386, "origin_id": 181, "origin_slot": 0, "target_id": 190, "target_slot": 1, "type": "CONDITIONING"}, {"id": 387, "origin_id": 181, "origin_slot": 1, "target_id": 190, "target_slot": 2, "type": "CONDITIONING"}, {"id": 358, "origin_id": 195, "origin_slot": 0, "target_id": 190, "target_slot": 3, "type": "LATENT"}, {"id": 377, "origin_id": 219, "origin_slot": 0, "target_id": 220, "target_slot": 0, "type": "MASK"}, {"id": 371, "origin_id": 219, "origin_slot": 1, "target_id": 195, "target_slot": 0, "type": "IMAGE"}, {"id": 317, "origin_id": 178, "origin_slot": 0, "target_id": 195, "target_slot": 1, "type": "VAE"}, {"id": 299, "origin_id": 182, "origin_slot": 0, "target_id": 181, "target_slot": 0, "type": "CONDITIONING"}, {"id": 300, "origin_id": 176, "origin_slot": 0, "target_id": 181, "target_slot": 1, "type": "CONDITIONING"}, {"id": 301, "origin_id": 177, "origin_slot": 0, "target_id": 181, "target_slot": 2, "type": "CONTROL_NET"}, {"id": 384, "origin_id": 178, "origin_slot": 0, "target_id": 181, "target_slot": 3, "type": "VAE"}, {"id": 385, "origin_id": 219, "origin_slot": 1, "target_id": 181, "target_slot": 4, "type": "IMAGE"}, {"id": 375, "origin_id": 220, "origin_slot": 0, "target_id": 181, "target_slot": 5, "type": "MASK"}, {"id": 305, "origin_id": 174, "origin_slot": 0, "target_id": 182, "target_slot": 0, "type": "CLIP"}, {"id": 296, "origin_id": 174, "origin_slot": 0, "target_id": 176, "target_slot": 0, "type": "CLIP"}, {"id": 312, "origin_id": 190, "origin_slot": 0, "target_id": 191, "target_slot": 0, "type": "LATENT"}, {"id": 313, "origin_id": 178, "origin_slot": 0, "target_id": 191, "target_slot": 1, "type": "VAE"}, {"id": 365, "origin_id": 202, "origin_slot": 1, "target_id": 219, "target_slot": 0, "type": "MASK"}, {"id": 366, "origin_id": 202, "origin_slot": 0, "target_id": 219, "target_slot": 1, "type": "IMAGE"}, {"id": 376, "origin_id": 220, "origin_slot": 0, "target_id": 207, "target_slot": 0, "type": "MASK"}, {"id": 370, "origin_id": 219, "origin_slot": 1, "target_id": 203, "target_slot": 0, "type": "IMAGE"}, {"id": 369, "origin_id": 219, "origin_slot": 1, "target_id": 200, "target_slot": 0, "type": "IMAGE"}, {"id": 323, "origin_id": 191, "origin_slot": 0, "target_id": 200, "target_slot": 1, "type": "IMAGE"}, {"id": 374, "origin_id": 220, "origin_slot": 0, "target_id": 200, "target_slot": 2, "type": "MASK"}, {"id": 351, "origin_id": -10, "origin_slot": 0, "target_id": 202, "target_slot": 0, "type": "IMAGE"}, {"id": 314, "origin_id": 191, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 392, "origin_id": -10, "origin_slot": 1, "target_id": 202, "target_slot": 1, "type": "INT"}, {"id": 393, "origin_id": -10, "origin_slot": 2, "target_id": 202, "target_slot": 2, "type": "INT"}, {"id": 394, "origin_id": -10, "origin_slot": 3, "target_id": 202, "target_slot": 3, "type": "INT"}, {"id": 395, "origin_id": -10, "origin_slot": 4, "target_id": 202, "target_slot": 4, "type": "INT"}, {"id": 396, "origin_id": -10, "origin_slot": 5, "target_id": 202, "target_slot": 5, "type": "INT"}, {"id": 397, "origin_id": -10, "origin_slot": 6, "target_id": 175, "target_slot": 0, "type": "COMBO"}, {"id": 398, "origin_id": -10, "origin_slot": 7, "target_id": 174, "target_slot": 0, "type": "COMBO"}, {"id": 399, "origin_id": -10, "origin_slot": 8, "target_id": 178, "target_slot": 0, "type": "COMBO"}, {"id": 400, "origin_id": -10, "origin_slot": 9, "target_id": 177, "target_slot": 0, "type": "COMBO"}, {"id": 401, "origin_id": -10, "origin_slot": 10, "target_id": 185, "target_slot": 1, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Outpaint image"}, {"id": "f93c215e-c393-460e-9534-ed2c3d8a652e", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 255, "lastLinkId": 401, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Grow and Blur Mask", "inputNode": {"id": -10, "bounding": [290, 3536, 120, 100]}, "outputNode": {"id": -20, "bounding": [1130, 3536, 120, 60]}, "inputs": [{"id": "3ac60d5e-8f9d-4663-9b24-b3a15a3e9e20", "name": "mask", "type": "MASK", "linkIds": [279], "localized_name": "mask", "pos": [390, 3556]}, {"id": "d1ab0cf5-7062-41ac-9f4b-8c660fc4a714", "name": "expand", "type": "INT", "linkIds": [379], "pos": [390, 3576]}, {"id": "1a787af5-da9f-44c5-9f5a-3f71609ca0ef", "name": "blur_radius", "type": "INT", "linkIds": [380], "pos": [390, 3596]}], "outputs": [{"id": "1f97f683-13d3-4871-876d-678fca850d89", "name": "MASK", "type": "MASK", "linkIds": [378], "localized_name": "MASK", "pos": [1150, 3556]}], "widgets": [], "nodes": [{"id": 253, "type": "ImageToMask", "pos": [800, 3630], "size": [270, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 377}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [378]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageToMask"}, "widgets_values": ["red"]}, {"id": 251, "type": "MaskToImage", "pos": [780, 3470], "size": [260, 70], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 372}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [373]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 199, "type": "GrowMask", "pos": [470, 3460], "size": [270, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 279}, {"localized_name": "expand", "name": "expand", "type": "INT", "widget": {"name": "expand"}, "link": 379}, {"localized_name": "tapered_corners", "name": "tapered_corners", "type": "BOOLEAN", "widget": {"name": "tapered_corners"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [372]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "GrowMask"}, "widgets_values": [20, true]}, {"id": 252, "type": "ImageBlur", "pos": [480, 3620], "size": [270, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 373}, {"localized_name": "blur_radius", "name": "blur_radius", "type": "INT", "widget": {"name": "blur_radius"}, "link": 380}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [377]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageBlur"}, "widgets_values": [31, 1]}], "groups": [], "links": [{"id": 373, "origin_id": 251, "origin_slot": 0, "target_id": 252, "target_slot": 0, "type": "IMAGE"}, {"id": 377, "origin_id": 252, "origin_slot": 0, "target_id": 253, "target_slot": 0, "type": "IMAGE"}, {"id": 372, "origin_id": 199, "origin_slot": 0, "target_id": 251, "target_slot": 0, "type": "MASK"}, {"id": 279, "origin_id": -10, "origin_slot": 0, "target_id": 199, "target_slot": 0, "type": "MASK"}, {"id": 378, "origin_id": 253, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MASK"}, {"id": 379, "origin_id": -10, "origin_slot": 1, "target_id": 199, "target_slot": 1, "type": "INT"}, {"id": 380, "origin_id": -10, "origin_slot": 2, "target_id": 252, "target_slot": 1, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}}, {"id": "2a4b2cc0-db37-4302-a067-da392f38f06b", "version": 1, "state": {"lastGroupId": 14, "lastNodeId": 255, "lastLinkId": 401, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Scale image and mask", "inputNode": {"id": -10, "bounding": [2110, 1406, 120, 100]}, "outputNode": {"id": -20, "bounding": [3320, 1406, 120, 80]}, "inputs": [{"id": "53ec80db-b075-446c-a79b-891d82ae3cf1", "name": "mask", "type": "MASK", "linkIds": [360], "localized_name": "mask", "pos": [2210, 1426]}, {"id": "37820e3d-f495-4b41-b0c6-58765a0c1766", "name": "image", "type": "IMAGE", "linkIds": [350], "localized_name": "image", "pos": [2210, 1446]}, {"id": "d388f5f1-7a36-4563-b104-9f7ec77f636d", "name": "value", "type": "INT", "linkIds": [365], "pos": [2210, 1466]}], "outputs": [{"id": "7ef75a31-2e69-4dce-8e13-76cd17b4c272", "name": "MASK", "type": "MASK", "linkIds": [364], "localized_name": "MASK", "pos": [3340, 1426]}, {"id": "36058145-b72c-4bd4-bb63-e2e22456d003", "name": "IMAGE", "type": "IMAGE", "linkIds": [352, 353, 354], "localized_name": "IMAGE", "pos": [3340, 1446]}], "widgets": [], "nodes": [{"id": 218, "type": "ImageToMask", "pos": [2990, 1540], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 363}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [364]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.65", "Node name for S&R": "ImageToMask"}, "widgets_values": ["red"]}, {"id": 216, "type": "ImageScaleToMaxDimension", "pos": [2610, 1570], "size": [281.2027282714844, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 361}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "largest_size", "name": "largest_size", "type": "INT", "widget": {"name": "largest_size"}, "link": 362}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [363]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageScaleToMaxDimension"}, "widgets_values": ["area", 1536]}, {"id": 217, "type": "MaskToImage", "pos": [2700, 1420], "size": [193.2779296875, 26], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 360}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [361]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.65", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 194, "type": "ImageScaleToMaxDimension", "pos": [2590, 1280], "size": [281.2027282714844, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 350}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "largest_size", "name": "largest_size", "type": "INT", "widget": {"name": "largest_size"}, "link": 359}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [352, 353, 354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageScaleToMaxDimension"}, "widgets_values": ["area", 1536]}, {"id": 215, "type": "PrimitiveInt", "pos": [2260, 1560], "size": [270, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": 365}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [359, 362]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.65", "Node name for S&R": "PrimitiveInt"}, "widgets_values": [1536, "fixed"]}], "groups": [], "links": [{"id": 363, "origin_id": 216, "origin_slot": 0, "target_id": 218, "target_slot": 0, "type": "IMAGE"}, {"id": 361, "origin_id": 217, "origin_slot": 0, "target_id": 216, "target_slot": 0, "type": "IMAGE"}, {"id": 362, "origin_id": 215, "origin_slot": 0, "target_id": 216, "target_slot": 2, "type": "INT"}, {"id": 359, "origin_id": 215, "origin_slot": 0, "target_id": 194, "target_slot": 2, "type": "INT"}, {"id": 360, "origin_id": -10, "origin_slot": 0, "target_id": 217, "target_slot": 0, "type": "MASK"}, {"id": 350, "origin_id": -10, "origin_slot": 1, "target_id": 194, "target_slot": 0, "type": "IMAGE"}, {"id": 364, "origin_id": 218, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MASK"}, {"id": 352, "origin_id": 194, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 353, "origin_id": 194, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 354, "origin_id": 194, "origin_slot": 0, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 365, "origin_id": -10, "origin_slot": 2, "target_id": 215, "target_slot": 0, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 1.170393777345649, "offset": [-2589.3260157061272, -547.3616692627206]}}, "version": 0.4} diff --git a/blueprints/Image Upscale(Z-image-Turbo).json b/blueprints/Image Upscale(Z-image-Turbo).json new file mode 100644 index 000000000..a67d6a2d8 --- /dev/null +++ b/blueprints/Image Upscale(Z-image-Turbo).json @@ -0,0 +1 @@ +{"id": "bf8108f3-d857-46c9-aef5-0e8ad2a64bf5", "revision": 0, "last_node_id": 95, "last_link_id": 115, "nodes": [{"id": 87, "type": "dd15cfd3-cd53-428c-b3e2-33ed4ff8fa78", "pos": [960.6668984200231, 332.66676187423354], "size": [400, 469.9869791666667], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"label": "upscale_model", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}, {"name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE_1", "name": "IMAGE_1", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["67", "text"], ["69", "seed"], ["69", "control_after_generate"], ["-1", "denoise"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [null, null, null, 0.33, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "RealESRGAN_x4plus.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "dd15cfd3-cd53-428c-b3e2-33ed4ff8fa78", "version": 1, "state": {"lastGroupId": 5, "lastNodeId": 95, "lastLinkId": 115, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image Upscale(Z-image-Turbo)", "inputNode": {"id": -10, "bounding": [-150, 390, 125.224609375, 160]}, "outputNode": {"id": -20, "bounding": [2070, 490, 120, 60]}, "inputs": [{"id": "e9a14390-4f93-4065-8b02-323f999527c0", "name": "image", "type": "IMAGE", "linkIds": [86], "localized_name": "image", "pos": [-44.775390625, 410]}, {"id": "c5655e11-9531-4949-996c-958b5fe92085", "name": "unet_name", "type": "COMBO", "linkIds": [109], "pos": [-44.775390625, 430]}, {"id": "82576043-dd69-4604-b572-09fabb6e602d", "name": "clip_name", "type": "COMBO", "linkIds": [110], "pos": [-44.775390625, 450]}, {"id": "59e20fb5-cd61-4d4b-a1fd-15a90c7ba6c2", "name": "vae_name", "type": "COMBO", "linkIds": [111], "pos": [-44.775390625, 470]}, {"id": "adc35153-dc52-4bac-be7e-9da19471f441", "name": "model_name", "type": "COMBO", "linkIds": [112], "label": "upscale_model", "pos": [-44.775390625, 490]}, {"id": "c1b2f097-616e-4420-93c8-04eb79f4ba1e", "name": "denoise", "type": "FLOAT", "linkIds": [115], "pos": [-44.775390625, 510]}], "outputs": [{"id": "f138a0aa-489a-42e1-92f7-e3747688c94d", "name": "IMAGE_1", "type": "IMAGE", "linkIds": [97, 103], "localized_name": "IMAGE_1", "label": "IMAGE", "pos": [2090, 510]}], "widgets": [], "nodes": [{"id": 71, "type": "CLIPTextEncode", "pos": [648.333324162179, 398.3333435177784], "size": [491.6666666666667, 150], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 82}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [83]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#323", "bgcolor": "#535"}, {"id": 79, "type": "ImageUpscaleWithModel", "pos": [623.3333541162552, 714.9999406294688], "size": [233.5689453125, 60], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 87}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 88}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [92]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageUpscaleWithModel"}, "widgets_values": []}, {"id": 80, "type": "VAEEncode", "pos": [1173.3330331592938, 631.6665944654844], "size": [187.5, 60], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 93}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 90}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [91]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "VAEEncode"}, "widgets_values": []}, {"id": 81, "type": "ImageScaleBy", "pos": [865.0000410901742, 714.9999828835583], "size": [225, 95.546875], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 92}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "scale_by", "name": "scale_by", "type": "FLOAT", "widget": {"name": "scale_by"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [93]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageScaleBy"}, "widgets_values": ["lanczos", 0.5]}, {"id": 66, "type": "UNETLoader", "pos": [280, -20], "size": [323.984375, 118.64583333333334], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 109}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [104]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 62, "type": "CLIPLoader", "pos": [280, 140], "size": [323.984375, 150.65104166666669], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 110}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [78, 82]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 67, "type": "CLIPTextEncode", "pos": [650.621298596813, -33.81729273975067], "size": [491.9791666666667, 377.98177083333337], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 78}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [75]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["masterpiece, 8k"], "color": "#232", "bgcolor": "#353"}, {"id": 63, "type": "VAELoader", "pos": [280, 330], "size": [323.984375, 83.99739583333334], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 111}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [73, 90]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 76, "type": "UpscaleModelLoader", "pos": [264.07395879037364, 704.8118881098496], "size": [323.984375, 83.99739583333334], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 112}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [87]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}, {"id": 70, "type": "ModelSamplingAuraFlow", "pos": [1200, -50], "size": [371.9791666666667, 80.1171875], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 104}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [74]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 65, "type": "VAEDecode", "pos": [1610, -50], "size": [251.97916666666669, 72.13541666666667], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 72}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 73}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [97, 103]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 78, "type": "ImageScaleToTotalPixels", "pos": [260, 850], "size": [325, 122.21354166666667], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 86}, {"localized_name": "upscale_method", "name": "upscale_method", "type": "COMBO", "widget": {"name": "upscale_method"}, "link": null}, {"localized_name": "megapixels", "name": "megapixels", "type": "FLOAT", "widget": {"name": "megapixels"}, "link": null}, {"localized_name": "resolution_steps", "name": "resolution_steps", "type": "INT", "widget": {"name": "resolution_steps"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [88]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageScaleToTotalPixels"}, "widgets_values": ["lanczos", 1, 1]}, {"id": 69, "type": "KSampler", "pos": [1200, 80], "size": [366.6666666666667, 474], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 74}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 75}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 83}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 91}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": 115}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [72]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1098688918602660, "randomize", 5, 1, "dpmpp_2m_sde", "beta", 0.33]}], "groups": [{"id": 3, "title": "Prompt", "bounding": [640, -90, 508.64583333333337, 662.0666813520016], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Models", "bounding": [260, -90, 344.6965254233087, 516.414685926878], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 104, "origin_id": 66, "origin_slot": 0, "target_id": 70, "target_slot": 0, "type": "MODEL"}, {"id": 82, "origin_id": 62, "origin_slot": 0, "target_id": 71, "target_slot": 0, "type": "CLIP"}, {"id": 87, "origin_id": 76, "origin_slot": 0, "target_id": 79, "target_slot": 0, "type": "UPSCALE_MODEL"}, {"id": 88, "origin_id": 78, "origin_slot": 0, "target_id": 79, "target_slot": 1, "type": "IMAGE"}, {"id": 93, "origin_id": 81, "origin_slot": 0, "target_id": 80, "target_slot": 0, "type": "IMAGE"}, {"id": 90, "origin_id": 63, "origin_slot": 0, "target_id": 80, "target_slot": 1, "type": "VAE"}, {"id": 92, "origin_id": 79, "origin_slot": 0, "target_id": 81, "target_slot": 0, "type": "IMAGE"}, {"id": 74, "origin_id": 70, "origin_slot": 0, "target_id": 69, "target_slot": 0, "type": "MODEL"}, {"id": 75, "origin_id": 67, "origin_slot": 0, "target_id": 69, "target_slot": 1, "type": "CONDITIONING"}, {"id": 83, "origin_id": 71, "origin_slot": 0, "target_id": 69, "target_slot": 2, "type": "CONDITIONING"}, {"id": 91, "origin_id": 80, "origin_slot": 0, "target_id": 69, "target_slot": 3, "type": "LATENT"}, {"id": 72, "origin_id": 69, "origin_slot": 0, "target_id": 65, "target_slot": 0, "type": "LATENT"}, {"id": 73, "origin_id": 63, "origin_slot": 0, "target_id": 65, "target_slot": 1, "type": "VAE"}, {"id": 78, "origin_id": 62, "origin_slot": 0, "target_id": 67, "target_slot": 0, "type": "CLIP"}, {"id": 86, "origin_id": -10, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "IMAGE"}, {"id": 97, "origin_id": 65, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 103, "origin_id": 65, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 109, "origin_id": -10, "origin_slot": 1, "target_id": 66, "target_slot": 0, "type": "COMBO"}, {"id": 110, "origin_id": -10, "origin_slot": 2, "target_id": 62, "target_slot": 0, "type": "COMBO"}, {"id": 111, "origin_id": -10, "origin_slot": 3, "target_id": 63, "target_slot": 0, "type": "COMBO"}, {"id": 112, "origin_id": -10, "origin_slot": 4, "target_id": 76, "target_slot": 0, "type": "COMBO"}, {"id": 115, "origin_id": -10, "origin_slot": 5, "target_id": 69, "target_slot": 9, "type": "FLOAT"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Enhance"}]}, "config": {}, "extra": {"workflowRendererVersion": "LG"}, "version": 0.4} diff --git a/blueprints/Image to Depth Map (Lotus).json b/blueprints/Image to Depth Map (Lotus).json new file mode 100644 index 000000000..5b3f7a1d6 --- /dev/null +++ b/blueprints/Image to Depth Map (Lotus).json @@ -0,0 +1 @@ +{"id": "6af0a6c1-0161-4528-8685-65776e838d44", "revision": 0, "last_node_id": 75, "last_link_id": 245, "nodes": [{"id": 75, "type": "488652fd-6edf-4d06-8f9f-4d84d3a34eaf", "pos": [600, 830], "size": [400, 110], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": null}, {"label": "depth_intensity", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "sigma"], ["-1", "unet_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [999.0000000000002, "lotus-depth-d-v1-1.safetensors", "vae-ft-mse-840000-ema-pruned.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "488652fd-6edf-4d06-8f9f-4d84d3a34eaf", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 75, "lastLinkId": 245, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image to Depth Map (Lotus)", "inputNode": {"id": -10, "bounding": [-60, -172.61268043518066, 126.625, 120]}, "outputNode": {"id": -20, "bounding": [1650, -172.61268043518066, 120, 60]}, "inputs": [{"id": "3bdd30c3-4ec9-485a-814b-e7d39fb6b5cc", "name": "pixels", "type": "IMAGE", "linkIds": [37], "localized_name": "pixels", "pos": [46.625, -152.61268043518066]}, {"id": "f9a1017c-f4b9-43b4-94c2-41c088b3a492", "name": "sigma", "type": "FLOAT", "linkIds": [243], "label": "depth_intensity", "pos": [46.625, -132.61268043518066]}, {"id": "cb96b9fe-93e7-41cf-b27f-6d6dc3a1890b", "name": "unet_name", "type": "COMBO", "linkIds": [244], "pos": [46.625, -112.61268043518066]}, {"id": "42c8efad-1661-49c7-89b5-2b735b72424d", "name": "vae_name", "type": "COMBO", "linkIds": [245], "pos": [46.625, -92.61268043518066]}], "outputs": [{"id": "2ec278bd-0b66-4b30-9c5b-994d5f638214", "name": "IMAGE", "type": "IMAGE", "linkIds": [242], "localized_name": "IMAGE", "pos": [1670, -152.61268043518066]}], "widgets": [], "nodes": [{"id": 10, "type": "UNETLoader", "pos": [108.05555555555557, -253.05555555555557], "size": [254.93706597222226, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 244}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [31, 241]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "lotus-depth-d-v1-1.safetensors", "url": "https://huggingface.co/Comfy-Org/lotus/resolve/main/lotus-depth-d-v1-1.safetensors", "directory": "diffusion_models"}], "widget_ue_connectable": {}}, "widgets_values": ["lotus-depth-d-v1-1.safetensors", "default"]}, {"id": 18, "type": "DisableNoise", "pos": [607.0641494069639, -268.33337840371513], "size": [175, 33.333333333333336], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "slot_index": 0, "links": [237]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "DisableNoise", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 23, "type": "VAEEncode", "pos": [620, 160], "size": [175, 50], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 37}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 38}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEEncode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 21, "type": "KSamplerSelect", "pos": [610, -60], "size": [210, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "slot_index": 0, "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSamplerSelect", "widget_ue_connectable": {}}, "widgets_values": ["euler"]}, {"id": 19, "type": "BasicGuider", "pos": [610, -170], "size": [175, 50], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 241}, {"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 238}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "slot_index": 0, "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicGuider", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 16, "type": "SamplerCustomAdvanced", "pos": [890, -130], "size": [295.99609375, 271.65798611111114], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 237}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 27}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 33}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 194}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 201}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "slot_index": 0, "links": [232]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "slot_index": 1, "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SamplerCustomAdvanced", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 28, "type": "SetFirstSigma", "pos": [620, 50], "size": [210, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 66}, {"localized_name": "sigma", "name": "sigma", "type": "FLOAT", "widget": {"name": "sigma"}, "link": 243}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "SetFirstSigma", "widget_ue_connectable": {}}, "widgets_values": [999.0000000000002]}, {"id": 8, "type": "VAEDecode", "pos": [1210, -120], "size": [175, 50], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 232}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 240}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEDecode", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 22, "type": "ImageInvert", "pos": [1200, -220], "size": [175, 33.333333333333336], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 35}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [242]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageInvert", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 14, "type": "VAELoader", "pos": [120, -90], "size": [254.93706597222226, 58], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 245}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [38, 240]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "vae-ft-mse-840000-ema-pruned.safetensors", "url": "https://huggingface.co/stabilityai/sd-vae-ft-mse-original/resolve/main/vae-ft-mse-840000-ema-pruned.safetensors", "directory": "vae"}], "widget_ue_connectable": {}}, "widgets_values": ["vae-ft-mse-840000-ema-pruned.safetensors"]}, {"id": 68, "type": "LotusConditioning", "pos": [400, -150], "size": [175, 33.333333333333336], "flags": {}, "order": 2, "mode": 0, "inputs": [], "outputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "slot_index": 0, "links": [238]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LotusConditioning", "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 20, "type": "BasicScheduler", "pos": [170, 40], "size": [210, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 31}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "slot_index": 0, "links": [66]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "BasicScheduler", "widget_ue_connectable": {}}, "widgets_values": ["normal", 1, 1]}], "groups": [], "links": [{"id": 232, "origin_id": 16, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 240, "origin_id": 14, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 237, "origin_id": 18, "origin_slot": 0, "target_id": 16, "target_slot": 0, "type": "NOISE"}, {"id": 27, "origin_id": 19, "origin_slot": 0, "target_id": 16, "target_slot": 1, "type": "GUIDER"}, {"id": 33, "origin_id": 21, "origin_slot": 0, "target_id": 16, "target_slot": 2, "type": "SAMPLER"}, {"id": 194, "origin_id": 28, "origin_slot": 0, "target_id": 16, "target_slot": 3, "type": "SIGMAS"}, {"id": 201, "origin_id": 23, "origin_slot": 0, "target_id": 16, "target_slot": 4, "type": "LATENT"}, {"id": 241, "origin_id": 10, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 238, "origin_id": 68, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 31, "origin_id": 10, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "MODEL"}, {"id": 35, "origin_id": 8, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "IMAGE"}, {"id": 38, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 1, "type": "VAE"}, {"id": 66, "origin_id": 20, "origin_slot": 0, "target_id": 28, "target_slot": 0, "type": "SIGMAS"}, {"id": 37, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 242, "origin_id": 22, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 243, "origin_id": -10, "origin_slot": 1, "target_id": 28, "target_slot": 1, "type": "FLOAT"}, {"id": 244, "origin_id": -10, "origin_slot": 2, "target_id": 10, "target_slot": 0, "type": "COMBO"}, {"id": 245, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Depth to image"}]}, "config": {}, "extra": {"ds": {"scale": 1.3589709866044692, "offset": [-138.53613935617864, -786.0629126022195]}, "workflowRendererVersion": "LG"}, "version": 0.4} diff --git a/blueprints/Image to Layers(Qwen-Image Layered).json b/blueprints/Image to Layers(Qwen-Image Layered).json new file mode 100644 index 000000000..f4c7f0b5f --- /dev/null +++ b/blueprints/Image to Layers(Qwen-Image Layered).json @@ -0,0 +1 @@ +{"id": "1a761372-7c82-4016-b9bf-fa285967e1e9", "revision": 0, "last_node_id": 83, "last_link_id": 0, "nodes": [{"id": 83, "type": "f754a936-daaf-4b6e-9658-41fdc54d301d", "pos": [61.999827823554256, 153.3332507624185], "size": [400, 550], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"name": "layers", "type": "INT", "widget": {"name": "layers"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "steps"], ["-1", "cfg"], ["-1", "layers"], ["3", "seed"], ["3", "control_after_generate"]], "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", 20, 2.5, 2]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "f754a936-daaf-4b6e-9658-41fdc54d301d", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 83, "lastLinkId": 159, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image to Layers (Qwen-Image-Layered)", "inputNode": {"id": -10, "bounding": [-510, 523, 120, 140]}, "outputNode": {"id": -20, "bounding": [1160, 523, 120, 60]}, "inputs": [{"id": "6c36b5bc-c9a5-4b07-8b52-6fe0df434cce", "name": "image", "type": "IMAGE", "linkIds": [148, 149], "localized_name": "image", "pos": [-410, 543]}, {"id": "8497fe33-124d-4e3e-9ab6-fc4a56a98dde", "name": "text", "type": "STRING", "linkIds": [150], "pos": [-410, 563]}, {"id": "509ab2c1-e6da-47ba-8714-023100ab92bd", "name": "steps", "type": "INT", "linkIds": [153], "pos": [-410, 583]}, {"id": "dd81894e-5def-4c75-9b17-d8f89fe095d6", "name": "cfg", "type": "FLOAT", "linkIds": [154], "pos": [-410, 603]}, {"id": "66da7c8a-3369-4a3f-92f2-3073afc55e7d", "name": "layers", "type": "INT", "linkIds": [159], "pos": [-410, 623]}], "outputs": [{"id": "7df75921-6729-4aad-bfc1-fcc536c2d298", "name": "IMAGE", "type": "IMAGE", "linkIds": [110], "localized_name": "IMAGE", "pos": [1180, 543]}], "widgets": [], "nodes": [{"id": 38, "type": "CLIPLoader", "pos": [-320, 310], "size": [346.7470703125, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [74, 75]}], "properties": {"Node name for S&R": "CLIPLoader", "cnr_id": "comfy-core", "ver": "0.5.1", "models": [{"name": "qwen_2.5_vl_7b_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/HunyuanVideo_1.5_repackaged/resolve/main/split_files/text_encoders/qwen_2.5_vl_7b_fp8_scaled.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_2.5_vl_7b_fp8_scaled.safetensors", "qwen_image", "default"]}, {"id": 39, "type": "VAELoader", "pos": [-320, 460], "size": [346.7470703125, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [76, 139]}], "properties": {"Node name for S&R": "VAELoader", "cnr_id": "comfy-core", "ver": "0.5.1", "models": [{"name": "qwen_image_layered_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Layered_ComfyUI/resolve/main/split_files/vae/qwen_image_layered_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_layered_vae.safetensors"]}, {"id": 7, "type": "CLIPTextEncode", "pos": [70, 420], "size": [425.27801513671875, 180.6060791015625], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 75}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [131]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"Node name for S&R": "CLIPTextEncode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#322", "bgcolor": "#533"}, {"id": 70, "type": "ReferenceLatent", "pos": [330, 670], "size": [204.1666717529297, 46], "flags": {"collapsed": true}, "order": 9, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 131}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 134}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [132]}], "properties": {"Node name for S&R": "ReferenceLatent", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 69, "type": "ReferenceLatent", "pos": [330, 710], "size": [204.1666717529297, 46], "flags": {"collapsed": true}, "order": 8, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 129}, {"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 133}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [130]}], "properties": {"Node name for S&R": "ReferenceLatent", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 66, "type": "ModelSamplingAuraFlow", "pos": [530, 150], "size": [270, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 126}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [125]}], "properties": {"Node name for S&R": "ModelSamplingAuraFlow", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 76, "type": "LatentCutToBatch", "pos": [830, 160], "size": [270, 82], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 142}, {"localized_name": "dim", "name": "dim", "type": "COMBO", "widget": {"name": "dim"}, "link": null}, {"localized_name": "slice_size", "name": "slice_size", "type": "INT", "widget": {"name": "slice_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [143]}], "properties": {"Node name for S&R": "LatentCutToBatch", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["t", 1]}, {"id": 71, "type": "VAEEncode", "pos": [100, 690], "size": [140, 46], "flags": {"collapsed": false}, "order": 10, "mode": 0, "inputs": [{"localized_name": "pixels", "name": "pixels", "type": "IMAGE", "link": 149}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 139}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [133, 134]}], "properties": {"Node name for S&R": "VAEEncode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 8, "type": "VAEDecode", "pos": [850, 310], "size": [210, 46], "flags": {"collapsed": true}, "order": 7, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 143}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 76}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [110]}], "properties": {"Node name for S&R": "VAEDecode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 6, "type": "CLIPTextEncode", "pos": [70, 180], "size": [422.84503173828125, 164.31304931640625], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 74}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 150}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [129]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"Node name for S&R": "CLIPTextEncode", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 3, "type": "KSampler", "pos": [530, 280], "size": [270, 400], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 125}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 130}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 132}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 157}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": 153}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": 154}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [142]}], "properties": {"Node name for S&R": "KSampler", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 20, 2.5, "euler", "simple", 1]}, {"id": 78, "type": "GetImageSize", "pos": [80, 790], "size": [210, 136], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 148}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [155]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [156]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": null}], "properties": {"Node name for S&R": "GetImageSize", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 83, "type": "EmptyQwenImageLayeredLatentImage", "pos": [320, 790], "size": [330.9341796875, 130], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 155}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 156}, {"localized_name": "layers", "name": "layers", "type": "INT", "widget": {"name": "layers"}, "link": 159}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [157]}], "properties": {"Node name for S&R": "EmptyQwenImageLayeredLatentImage", "cnr_id": "comfy-core", "ver": "0.5.1", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [640, 640, 2, 1]}, {"id": 37, "type": "UNETLoader", "pos": [-320, 180], "size": [346.7470703125, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [126]}], "properties": {"Node name for S&R": "UNETLoader", "cnr_id": "comfy-core", "ver": "0.5.1", "models": [{"name": "qwen_image_layered_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/Qwen-Image-Layered_ComfyUI/resolve/main/split_files/diffusion_models/qwen_image_layered_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_image_layered_bf16.safetensors", "default"]}], "groups": [{"id": 1, "title": "Prompt(Optional)", "bounding": [60, 110, 450, 510], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Load Models", "bounding": [-330, 110, 366.7470703125, 421.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 75, "origin_id": 38, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "CLIP"}, {"id": 131, "origin_id": 7, "origin_slot": 0, "target_id": 70, "target_slot": 0, "type": "CONDITIONING"}, {"id": 134, "origin_id": 71, "origin_slot": 0, "target_id": 70, "target_slot": 1, "type": "LATENT"}, {"id": 129, "origin_id": 6, "origin_slot": 0, "target_id": 69, "target_slot": 0, "type": "CONDITIONING"}, {"id": 133, "origin_id": 71, "origin_slot": 0, "target_id": 69, "target_slot": 1, "type": "LATENT"}, {"id": 126, "origin_id": 37, "origin_slot": 0, "target_id": 66, "target_slot": 0, "type": "MODEL"}, {"id": 125, "origin_id": 66, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 130, "origin_id": 69, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 132, "origin_id": 70, "origin_slot": 0, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 142, "origin_id": 3, "origin_slot": 0, "target_id": 76, "target_slot": 0, "type": "LATENT"}, {"id": 74, "origin_id": 38, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP"}, {"id": 139, "origin_id": 39, "origin_slot": 0, "target_id": 71, "target_slot": 1, "type": "VAE"}, {"id": 143, "origin_id": 76, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 76, "origin_id": 39, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 148, "origin_id": -10, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "IMAGE"}, {"id": 149, "origin_id": -10, "origin_slot": 0, "target_id": 71, "target_slot": 0, "type": "IMAGE"}, {"id": 110, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 150, "origin_id": -10, "origin_slot": 1, "target_id": 6, "target_slot": 1, "type": "STRING"}, {"id": 153, "origin_id": -10, "origin_slot": 2, "target_id": 3, "target_slot": 5, "type": "INT"}, {"id": 154, "origin_id": -10, "origin_slot": 3, "target_id": 3, "target_slot": 6, "type": "FLOAT"}, {"id": 155, "origin_id": 78, "origin_slot": 0, "target_id": 83, "target_slot": 0, "type": "INT"}, {"id": 156, "origin_id": 78, "origin_slot": 1, "target_id": 83, "target_slot": 1, "type": "INT"}, {"id": 157, "origin_id": 83, "origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 159, "origin_id": -10, "origin_slot": 4, "target_id": 83, "target_slot": 2, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Image to layers"}]}, "config": {}, "extra": {"ds": {"scale": 1.14, "offset": [695.5933739308316, 6.855893974423647]}, "workflowRendererVersion": "LG"}, "version": 0.4} diff --git a/blueprints/Image to Model (Hunyuan3d 2.1).json b/blueprints/Image to Model (Hunyuan3d 2.1).json new file mode 100644 index 000000000..04b2d9bc9 --- /dev/null +++ b/blueprints/Image to Model (Hunyuan3d 2.1).json @@ -0,0 +1 @@ +{"id": "8fe311ec-2147-47a8-b618-7bd6fb6d4f9d", "revision": 0, "last_node_id": 23, "last_link_id": 24, "nodes": [{"id": 19, "type": "feb7d184-edf3-4851-9fd6-57a92c00ec42", "pos": [277.7327250391088, 256.4066470374603], "size": [340, 70], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}], "outputs": [{"localized_name": "MESH", "name": "MESH", "type": "MESH", "links": []}], "properties": {"proxyWidgets": [["-1", "ckpt_name"]], "cnr_id": "comfy-core", "ver": "0.3.65"}, "widgets_values": ["hunyuan_3d_v2.1.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "feb7d184-edf3-4851-9fd6-57a92c00ec42", "version": 1, "state": {"lastGroupId": 2, "lastNodeId": 23, "lastLinkId": 24, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image to Model (Hunyuan3d 2.1)", "inputNode": {"id": -10, "bounding": [-138.94803619384766, -392.62060546875, 120, 80]}, "outputNode": {"id": -20, "bounding": [1090, -310, 120, 60]}, "inputs": [{"id": "ab9b5b83-88f9-4698-954d-93f644bd07aa", "name": "image", "type": "IMAGE", "linkIds": [21], "localized_name": "image", "pos": [-38.948036193847656, -372.62060546875]}, {"id": "e15b0ba4-b5fe-41eb-9266-006ce1f1cf79", "name": "ckpt_name", "type": "COMBO", "linkIds": [23], "pos": [-38.948036193847656, -352.62060546875]}], "outputs": [{"id": "c8744662-e812-49b3-8bc8-744d557db6d6", "name": "MESH", "type": "MESH", "linkIds": [11], "localized_name": "MESH", "pos": [1110, -290]}], "widgets": [], "nodes": [{"id": 7, "type": "KSampler", "pos": [760, -510], "size": [270, 262], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 19}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 5}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 6}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 7}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [8]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "KSampler"}, "widgets_values": [894796671366012, "randomize", 30, 5, "euler", "normal", 1]}, {"id": 13, "type": "CLIPVisionEncode", "pos": [450, -410], "size": [270, 80], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip_vision", "name": "clip_vision", "type": "CLIP_VISION", "link": 20}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 21}, {"localized_name": "crop", "name": "crop", "type": "COMBO", "widget": {"name": "crop"}, "link": null}], "outputs": [{"localized_name": "CLIP_VISION_OUTPUT", "name": "CLIP_VISION_OUTPUT", "type": "CLIP_VISION_OUTPUT", "links": [22]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "CLIPVisionEncode"}, "widgets_values": ["center"]}, {"id": 6, "type": "Hunyuan3Dv2Conditioning", "pos": [510, -280], "size": [217.82578125, 46], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "clip_vision_output", "name": "clip_vision_output", "type": "CLIP_VISION_OUTPUT", "link": 22}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [5]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "Hunyuan3Dv2Conditioning"}, "widgets_values": []}, {"id": 4, "type": "EmptyLatentHunyuan3Dv2", "pos": [450, -180], "size": [270, 82], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "resolution", "name": "resolution", "type": "INT", "widget": {"name": "resolution"}, "link": null}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [7]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "EmptyLatentHunyuan3Dv2"}, "widgets_values": [4096, 1]}, {"id": 9, "type": "VoxelToMesh", "pos": [760, -40], "size": [270, 82], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "voxel", "name": "voxel", "type": "VOXEL", "link": 10}, {"localized_name": "algorithm", "name": "algorithm", "type": "COMBO", "widget": {"name": "algorithm"}, "link": null}, {"localized_name": "threshold", "name": "threshold", "type": "FLOAT", "widget": {"name": "threshold"}, "link": null}], "outputs": [{"localized_name": "MESH", "name": "MESH", "type": "MESH", "links": [11]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "VoxelToMesh"}, "widgets_values": ["surface net", 0.6]}, {"id": 8, "type": "VAEDecodeHunyuan3D", "pos": [760, -200], "size": [270, 102], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 8}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 18}, {"localized_name": "num_chunks", "name": "num_chunks", "type": "INT", "widget": {"name": "num_chunks"}, "link": null}, {"localized_name": "octree_resolution", "name": "octree_resolution", "type": "INT", "widget": {"name": "octree_resolution"}, "link": null}], "outputs": [{"localized_name": "VOXEL", "name": "VOXEL", "type": "VOXEL", "links": [10]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "VAEDecodeHunyuan3D"}, "widgets_values": [8000, 256]}, {"id": 1, "type": "ImageOnlyCheckpointLoader", "pos": [60, -510], "size": [356.0005859375, 100], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 23}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [16]}, {"localized_name": "CLIP_VISION", "name": "CLIP_VISION", "type": "CLIP_VISION", "links": [20]}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [18]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ImageOnlyCheckpointLoader", "models": [{"name": "hunyuan_3d_v2.1.safetensors", "url": "https://huggingface.co/Comfy-Org/hunyuan3D_2.1_repackaged/resolve/main/hunyuan_3d_v2.1.safetensors", "directory": "checkpoints"}]}, "widgets_values": ["hunyuan_3d_v2.1.safetensors"]}, {"id": 3, "type": "ModelSamplingAuraFlow", "pos": [450, -510], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 16}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [19]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.59", "Node name for S&R": "ModelSamplingAuraFlow"}, "widgets_values": [1]}], "groups": [], "links": [{"id": 16, "origin_id": 1, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 19, "origin_id": 3, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "MODEL"}, {"id": 5, "origin_id": 6, "origin_slot": 0, "target_id": 7, "target_slot": 1, "type": "CONDITIONING"}, {"id": 6, "origin_id": 6, "origin_slot": 1, "target_id": 7, "target_slot": 2, "type": "CONDITIONING"}, {"id": 7, "origin_id": 4, "origin_slot": 0, "target_id": 7, "target_slot": 3, "type": "LATENT"}, {"id": 8, "origin_id": 7, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 18, "origin_id": 1, "origin_slot": 2, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 10, "origin_id": 8, "origin_slot": 0, "target_id": 9, "target_slot": 0, "type": "VOXEL"}, {"id": 20, "origin_id": 1, "origin_slot": 1, "target_id": 13, "target_slot": 0, "type": "CLIP_VISION"}, {"id": 22, "origin_id": 13, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP_VISION_OUTPUT"}, {"id": 21, "origin_id": -10, "origin_slot": 0, "target_id": 13, "target_slot": 1, "type": "IMAGE"}, {"id": 11, "origin_id": 9, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "MESH"}, {"id": 23, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "3D/Image to 3D Model"}]}, "config": {}, "extra": {"ds": {"scale": 0.620921323059155, "offset": [1636.2881100217016, 965.23503257945]}, "workflowRendererVersion": "LG"}, "version": 0.4} diff --git a/blueprints/Image to Video (Wan 2.2).json b/blueprints/Image to Video (Wan 2.2).json new file mode 100644 index 000000000..cd0b44a72 --- /dev/null +++ b/blueprints/Image to Video (Wan 2.2).json @@ -0,0 +1 @@ +{"id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", "revision": 0, "last_node_id": 119, "last_link_id": 231, "nodes": [{"id": 116, "type": "296b573f-1e7d-43df-a2df-925fe5e17063", "pos": [1098.3332694531493, -268.3334707134305], "size": [400, 470], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"label": "start image", "localized_name": "start_image", "name": "start_image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}, {"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"label": "low_noise_unet", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"label": "low_noise_lora", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"label": "high_noise_unet", "name": "unet_name_1", "type": "COMBO", "widget": {"name": "unet_name_1"}, "link": null}, {"label": "high_noise_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"name": "VIDEO", "type": "VIDEO", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "width"], ["-1", "height"], ["-1", "length"], ["86", "noise_seed"], ["86", "control_after_generate"], ["-1", "unet_name"], ["-1", "lora_name"], ["-1", "unet_name_1"], ["-1", "lora_name_1"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", 640, 640, 81, null, null, "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan_2.1_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "296b573f-1e7d-43df-a2df-925fe5e17063", "version": 1, "state": {"lastGroupId": 16, "lastNodeId": 119, "lastLinkId": 231, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Image to Video (Wan 2.2)", "inputNode": {"id": -10, "bounding": [-250, 570, 131.435546875, 260]}, "outputNode": {"id": -20, "bounding": [1723.4786916118696, 716.3650158766799, 120, 60]}, "inputs": [{"id": "69d8b033-5601-446e-9634-f5cafbd373e2", "name": "start_image", "type": "IMAGE", "linkIds": [186], "localized_name": "start_image", "label": "start image", "shape": 7, "pos": [-138.564453125, 590]}, {"id": "88ae2af6-63c1-41be-90e8-6359f4d5f133", "name": "text", "type": "STRING", "linkIds": [222], "label": "prompt", "pos": [-138.564453125, 610]}, {"id": "fad9d346-653e-4be5-9e52-38cef6fa59f3", "name": "width", "type": "INT", "linkIds": [223], "pos": [-138.564453125, 630]}, {"id": "a4f34897-8063-4613-a2eb-6c2503167eb1", "name": "height", "type": "INT", "linkIds": [224], "pos": [-138.564453125, 650]}, {"id": "dc4d4472-cff7-41e0-9a4a-d118fcd4a21a", "name": "length", "type": "INT", "linkIds": [225], "pos": [-138.564453125, 670]}, {"id": "f7317e79-4a52-460b-9d71-89ec450dc333", "name": "unet_name", "type": "COMBO", "linkIds": [226], "label": "low_noise_unet", "pos": [-138.564453125, 690]}, {"id": "7a470f86-503a-474f-9571-830c8eb99231", "name": "lora_name", "type": "COMBO", "linkIds": [227], "label": "low_noise_lora", "pos": [-138.564453125, 710]}, {"id": "1d88c531-f68e-41b9-95c5-16f944a55b7d", "name": "unet_name_1", "type": "COMBO", "linkIds": [228], "label": "high_noise_unet", "pos": [-138.564453125, 730]}, {"id": "67a79742-33e5-4c38-89d8-ecb021d067c8", "name": "lora_name_1", "type": "COMBO", "linkIds": [229], "label": "high_noise_lora", "pos": [-138.564453125, 750]}, {"id": "9d184b83-37c6-4891-bbdf-ffcdf5ab2016", "name": "clip_name", "type": "COMBO", "linkIds": [230], "pos": [-138.564453125, 770]}, {"id": "24c568ec-aeb2-4c31-9f87-54ee9099d55f", "name": "vae_name", "type": "COMBO", "linkIds": [231], "pos": [-138.564453125, 790]}], "outputs": [{"id": "994c9c48-5f35-48ed-8c9d-0f2b21990cb6", "name": "VIDEO", "type": "VIDEO", "linkIds": [221], "pos": [1743.4786916118696, 736.3650158766799]}], "widgets": [], "nodes": [{"id": 84, "type": "CLIPLoader", "pos": [59.999957705045404, 29.99977085410412], "size": [346.38020833333337, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 230}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [178, 181]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPLoader", "models": [{"name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", "directory": "text_encoders"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan", "default"]}, {"id": 90, "type": "VAELoader", "pos": [59.999957705045404, 189.9997708925786], "size": [344.7265625, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 231}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [176, 185]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAELoader", "models": [{"name": "wan_2.1_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", "directory": "vae"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan_2.1_vae.safetensors"]}, {"id": 95, "type": "UNETLoader", "pos": [49.99996468306838, -230.00013148243067], "size": [346.7447916666667, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 226}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [194]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 96, "type": "UNETLoader", "pos": [49.99996468306838, -100.00008258817711], "size": [346.7447916666667, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 228}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [196]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 103, "type": "ModelSamplingSD3", "pos": [739.9998741034308, -100.00008258817711], "size": [210, 58], "flags": {"collapsed": false}, "order": 12, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 189}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [192]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [5.000000000000001]}, {"id": 93, "type": "CLIPTextEncode", "pos": [439.99997175727736, 89.99984067280784], "size": [510, 88], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 181}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 222}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [183]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 89, "type": "CLIPTextEncode", "pos": [439.99997175727736, 289.99986864261126], "size": [510, 88], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 178}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [184]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走"], "color": "#322", "bgcolor": "#533"}, {"id": 101, "type": "LoraLoaderModelOnly", "pos": [449.99996477925447, -230.00013148243067], "size": [280, 82], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 194}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 227}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [190]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", "directory": "loras"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors", 1.0000000000000002]}, {"id": 102, "type": "LoraLoaderModelOnly", "pos": [449.99996477925447, -100.00008258817711], "size": [280, 82], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 196}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 229}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [189]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", "directory": "loras"}], "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors", 1.0000000000000002]}, {"id": 104, "type": "ModelSamplingSD3", "pos": [739.9998741034308, -230.00013148243067], "size": [210, 58], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 190}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [195]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [5.000000000000001]}, {"id": 98, "type": "WanImageToVideo", "pos": [530.0000206419123, 529.9999245437435], "size": [342.59114583333337, 210], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 183}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 184}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 185}, {"localized_name": "clip_vision_output", "name": "clip_vision_output", "shape": 7, "type": "CLIP_VISION_OUTPUT", "link": null}, {"localized_name": "start_image", "name": "start_image", "shape": 7, "type": "IMAGE", "link": 186}, {"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 223}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 224}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 225}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "slot_index": 0, "links": [168, 172]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "slot_index": 1, "links": [169, 173]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [174]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "WanImageToVideo", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": [640, 640, 81, 1]}, {"id": 86, "type": "KSamplerAdvanced", "pos": [989.9999230265402, -250.00014544809514], "size": [304.73958333333337, 334], "flags": {}, "order": 14, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 195}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 172}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 173}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 174}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [170]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSamplerAdvanced", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["enable", 0, "randomize", 4, 1, "euler", "simple", 0, 2, "enable"]}, {"id": 85, "type": "KSamplerAdvanced", "pos": [1336.748028098344, -250.00014544809514], "size": [304.73958333333337, 334], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 192}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 168}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 169}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 170}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [175]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSamplerAdvanced", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["disable", 0, "fixed", 4, 1, "euler", "simple", 2, 4, "disable"]}, {"id": 67, "type": "Note", "pos": [510.0000345979581, 819.9999455547611], "size": [390, 88], "flags": {}, "order": 4, "mode": 0, "inputs": [], "outputs": [], "title": "Video Size", "properties": {"ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["By default, we set the video to a smaller size for users with low VRAM. If you have enough VRAM, you can change the size"], "color": "#222", "bgcolor": "#000"}, {"id": 105, "type": "MarkdownNote", "pos": [-469.9999795985529, 279.9998197772136], "size": [480, 170.65104166666669], "flags": {}, "order": 5, "mode": 0, "inputs": [], "outputs": [], "title": "VRAM Usage", "properties": {"ue_properties": {"version": "7.1", "widget_ue_connectable": {}, "input_ue_unconnectable": {}}}, "widgets_values": ["## GPU:RTX4090D 24GB\n\n| Model | Size |VRAM Usage | 1st Generation | 2nd Generation |\n|---------------------|-------|-----------|---------------|-----------------|\n| fp8_scaled |640*640| 84% | ≈ 536s | ≈ 513s |\n| fp8_scaled + 4steps LoRA | 640*640 | 83% | ≈ 97s | ≈ 71s |"], "color": "#222", "bgcolor": "#000"}, {"id": 66, "type": "MarkdownNote", "pos": [-469.9999795985529, -320.00012452364496], "size": [480, 572.1354166666667], "flags": {}, "order": 6, "mode": 0, "inputs": [], "outputs": [], "title": "Model Links", "properties": {"ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2\n)\n\n**Diffusion Model**\n- [wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors)\n- [wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors)\n\n**LoRA**\n- [wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors)\n- [wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_i2v_lightx2v_4steps_lora_v1_high_noise.safetensors)\n\n**VAE**\n- [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│ ├───📂 diffusion_models/\n│ │ ├─── wan2.2_i2v_low_noise_14B_fp8_scaled.safetensors\n│ │ └─── wan2.2_i2v_high_noise_14B_fp8_scaled.safetensors\n│ ├───📂 loras/\n│ │ ├─── wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors\n│ │ └─── wan2.2_i2v_lightx2v_4steps_lora_v1_low_noise.safetensors\n│ ├───📂 text_encoders/\n│ │ └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors \n│ └───📂 vae/\n│ └── wan_2.1_vae.safetensors\n```\n"], "color": "#222", "bgcolor": "#000"}, {"id": 115, "type": "Note", "pos": [29.999978639114225, -470.00010361843204], "size": [360, 88], "flags": {}, "order": 7, "mode": 0, "inputs": [], "outputs": [], "title": "About 4 Steps LoRA", "properties": {"ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": ["Using the Wan2.2 Lighting LoRA will result in the loss of video dynamics, but it will reduce the generation time. This template provides two workflows, and you can enable one as needed."], "color": "#222", "bgcolor": "#000"}, {"id": 117, "type": "CreateVideo", "pos": [1030, 650], "size": [270, 78], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 220}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [221]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [16]}, {"id": 87, "type": "VAEDecode", "pos": [1020, 540], "size": [210, 46], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 175}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 176}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [220]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAEDecode", "ue_properties": {"widget_ue_connectable": {}, "version": "7.1", "input_ue_unconnectable": {}}}, "widgets_values": []}], "groups": [{"id": 15, "title": "fp8_scaled + 4steps LoRA", "bounding": [30, -350, 1630, 1120], "color": "#444", "font_size": 24, "flags": {}}, {"id": 11, "title": "Step1 - Load models", "bounding": [40, -310, 371.0310363769531, 571.3974609375], "color": "#444", "font_size": 24, "flags": {}}, {"id": 13, "title": "Step4 - Prompt", "bounding": [430, 20, 530, 420], "color": "#444", "font_size": 24, "flags": {}}, {"id": 14, "title": "Step3 - Video size & length", "bounding": [430, 460, 530, 290], "color": "#444", "font_size": 24, "flags": {}}, {"id": 16, "title": "Lightx2v 4steps LoRA", "bounding": [430, -310, 530, 310], "color": "#444", "font_size": 24, "flags": {}}], "links": [{"id": 189, "origin_id": 102, "origin_slot": 0, "target_id": 103, "target_slot": 0, "type": "MODEL"}, {"id": 181, "origin_id": 84, "origin_slot": 0, "target_id": 93, "target_slot": 0, "type": "CLIP"}, {"id": 178, "origin_id": 84, "origin_slot": 0, "target_id": 89, "target_slot": 0, "type": "CLIP"}, {"id": 194, "origin_id": 95, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "MODEL"}, {"id": 196, "origin_id": 96, "origin_slot": 0, "target_id": 102, "target_slot": 0, "type": "MODEL"}, {"id": 190, "origin_id": 101, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "MODEL"}, {"id": 183, "origin_id": 93, "origin_slot": 0, "target_id": 98, "target_slot": 0, "type": "CONDITIONING"}, {"id": 184, "origin_id": 89, "origin_slot": 0, "target_id": 98, "target_slot": 1, "type": "CONDITIONING"}, {"id": 185, "origin_id": 90, "origin_slot": 0, "target_id": 98, "target_slot": 2, "type": "VAE"}, {"id": 175, "origin_id": 85, "origin_slot": 0, "target_id": 87, "target_slot": 0, "type": "LATENT"}, {"id": 176, "origin_id": 90, "origin_slot": 0, "target_id": 87, "target_slot": 1, "type": "VAE"}, {"id": 195, "origin_id": 104, "origin_slot": 0, "target_id": 86, "target_slot": 0, "type": "MODEL"}, {"id": 172, "origin_id": 98, "origin_slot": 0, "target_id": 86, "target_slot": 1, "type": "CONDITIONING"}, {"id": 173, "origin_id": 98, "origin_slot": 1, "target_id": 86, "target_slot": 2, "type": "CONDITIONING"}, {"id": 174, "origin_id": 98, "origin_slot": 2, "target_id": 86, "target_slot": 3, "type": "LATENT"}, {"id": 192, "origin_id": 103, "origin_slot": 0, "target_id": 85, "target_slot": 0, "type": "MODEL"}, {"id": 168, "origin_id": 98, "origin_slot": 0, "target_id": 85, "target_slot": 1, "type": "CONDITIONING"}, {"id": 169, "origin_id": 98, "origin_slot": 1, "target_id": 85, "target_slot": 2, "type": "CONDITIONING"}, {"id": 170, "origin_id": 86, "origin_slot": 0, "target_id": 85, "target_slot": 3, "type": "LATENT"}, {"id": 186, "origin_id": -10, "origin_slot": 0, "target_id": 98, "target_slot": 4, "type": "IMAGE"}, {"id": 220, "origin_id": 87, "origin_slot": 0, "target_id": 117, "target_slot": 0, "type": "IMAGE"}, {"id": 221, "origin_id": 117, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 222, "origin_id": -10, "origin_slot": 1, "target_id": 93, "target_slot": 1, "type": "STRING"}, {"id": 223, "origin_id": -10, "origin_slot": 2, "target_id": 98, "target_slot": 5, "type": "INT"}, {"id": 224, "origin_id": -10, "origin_slot": 3, "target_id": 98, "target_slot": 6, "type": "INT"}, {"id": 225, "origin_id": -10, "origin_slot": 4, "target_id": 98, "target_slot": 7, "type": "INT"}, {"id": 226, "origin_id": -10, "origin_slot": 5, "target_id": 95, "target_slot": 0, "type": "COMBO"}, {"id": 227, "origin_id": -10, "origin_slot": 6, "target_id": 101, "target_slot": 1, "type": "COMBO"}, {"id": 228, "origin_id": -10, "origin_slot": 7, "target_id": 96, "target_slot": 0, "type": "COMBO"}, {"id": 229, "origin_id": -10, "origin_slot": 8, "target_id": 102, "target_slot": 1, "type": "COMBO"}, {"id": 230, "origin_id": -10, "origin_slot": 9, "target_id": 84, "target_slot": 0, "type": "COMBO"}, {"id": 231, "origin_id": -10, "origin_slot": 10, "target_id": 90, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Image to video"}]}, "config": {}, "extra": {"ds": {"scale": 0.7926047855889957, "offset": [-30.12529469925767, 690.3829855122884]}, "frontendVersion": "1.37.11", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ue_links": []}, "version": 0.4} diff --git a/blueprints/Pose to Image (Z-Image-Turbo).json b/blueprints/Pose to Image (Z-Image-Turbo).json new file mode 100644 index 000000000..f4c224249 --- /dev/null +++ b/blueprints/Pose to Image (Z-Image-Turbo).json @@ -0,0 +1 @@ +{"id": "e046dd74-e2a7-4f31-a75b-5e11a8c72d4e", "revision": 0, "last_node_id": 26, "last_link_id": 46, "nodes": [{"id": 13, "type": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", "pos": [400, 3630], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "name": "image", "type": "IMAGE", "link": null}, {"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}, {"name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": null}], "outputs": [{"name": "IMAGE", "type": "IMAGE", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["19", "seed"], ["19", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"], ["-1", "name"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", null, null, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors", "Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "d8492a46-9e6c-4917-b5ea-4273aabf5f51", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 26, "lastLinkId": 46, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Pose to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [27.60368520069494, 4936.043696127976, 120, 160]}, "outputNode": {"id": -20, "bounding": [1598.6038576146689, 4936.043696127976, 120, 60]}, "inputs": [{"id": "29ca271b-8f63-4e7b-a4b8-c9b4192ada0b", "name": "image", "type": "IMAGE", "linkIds": [41, 42], "label": "image", "pos": [127.60368520069494, 4956.043696127976]}, {"id": "b6549f90-39ee-4b79-9e00-af4d9df969fe", "name": "text", "type": "STRING", "linkIds": [37], "label": "prompt", "pos": [127.60368520069494, 4976.043696127976]}, {"id": "9f23df20-75de-4782-8ff7-225bc7976bbe", "name": "unet_name", "type": "COMBO", "linkIds": [43], "pos": [127.60368520069494, 4996.043696127976]}, {"id": "fc8aa3eb-a537-4976-8b5f-666f0dc5af4b", "name": "clip_name", "type": "COMBO", "linkIds": [44], "pos": [127.60368520069494, 5016.043696127976]}, {"id": "ed2c5269-91ac-4f93-b68d-6b546cef20d8", "name": "vae_name", "type": "COMBO", "linkIds": [45], "pos": [127.60368520069494, 5036.043696127976]}, {"id": "560ba519-ec0c-4ca4-b8f0-f02174012475", "name": "name", "type": "COMBO", "linkIds": [46], "pos": [127.60368520069494, 5056.043696127976]}], "outputs": [{"id": "47f9a22d-6619-4917-9447-a7d5d08dceb5", "name": "IMAGE", "type": "IMAGE", "linkIds": [35], "pos": [1618.6038576146689, 4956.043696127976]}], "widgets": [], "nodes": [{"id": 14, "type": "CLIPLoader", "pos": [340, 4820], "size": [269.9609375, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 44}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 15, "type": "UNETLoader", "pos": [340, 4670], "size": [269.9609375, 82], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 43}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 16, "type": "VAELoader", "pos": [340, 5000], "size": [269.9609375, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 45}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [21, 30]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 17, "type": "ModelPatchLoader", "pos": [340, 5130], "size": [269.9609375, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "name", "name": "name", "type": "COMBO", "widget": {"name": "name"}, "link": 46}], "outputs": [{"localized_name": "MODEL_PATCH", "name": "MODEL_PATCH", "type": "MODEL_PATCH", "links": [29]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.51", "Node name for S&R": "ModelPatchLoader", "models": [{"name": "Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "url": "https://huggingface.co/alibaba-pai/Z-Image-Turbo-Fun-Controlnet-Union/resolve/main/Z-Image-Turbo-Fun-Controlnet-Union.safetensors", "directory": "model_patches"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["Z-Image-Turbo-Fun-Controlnet-Union.safetensors"]}, {"id": 18, "type": "ModelSamplingAuraFlow", "pos": [1110, 4610], "size": [289.97395833333337, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 22}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [23]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 19, "type": "KSampler", "pos": [1110, 4720], "size": [300, 309.9609375], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 23}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 24}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 25}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 26}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [20]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 9, 1, "res_multistep", "simple", 1]}, {"id": 20, "type": "ConditioningZeroOut", "pos": [860, 5160], "size": [204.134765625, 26], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 27}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [25]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 21, "type": "QwenImageDiffsynthControlnet", "pos": [720, 5320], "size": [289.97395833333337, 138], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 28}, {"localized_name": "model_patch", "name": "model_patch", "type": "MODEL_PATCH", "link": 29}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 30}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 42}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [22]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "QwenImageDiffsynthControlnet", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 23, "type": "CLIPTextEncode", "pos": [660, 4660], "size": [400, 179.9609375], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 33}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 37}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [24, 27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 24, "type": "VAEDecode", "pos": [1450, 4620], "size": [200, 46], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 20}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 21}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [35]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 25, "type": "GetImageSize", "pos": [330, 5540], "size": [140, 66], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 41}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [31]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [32]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.76", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 22, "type": "EmptySD3LatentImage", "pos": [1110, 5540], "size": [259.9609375, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 31}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 32}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [26]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}], "groups": [{"id": 1, "title": "Prompt", "bounding": [640, 4590, 440, 630], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Models", "bounding": [320, 4590, 300, 640], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Apple ControlNet", "bounding": [640, 5240, 440, 260], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 20, "origin_id": 19, "origin_slot": 0, "target_id": 24, "target_slot": 0, "type": "LATENT"}, {"id": 21, "origin_id": 16, "origin_slot": 0, "target_id": 24, "target_slot": 1, "type": "VAE"}, {"id": 22, "origin_id": 21, "origin_slot": 0, "target_id": 18, "target_slot": 0, "type": "MODEL"}, {"id": 23, "origin_id": 18, "origin_slot": 0, "target_id": 19, "target_slot": 0, "type": "MODEL"}, {"id": 24, "origin_id": 23, "origin_slot": 0, "target_id": 19, "target_slot": 1, "type": "CONDITIONING"}, {"id": 25, "origin_id": 20, "origin_slot": 0, "target_id": 19, "target_slot": 2, "type": "CONDITIONING"}, {"id": 26, "origin_id": 22, "origin_slot": 0, "target_id": 19, "target_slot": 3, "type": "LATENT"}, {"id": 27, "origin_id": 23, "origin_slot": 0, "target_id": 20, "target_slot": 0, "type": "CONDITIONING"}, {"id": 28, "origin_id": 15, "origin_slot": 0, "target_id": 21, "target_slot": 0, "type": "MODEL"}, {"id": 29, "origin_id": 17, "origin_slot": 0, "target_id": 21, "target_slot": 1, "type": "MODEL_PATCH"}, {"id": 30, "origin_id": 16, "origin_slot": 0, "target_id": 21, "target_slot": 2, "type": "VAE"}, {"id": 31, "origin_id": 25, "origin_slot": 0, "target_id": 22, "target_slot": 0, "type": "INT"}, {"id": 32, "origin_id": 25, "origin_slot": 1, "target_id": 22, "target_slot": 1, "type": "INT"}, {"id": 33, "origin_id": 14, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "CLIP"}, {"id": 35, "origin_id": 24, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 37, "origin_id": -10, "origin_slot": 1, "target_id": 23, "target_slot": 1, "type": "STRING"}, {"id": 41, "origin_id": -10, "origin_slot": 0, "target_id": 25, "target_slot": 0, "type": "IMAGE"}, {"id": 42, "origin_id": -10, "origin_slot": 0, "target_id": 21, "target_slot": 3, "type": "IMAGE"}, {"id": 43, "origin_id": -10, "origin_slot": 2, "target_id": 15, "target_slot": 0, "type": "COMBO"}, {"id": 44, "origin_id": -10, "origin_slot": 3, "target_id": 14, "target_slot": 0, "type": "COMBO"}, {"id": 45, "origin_id": -10, "origin_slot": 4, "target_id": 16, "target_slot": 0, "type": "COMBO"}, {"id": 46, "origin_id": -10, "origin_slot": 5, "target_id": 17, "target_slot": 0, "type": "COMBO"}], "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "category": "Image generation and editing/Pose to image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.6479518372239997, "offset": [852.9773200429215, -3036.34291480022]}}, "version": 0.4} diff --git a/blueprints/Pose to Video (LTX 2.0).json b/blueprints/Pose to Video (LTX 2.0).json new file mode 100644 index 000000000..78c098798 --- /dev/null +++ b/blueprints/Pose to Video (LTX 2.0).json @@ -0,0 +1 @@ +{"id": "01cd475b-52df-43bf-aafa-484a5976d2d2", "revision": 0, "last_node_id": 160, "last_link_id": 410, "nodes": [{"id": 1, "type": "f0e58a6b-7246-4103-9fec-73b423634b1f", "pos": [210, 3830], "size": [420, 500], "flags": {"collapsed": false}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"label": "first_frame_strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}, {"label": "disable_first_frame", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": null}, {"label": "first frame", "name": "image", "type": "IMAGE", "link": null}, {"label": "control image", "name": "input", "type": "IMAGE,MASK", "link": null}, {"name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"label": "distll_lora", "name": "lora_name_1", "type": "COMBO", "widget": {"name": "lora_name_1"}, "link": null}, {"label": "upscale_model", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}, {"name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": null}, {"name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": null}, {"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "resize_type.width"], ["-1", "resize_type.height"], ["-1", "length"], ["-1", "strength"], ["-1", "bypass"], ["126", "noise_seed"], ["126", "control_after_generate"], ["-1", "ckpt_name"], ["-1", "lora_name"], ["-1", "model_name"], ["-1", "lora_name_1"]], "cnr_id": "comfy-core", "ver": "0.7.0", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", 1280, 720, 97, 1, false, null, null, "ltx-2-19b-dev-fp8.safetensors", "ltx-2-19b-ic-lora-pose-control.safetensors", "ltx-2-spatial-upscaler-x2-1.0.safetensors", "ltx-2-19b-distilled-lora-384.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "f0e58a6b-7246-4103-9fec-73b423634b1f", "version": 1, "state": {"lastGroupId": 11, "lastNodeId": 160, "lastLinkId": 410, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Pose to Video (LTX 2.0)", "inputNode": {"id": -10, "bounding": [-2220, 4180, 153.3203125, 280]}, "outputNode": {"id": -20, "bounding": [1750.2777777777776, 4091.1111111111113, 120, 60]}, "inputs": [{"id": "0f1d2f96-933a-4a7b-8f1a-7b49fc4ade09", "name": "text", "type": "STRING", "linkIds": [345], "label": "prompt", "pos": [-2086.6796875, 4200]}, {"id": "59430efe-1090-4e36-8afe-b21ce7f4268b", "name": "strength", "type": "FLOAT", "linkIds": [370, 371], "label": "first_frame_strength", "pos": [-2086.6796875, 4220]}, {"id": "6145a9b9-68ed-4956-89f7-7a5ebdd5c99e", "name": "bypass", "type": "BOOLEAN", "linkIds": [363, 368], "label": "disable_first_frame", "pos": [-2086.6796875, 4240]}, {"id": "f7aa8c12-bdba-4bbd-84cf-b49cfc32a1dd", "name": "image", "type": "IMAGE", "linkIds": [398, 399], "label": "first frame", "pos": [-2086.6796875, 4260]}, {"id": "da40a4c0-cd19-46c6-8eb3-62d0026fbe85", "name": "input", "type": "IMAGE,MASK", "linkIds": [400], "label": "control image", "pos": [-2086.6796875, 4280]}, {"id": "8005344b-99d6-4829-a619-c4e8ef640eb9", "name": "ckpt_name", "type": "COMBO", "linkIds": [401, 402, 403], "pos": [-2086.6796875, 4300]}, {"id": "25e7c4e8-850c-4f37-bc14-e3f4b5f228c0", "name": "lora_name", "type": "COMBO", "linkIds": [404, 405], "pos": [-2086.6796875, 4320]}, {"id": "f16a18dd-947e-400a-8889-02cf998f760a", "name": "lora_name_1", "type": "COMBO", "linkIds": [406], "label": "distll_lora", "pos": [-2086.6796875, 4340]}, {"id": "1abf156c-4c85-4ee5-8671-62df3177d835", "name": "model_name", "type": "COMBO", "linkIds": [407], "label": "upscale_model", "pos": [-2086.6796875, 4360]}, {"id": "203402cf-4253-4daf-bf78-5def9496e0af", "name": "resize_type.width", "type": "INT", "linkIds": [408], "pos": [-2086.6796875, 4380]}, {"id": "e6d8ac4a-34d4-46c6-bcb2-4e66a696438c", "name": "resize_type.height", "type": "INT", "linkIds": [409], "pos": [-2086.6796875, 4400]}, {"id": "6aa6cf2c-bc4f-4f8b-be62-aa15793375dc", "name": "length", "type": "INT", "linkIds": [410], "pos": [-2086.6796875, 4420]}], "outputs": [{"id": "4e837941-de2d-4df8-8f94-686e24036897", "name": "VIDEO", "type": "VIDEO", "linkIds": [304], "localized_name": "VIDEO", "pos": [1770.2777777777776, 4111.111111111111]}], "widgets": [], "nodes": [{"id": 93, "type": "CFGGuider", "pos": [-697.721823660531, 3671.1105325465196], "size": [269.97395833333337, 98], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 326}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 309}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 311}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 94, "type": "KSamplerSelect", "pos": [-697.721823660531, 3841.1107362825187], "size": [269.97395833333337, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["euler"]}, {"id": 99, "type": "ManualSigmas", "pos": [410.27824286284044, 3851.110970278795], "size": [269.97395833333337, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "sigmas", "name": "sigmas", "type": "STRING", "widget": {"name": "sigmas"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [278]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "ManualSigmas", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["0.909375, 0.725, 0.421875, 0.0"]}, {"id": 100, "type": "LatentUpscaleModelLoader", "pos": [-69.72208571196083, 3701.1104657166875], "size": [389.97395833333337, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 407}], "outputs": [{"localized_name": "LATENT_UPSCALE_MODEL", "name": "LATENT_UPSCALE_MODEL", "type": "LATENT_UPSCALE_MODEL", "links": [288]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LatentUpscaleModelLoader", "models": [{"name": "ltx-2-spatial-upscaler-x2-1.0.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-spatial-upscaler-x2-1.0.safetensors", "directory": "latent_upscale_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-spatial-upscaler-x2-1.0.safetensors"]}, {"id": 101, "type": "LTXVConcatAVLatent", "pos": [410.27824286284044, 4101.110949206838], "size": [269.97395833333337, 46], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 365}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 266}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 108, "type": "CFGGuider", "pos": [410.27824286284044, 3701.1104657166875], "size": [269.97395833333337, 98], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 281}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 282}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}], "outputs": [{"localized_name": "GUIDER", "name": "GUIDER", "type": "GUIDER", "links": [276]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.71", "Node name for S&R": "CFGGuider", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1]}, {"id": 123, "type": "SamplerCustomAdvanced", "pos": [-387.72197839215096, 3521.1103425011374], "size": [213.09895833333334, 106], "flags": {}, "order": 31, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 260}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 261}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 262}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 263}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 323}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": [272]}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 114, "type": "LTXVConditioning", "pos": [-1133.7215420073496, 4141.110347554622], "size": [269.97395833333337, 78], "flags": {}, "order": 27, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 292}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 293}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "FLOAT", "widget": {"name": "frame_rate"}, "link": 355}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [313]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [314]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVConditioning", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 119, "type": "CLIPTextEncode", "pos": [-1163.7218246405453, 3881.1109034489627], "size": [400, 88], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 294}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [293]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["blurry, low quality, still frame, frames, watermark, overlay, titles, has blurbox, has subtitles"], "color": "#323", "bgcolor": "#535"}, {"id": 116, "type": "LTXVConcatAVLatent", "pos": [-519.7217122979332, 4701.110031965835], "size": [187.5, 46], "flags": {}, "order": 29, "mode": 0, "inputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "link": 324}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "link": 300}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [322, 323]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVConcatAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 122, "type": "LTXVSeparateAVLatent", "pos": [-393.72183921949465, 3801.1107787938904], "size": [239.97395833333334, 46], "flags": {}, "order": 30, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 272}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [270]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [266]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 124, "type": "CLIPTextEncode", "pos": [-1174.7214530029996, 3515.1112854387566], "size": [409.97395833333337, 88], "flags": {}, "order": 32, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 295}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 345}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [292]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 98, "type": "KSamplerSelect", "pos": [410.27824286284044, 3981.1101681370833], "size": [269.97395833333337, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}], "outputs": [{"localized_name": "SAMPLER", "name": "SAMPLER", "type": "SAMPLER", "links": [277]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "KSamplerSelect", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["gradient_estimation"]}, {"id": 105, "type": "LoraLoaderModelOnly", "pos": [-69.72208571196083, 3571.110499039739], "size": [389.97395833333337, 82], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 327}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 406}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-distilled-lora-384.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-distilled-lora-384.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-distilled-lora-384.safetensors", 1]}, {"id": 95, "type": "LTXVScheduler", "pos": [-699.7218704597861, 3981.1101681370833], "size": [269.97395833333337, 154], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "latent", "name": "latent", "shape": 7, "type": "LATENT", "link": 322}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "max_shift", "name": "max_shift", "type": "FLOAT", "widget": {"name": "max_shift"}, "link": null}, {"localized_name": "base_shift", "name": "base_shift", "type": "FLOAT", "widget": {"name": "base_shift"}, "link": null}, {"localized_name": "stretch", "name": "stretch", "type": "BOOLEAN", "widget": {"name": "stretch"}, "link": null}, {"localized_name": "terminal", "name": "terminal", "type": "FLOAT", "widget": {"name": "terminal"}, "link": null}], "outputs": [{"localized_name": "SIGMAS", "name": "SIGMAS", "type": "SIGMAS", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "LTXVScheduler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [20, 2.05, 0.95, true, 0.1]}, {"id": 126, "type": "RandomNoise", "pos": [-697.721823660531, 3521.1103425011374], "size": [269.97395833333337, 82], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize"]}, {"id": 107, "type": "SamplerCustomAdvanced", "pos": [710.2782734905775, 3571.110499039739], "size": [212.36979166666669, 106], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "noise", "name": "noise", "type": "NOISE", "link": 347}, {"localized_name": "guider", "name": "guider", "type": "GUIDER", "link": 276}, {"localized_name": "sampler", "name": "sampler", "type": "SAMPLER", "link": 277}, {"localized_name": "sigmas", "name": "sigmas", "type": "SIGMAS", "link": 278}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 279}], "outputs": [{"localized_name": "output", "name": "output", "type": "LATENT", "links": []}, {"localized_name": "denoised_output", "name": "denoised_output", "type": "LATENT", "links": [336]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "SamplerCustomAdvanced", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 143, "type": "RandomNoise", "pos": [410.27824286284044, 3571.110499039739], "size": [269.97395833333337, 82], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}], "outputs": [{"localized_name": "NOISE", "name": "NOISE", "type": "NOISE", "links": [347]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "RandomNoise", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed"]}, {"id": 139, "type": "LTXVAudioVAEDecode", "pos": [1130.2783163694094, 3841.1107362825187], "size": [239.97395833333334, 46], "flags": {}, "order": 35, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 338}, {"label": "Audio VAE", "localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 383}], "outputs": [{"localized_name": "Audio", "name": "Audio", "type": "AUDIO", "links": [339]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVAudioVAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 106, "type": "CreateVideo", "pos": [1420.2783925712918, 3761.1104019496292], "size": [269.97395833333337, 78], "flags": {}, "order": 20, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 352}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 339}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 356}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [304]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [25]}, {"id": 134, "type": "LoraLoaderModelOnly", "pos": [-1649.721454901846, 3761.1104019496292], "size": [419.97395833333337, 82], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 325}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 404}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [326, 327]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "ltx-2-19b-ic-lora-pose-control.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2-19b-IC-LoRA-Pose-Control/resolve/main/ltx-2-19b-ic-lora-pose-control.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-pose-control.safetensors", 1], "color": "#322", "bgcolor": "#533"}, {"id": 138, "type": "LTXVSeparateAVLatent", "pos": [730.2784619127078, 3731.1109580277], "size": [193.2916015625, 46], "flags": {}, "order": 34, "mode": 0, "inputs": [{"localized_name": "av_latent", "name": "av_latent", "type": "LATENT", "link": 336}], "outputs": [{"localized_name": "video_latent", "name": "video_latent", "type": "LATENT", "links": [337, 351]}, {"localized_name": "audio_latent", "name": "audio_latent", "type": "LATENT", "links": [338]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "LTXVSeparateAVLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 144, "type": "VAEDecodeTiled", "pos": [1120.2783619435547, 3641.110599376351], "size": [269.97395833333337, 150], "flags": {}, "order": 36, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 351}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 353}, {"localized_name": "tile_size", "name": "tile_size", "type": "INT", "widget": {"name": "tile_size"}, "link": null}, {"localized_name": "overlap", "name": "overlap", "type": "INT", "widget": {"name": "overlap"}, "link": null}, {"localized_name": "temporal_size", "name": "temporal_size", "type": "INT", "widget": {"name": "temporal_size"}, "link": null}, {"localized_name": "temporal_overlap", "name": "temporal_overlap", "type": "INT", "widget": {"name": "temporal_overlap"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "VAEDecodeTiled", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [512, 64, 4096, 8]}, {"id": 113, "type": "VAEDecode", "pos": [1130.2783163694094, 3531.1113453160738], "size": [239.97395833333334, 46], "flags": {}, "order": 26, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 337}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 291}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 145, "type": "PrimitiveInt", "pos": [-1600, 4940], "size": [269.97395833333337, 82], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "INT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "INT", "name": "INT", "type": "INT", "links": [354]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveInt", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24, "fixed"]}, {"id": 148, "type": "PrimitiveFloat", "pos": [-1600, 5070], "size": [269.97395833333337, 58], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [355, 356]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [24]}, {"id": 118, "type": "Reroute", "pos": [-229.7217758812614, 4211.111007032079], "size": [75, 26], "flags": {}, "order": 14, "mode": 0, "inputs": [{"name": "", "type": "*", "link": 303}], "outputs": [{"name": "", "type": "VAE", "links": [289, 291, 367]}], "properties": {"showOutputText": false, "horizontal": false}}, {"id": 151, "type": "LTXVImgToVideoInplace", "pos": [-19.72161465663438, 4071.1107364662485], "size": [269.97395833333337, 122], "flags": {}, "order": 38, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 367}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 398}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 366}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 371}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 368}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [365]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 104, "type": "LTXVCropGuides", "pos": [-9.721939801202097, 3841.1107362825187], "size": [239.97395833333334, 66], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 310}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 312}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 270}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [281]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [282]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "slot_index": 2, "links": [287]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVCropGuides", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 112, "type": "LTXVLatentUpsampler", "pos": [-9.721939801202097, 3961.111517352274], "size": [259.97395833333337, 66], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 287}, {"localized_name": "upscale_model", "name": "upscale_model", "type": "LATENT_UPSCALE_MODEL", "link": 288}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 289}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [366]}], "title": "spatial", "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVLatentUpsampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 97, "type": "LTXAVTextEncoderLoader", "pos": [-1649.721454901846, 4041.1110828665023], "size": [419.97395833333337, 106], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "text_encoder", "name": "text_encoder", "type": "COMBO", "widget": {"name": "text_encoder"}, "link": 405}, {"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 403}, {"localized_name": "device", "name": "device", "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [294, 295]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXAVTextEncoderLoader", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}, {"name": "gemma_3_12B_it_fp4_mixed.safetensors", "url": "https://huggingface.co/Comfy-Org/ltx-2/resolve/main/split_files/text_encoders/gemma_3_12B_it_fp4_mixed.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-ic-lora-pose-control.safetensors", "ltx-2-19b-dev-fp8.safetensors", "default"]}, {"id": 103, "type": "CheckpointLoaderSimple", "pos": [-1649.721454901846, 3591.1104777840524], "size": [419.97395833333337, 98], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 401}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [325]}, {"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": []}, {"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [303, 328, 353, 359]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.56", "Node name for S&R": "CheckpointLoaderSimple", "models": [{"name": "ltx-2-19b-dev-fp8.safetensors", "url": "https://huggingface.co/Lightricks/LTX-2/resolve/main/ltx-2-19b-dev-fp8.safetensors", "directory": "checkpoints"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 156, "type": "LTXVAudioVAELoader", "pos": [-1636.9543279290153, 3911.095334870057], "size": [399.0494791666667, 58], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "ckpt_name", "name": "ckpt_name", "type": "COMBO", "widget": {"name": "ckpt_name"}, "link": 402}], "outputs": [{"localized_name": "Audio VAE", "name": "Audio VAE", "type": "VAE", "links": [382, 383]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "LTXVAudioVAELoader"}, "widgets_values": ["ltx-2-19b-dev-fp8.safetensors"]}, {"id": 149, "type": "LTXVImgToVideoInplace", "pos": [-1089.7215608128167, 4401.110560478942], "size": [269.97395833333337, 122], "flags": {}, "order": 37, "mode": 0, "inputs": [{"localized_name": "vae", "name": "vae", "type": "VAE", "link": 359}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 399}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 360}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": 370}, {"localized_name": "bypass", "name": "bypass", "type": "BOOLEAN", "widget": {"name": "bypass"}, "link": 363}], "outputs": [{"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [357]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "LTXVImgToVideoInplace", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1, false]}, {"id": 132, "type": "LTXVAddGuide", "pos": [-599.7217670603999, 4421.110609115862], "size": [269.97395833333337, 162], "flags": {}, "order": 33, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 313}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 314}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 328}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "link": 357}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 395}, {"localized_name": "frame_idx", "name": "frame_idx", "type": "INT", "widget": {"name": "frame_idx"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [309, 310]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [311, 312]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [324]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.75", "Node name for S&R": "LTXVAddGuide", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, 1]}, {"id": 154, "type": "MarkdownNote", "pos": [-1630, 5190], "size": [350, 88], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [], "outputs": [], "title": "Frame Rate Note", "properties": {}, "widgets_values": ["Please make sure the frame rate value is the same in both boxes"], "color": "#432", "bgcolor": "#653"}, {"id": 159, "type": "ResizeImageMaskNode", "pos": [-1610, 4580], "size": [284.375, 154], "flags": {}, "order": 39, "mode": 0, "inputs": [{"localized_name": "input", "name": "input", "type": "IMAGE,MASK", "link": 400}, {"localized_name": "resize_type", "name": "resize_type", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "resize_type"}, "link": null}, {"localized_name": "width", "name": "resize_type.width", "type": "INT", "widget": {"name": "resize_type.width"}, "link": 408}, {"localized_name": "height", "name": "resize_type.height", "type": "INT", "widget": {"name": "resize_type.height"}, "link": 409}, {"localized_name": "crop", "name": "resize_type.crop", "type": "COMBO", "widget": {"name": "resize_type.crop"}, "link": null}, {"localized_name": "scale_method", "name": "scale_method", "type": "COMBO", "widget": {"name": "scale_method"}, "link": null}], "outputs": [{"localized_name": "resized", "name": "resized", "type": "IMAGE,MASK", "links": [391, 392, 395]}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "ResizeImageMaskNode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["scale dimensions", 1280, 720, "center", "lanczos"]}, {"id": 110, "type": "GetImageSize", "pos": [-1600, 4780], "size": [259.97395833333337, 66], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 391}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": [296]}, {"localized_name": "height", "name": "height", "type": "INT", "links": [297]}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": []}], "properties": {"cnr_id": "comfy-core", "ver": "0.7.0", "Node name for S&R": "GetImageSize", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 115, "type": "EmptyLTXVLatentVideo", "pos": [-1099.721794809093, 4611.11072170357], "size": [269.97395833333337, 130], "flags": {}, "order": 28, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 296}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 297}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 410}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.60", "Node name for S&R": "EmptyLTXVLatentVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [768, 512, 97, 1]}, {"id": 111, "type": "LTXVEmptyLatentAudio", "pos": [-1099.721794809093, 4811.110229576288], "size": [269.97395833333337, 106], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "audio_vae", "name": "audio_vae", "type": "VAE", "link": 382}, {"localized_name": "frames_number", "name": "frames_number", "type": "INT", "widget": {"name": "frames_number"}, "link": null}, {"localized_name": "frame_rate", "name": "frame_rate", "type": "INT", "widget": {"name": "frame_rate"}, "link": 354}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "Latent", "name": "Latent", "type": "LATENT", "links": [300]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.68", "Node name for S&R": "LTXVEmptyLatentAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [97, 25, 1]}], "groups": [{"id": 1, "title": "Model", "bounding": [-1660, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Basic Sampling", "bounding": [-700, 3440, 570, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [-1180, 3440, 440, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 5, "title": "Latent", "bounding": [-1180, 4290, 1050, 680], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 9, "title": "Upscale Sampling(2x)", "bounding": [-100, 3440, 1090, 820], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 6, "title": "Sampler", "bounding": [350, 3480, 620, 750], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 7, "title": "Model", "bounding": [-90, 3480, 430, 310], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 11, "title": "Frame rate", "bounding": [-1610, 4860, 290, 271.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 326, "origin_id": 134, "origin_slot": 0, "target_id": 93, "target_slot": 0, "type": "MODEL"}, {"id": 309, "origin_id": 132, "origin_slot": 0, "target_id": 93, "target_slot": 1, "type": "CONDITIONING"}, {"id": 311, "origin_id": 132, "origin_slot": 1, "target_id": 93, "target_slot": 2, "type": "CONDITIONING"}, {"id": 266, "origin_id": 122, "origin_slot": 1, "target_id": 101, "target_slot": 1, "type": "LATENT"}, {"id": 280, "origin_id": 105, "origin_slot": 0, "target_id": 108, "target_slot": 0, "type": "MODEL"}, {"id": 281, "origin_id": 104, "origin_slot": 0, "target_id": 108, "target_slot": 1, "type": "CONDITIONING"}, {"id": 282, "origin_id": 104, "origin_slot": 1, "target_id": 108, "target_slot": 2, "type": "CONDITIONING"}, {"id": 260, "origin_id": 126, "origin_slot": 0, "target_id": 123, "target_slot": 0, "type": "NOISE"}, {"id": 261, "origin_id": 93, "origin_slot": 0, "target_id": 123, "target_slot": 1, "type": "GUIDER"}, {"id": 262, "origin_id": 94, "origin_slot": 0, "target_id": 123, "target_slot": 2, "type": "SAMPLER"}, {"id": 263, "origin_id": 95, "origin_slot": 0, "target_id": 123, "target_slot": 3, "type": "SIGMAS"}, {"id": 323, "origin_id": 116, "origin_slot": 0, "target_id": 123, "target_slot": 4, "type": "LATENT"}, {"id": 296, "origin_id": 110, "origin_slot": 0, "target_id": 115, "target_slot": 0, "type": "INT"}, {"id": 297, "origin_id": 110, "origin_slot": 1, "target_id": 115, "target_slot": 1, "type": "INT"}, {"id": 325, "origin_id": 103, "origin_slot": 0, "target_id": 134, "target_slot": 0, "type": "MODEL"}, {"id": 292, "origin_id": 124, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "CONDITIONING"}, {"id": 293, "origin_id": 119, "origin_slot": 0, "target_id": 114, "target_slot": 1, "type": "CONDITIONING"}, {"id": 294, "origin_id": 97, "origin_slot": 0, "target_id": 119, "target_slot": 0, "type": "CLIP"}, {"id": 324, "origin_id": 132, "origin_slot": 2, "target_id": 116, "target_slot": 0, "type": "LATENT"}, {"id": 300, "origin_id": 111, "origin_slot": 0, "target_id": 116, "target_slot": 1, "type": "LATENT"}, {"id": 313, "origin_id": 114, "origin_slot": 0, "target_id": 132, "target_slot": 0, "type": "CONDITIONING"}, {"id": 314, "origin_id": 114, "origin_slot": 1, "target_id": 132, "target_slot": 1, "type": "CONDITIONING"}, {"id": 328, "origin_id": 103, "origin_slot": 2, "target_id": 132, "target_slot": 2, "type": "VAE"}, {"id": 272, "origin_id": 123, "origin_slot": 0, "target_id": 122, "target_slot": 0, "type": "LATENT"}, {"id": 336, "origin_id": 107, "origin_slot": 1, "target_id": 138, "target_slot": 0, "type": "LATENT"}, {"id": 339, "origin_id": 139, "origin_slot": 0, "target_id": 106, "target_slot": 1, "type": "AUDIO"}, {"id": 295, "origin_id": 97, "origin_slot": 0, "target_id": 124, "target_slot": 0, "type": "CLIP"}, {"id": 303, "origin_id": 103, "origin_slot": 2, "target_id": 118, "target_slot": 0, "type": "VAE"}, {"id": 338, "origin_id": 138, "origin_slot": 1, "target_id": 139, "target_slot": 0, "type": "LATENT"}, {"id": 337, "origin_id": 138, "origin_slot": 0, "target_id": 113, "target_slot": 0, "type": "LATENT"}, {"id": 291, "origin_id": 118, "origin_slot": 0, "target_id": 113, "target_slot": 1, "type": "VAE"}, {"id": 276, "origin_id": 108, "origin_slot": 0, "target_id": 107, "target_slot": 1, "type": "GUIDER"}, {"id": 277, "origin_id": 98, "origin_slot": 0, "target_id": 107, "target_slot": 2, "type": "SAMPLER"}, {"id": 278, "origin_id": 99, "origin_slot": 0, "target_id": 107, "target_slot": 3, "type": "SIGMAS"}, {"id": 279, "origin_id": 101, "origin_slot": 0, "target_id": 107, "target_slot": 4, "type": "LATENT"}, {"id": 327, "origin_id": 134, "origin_slot": 0, "target_id": 105, "target_slot": 0, "type": "MODEL"}, {"id": 310, "origin_id": 132, "origin_slot": 0, "target_id": 104, "target_slot": 0, "type": "CONDITIONING"}, {"id": 312, "origin_id": 132, "origin_slot": 1, "target_id": 104, "target_slot": 1, "type": "CONDITIONING"}, {"id": 270, "origin_id": 122, "origin_slot": 0, "target_id": 104, "target_slot": 2, "type": "LATENT"}, {"id": 287, "origin_id": 104, "origin_slot": 2, "target_id": 112, "target_slot": 0, "type": "LATENT"}, {"id": 288, "origin_id": 100, "origin_slot": 0, "target_id": 112, "target_slot": 1, "type": "LATENT_UPSCALE_MODEL"}, {"id": 289, "origin_id": 118, "origin_slot": 0, "target_id": 112, "target_slot": 2, "type": "VAE"}, {"id": 322, "origin_id": 116, "origin_slot": 0, "target_id": 95, "target_slot": 0, "type": "LATENT"}, {"id": 304, "origin_id": 106, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 124, "target_slot": 1, "type": "STRING"}, {"id": 347, "origin_id": 143, "origin_slot": 0, "target_id": 107, "target_slot": 0, "type": "NOISE"}, {"id": 351, "origin_id": 138, "origin_slot": 0, "target_id": 144, "target_slot": 0, "type": "LATENT"}, {"id": 352, "origin_id": 144, "origin_slot": 0, "target_id": 106, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 103, "origin_slot": 2, "target_id": 144, "target_slot": 1, "type": "VAE"}, {"id": 354, "origin_id": 145, "origin_slot": 0, "target_id": 111, "target_slot": 2, "type": "INT"}, {"id": 355, "origin_id": 148, "origin_slot": 0, "target_id": 114, "target_slot": 2, "type": "FLOAT"}, {"id": 356, "origin_id": 148, "origin_slot": 0, "target_id": 106, "target_slot": 2, "type": "FLOAT"}, {"id": 357, "origin_id": 149, "origin_slot": 0, "target_id": 132, "target_slot": 3, "type": "LATENT"}, {"id": 359, "origin_id": 103, "origin_slot": 2, "target_id": 149, "target_slot": 0, "type": "VAE"}, {"id": 360, "origin_id": 115, "origin_slot": 0, "target_id": 149, "target_slot": 2, "type": "LATENT"}, {"id": 363, "origin_id": -10, "origin_slot": 2, "target_id": 149, "target_slot": 4, "type": "BOOLEAN"}, {"id": 365, "origin_id": 151, "origin_slot": 0, "target_id": 101, "target_slot": 0, "type": "LATENT"}, {"id": 366, "origin_id": 112, "origin_slot": 0, "target_id": 151, "target_slot": 2, "type": "LATENT"}, {"id": 367, "origin_id": 118, "origin_slot": 0, "target_id": 151, "target_slot": 0, "type": "VAE"}, {"id": 368, "origin_id": -10, "origin_slot": 2, "target_id": 151, "target_slot": 4, "type": "BOOLEAN"}, {"id": 370, "origin_id": -10, "origin_slot": 1, "target_id": 149, "target_slot": 3, "type": "FLOAT"}, {"id": 371, "origin_id": -10, "origin_slot": 1, "target_id": 151, "target_slot": 3, "type": "FLOAT"}, {"id": 382, "origin_id": 156, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "VAE"}, {"id": 383, "origin_id": 156, "origin_slot": 0, "target_id": 139, "target_slot": 1, "type": "VAE"}, {"id": 391, "origin_id": 159, "origin_slot": 0, "target_id": 110, "target_slot": 0, "type": "IMAGE"}, {"id": 395, "origin_id": 159, "origin_slot": 0, "target_id": 132, "target_slot": 4, "type": "IMAGE"}, {"id": 398, "origin_id": -10, "origin_slot": 3, "target_id": 151, "target_slot": 1, "type": "IMAGE"}, {"id": 399, "origin_id": -10, "origin_slot": 3, "target_id": 149, "target_slot": 1, "type": "IMAGE"}, {"id": 400, "origin_id": -10, "origin_slot": 4, "target_id": 159, "target_slot": 0, "type": "IMAGE,MASK"}, {"id": 401, "origin_id": -10, "origin_slot": 5, "target_id": 103, "target_slot": 0, "type": "COMBO"}, {"id": 402, "origin_id": -10, "origin_slot": 5, "target_id": 156, "target_slot": 0, "type": "COMBO"}, {"id": 403, "origin_id": -10, "origin_slot": 5, "target_id": 97, "target_slot": 1, "type": "COMBO"}, {"id": 404, "origin_id": -10, "origin_slot": 6, "target_id": 134, "target_slot": 1, "type": "COMBO"}, {"id": 405, "origin_id": -10, "origin_slot": 6, "target_id": 97, "target_slot": 0, "type": "COMBO"}, {"id": 406, "origin_id": -10, "origin_slot": 7, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 407, "origin_id": -10, "origin_slot": 8, "target_id": 100, "target_slot": 0, "type": "COMBO"}, {"id": 408, "origin_id": -10, "origin_slot": 9, "target_id": 159, "target_slot": 2, "type": "INT"}, {"id": 409, "origin_id": -10, "origin_slot": 10, "target_id": 159, "target_slot": 3, "type": "INT"}, {"id": 410, "origin_id": -10, "origin_slot": 11, "target_id": 115, "target_slot": 2, "type": "INT"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Pose to video"}]}, "config": {}, "extra": {"ds": {"scale": 1.3889423076923078, "offset": [217.0560747663551, -3703.3333333333335]}, "frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "version": 0.4} diff --git a/blueprints/Prompt Enhance.json b/blueprints/Prompt Enhance.json new file mode 100644 index 000000000..2612f66db --- /dev/null +++ b/blueprints/Prompt Enhance.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}} diff --git a/blueprints/Sharpen.json b/blueprints/Sharpen.json new file mode 100644 index 000000000..a4accaf59 --- /dev/null +++ b/blueprints/Sharpen.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 – 2.0] typical: 0.3–1.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}} diff --git a/blueprints/Text to Audio (ACE-Step 1.5).json b/blueprints/Text to Audio (ACE-Step 1.5).json new file mode 100644 index 000000000..51e3bbed3 --- /dev/null +++ b/blueprints/Text to Audio (ACE-Step 1.5).json @@ -0,0 +1 @@ +{"id": "67979fed-a490-450a-83f4-c7c0105d450e", "revision": 0, "last_node_id": 110, "last_link_id": 288, "nodes": [{"id": 21, "type": "510f6b52-34ee-40dd-b532-475497dee41b", "pos": [1810, -560], "size": [390, 460], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "tags", "type": "STRING", "widget": {"name": "tags"}, "link": null}, {"name": "lyrics", "type": "STRING", "widget": {"name": "lyrics"}, "link": null}, {"name": "timesignature", "type": "COMBO", "widget": {"name": "timesignature"}, "link": null}, {"name": "language", "type": "COMBO", "widget": {"name": "language"}, "link": null}, {"name": "keyscale", "type": "COMBO", "widget": {"name": "keyscale"}, "link": null}, {"name": "generate_audio_codes", "type": "BOOLEAN", "widget": {"name": "generate_audio_codes"}, "link": null}, {"name": "cfg_scale", "type": "FLOAT", "widget": {"name": "cfg_scale"}, "link": null}, {"label": "duration", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name1", "type": "COMBO", "widget": {"name": "clip_name1"}, "link": null}, {"name": "clip_name2", "type": "COMBO", "widget": {"name": "clip_name2"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "AUDIO", "name": "AUDIO", "type": "AUDIO", "links": []}], "properties": {"proxyWidgets": [["-1", "tags"], ["-1", "lyrics"], ["-1", "language"], ["-1", "timesignature"], ["-1", "keyscale"], ["-1", "generate_audio_codes"], ["-1", "cfg_scale"], ["102", "value"], ["102", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name1"], ["-1", "clip_name2"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.12.3", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", "", "en", "4", "E minor", true, 2, null, null, "acestep_v1.5_turbo.safetensors", "qwen_0.6b_ace15.safetensors", "qwen_4b_ace15.safetensors", "ace_1.5_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "510f6b52-34ee-40dd-b532-475497dee41b", "version": 1, "state": {"lastGroupId": 3, "lastNodeId": 110, "lastLinkId": 288, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Text to Audio (ACE-Step 1.5)", "inputNode": {"id": -10, "bounding": [-660, -560, 167.458984375, 280]}, "outputNode": {"id": -20, "bounding": [1504.8375, -410, 120, 60]}, "inputs": [{"id": "ebc79d17-2e65-4e0f-855a-c9f2466a5fbf", "name": "tags", "type": "STRING", "linkIds": [264], "pos": [-512.541015625, -540]}, {"id": "230afdb4-a647-4fb7-a68c-a2204fd5d570", "name": "lyrics", "type": "STRING", "linkIds": [265], "pos": [-512.541015625, -520]}, {"id": "efdcbb48-231c-4757-b343-4458c011a283", "name": "timesignature", "type": "COMBO", "linkIds": [266], "pos": [-512.541015625, -500]}, {"id": "811579c1-2979-4721-a1e1-7d9352616e7b", "name": "language", "type": "COMBO", "linkIds": [267], "pos": [-512.541015625, -480]}, {"id": "76a68b0d-7a5f-43dc-873d-d78adf32895f", "name": "keyscale", "type": "COMBO", "linkIds": [268], "pos": [-512.541015625, -460]}, {"id": "11bb3297-272d-4c56-873a-2c974581e838", "name": "generate_audio_codes", "type": "BOOLEAN", "linkIds": [269], "pos": [-512.541015625, -440]}, {"id": "e5a30400-a8b0-422a-a0f3-21739727ab03", "name": "cfg_scale", "type": "FLOAT", "linkIds": [270], "pos": [-512.541015625, -420]}, {"id": "91a37ca5-e0d1-42c5-8248-419b850661a0", "name": "value", "type": "FLOAT", "linkIds": [284], "label": "duration", "pos": [-512.541015625, -400]}, {"id": "30f69f59-e916-48ab-9a5d-ae445b8d8a63", "name": "unet_name", "type": "COMBO", "linkIds": [285], "pos": [-512.541015625, -380]}, {"id": "1af0e8df-6fa7-4df2-b1b4-9c356a8f30a6", "name": "clip_name1", "type": "COMBO", "linkIds": [286], "pos": [-512.541015625, -360]}, {"id": "c7195505-9e83-4f87-b8d7-7747d808577d", "name": "clip_name2", "type": "COMBO", "linkIds": [287], "pos": [-512.541015625, -340]}, {"id": "ca4bd68f-e7c1-4d87-9914-cfe15c63b96e", "name": "vae_name", "type": "COMBO", "linkIds": [288], "pos": [-512.541015625, -320]}], "outputs": [{"id": "bfd748f6-f9ac-4588-81fa-41bde07a58fa", "name": "AUDIO", "type": "AUDIO", "linkIds": [263], "localized_name": "AUDIO", "pos": [1524.8375, -390]}], "widgets": [], "nodes": [{"id": 105, "type": "DualCLIPLoader", "pos": [-165, -660], "size": [380, 130], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name1", "name": "clip_name1", "type": "COMBO", "widget": {"name": "clip_name1"}, "link": 286}, {"localized_name": "clip_name2", "name": "clip_name2", "type": "COMBO", "widget": {"name": "clip_name2"}, "link": 287}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [261]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "DualCLIPLoader", "models": [{"name": "qwen_0.6b_ace15.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/text_encoders/qwen_0.6b_ace15.safetensors", "directory": "text_encoders"}, {"name": "qwen_4b_ace15.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/text_encoders/qwen_4b_ace15.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_0.6b_ace15.safetensors", "qwen_4b_ace15.safetensors", "ace", "default"]}, {"id": 106, "type": "VAELoader", "pos": [-165, -470], "size": [380, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 288}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [262]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "VAELoader", "models": [{"name": "ace_1.5_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/vae/ace_1.5_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ace_1.5_vae.safetensors"]}, {"id": 98, "type": "EmptyAceStep1.5LatentAudio", "pos": [-150, 10], "size": [314.90390625, 82], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "seconds", "name": "seconds", "type": "FLOAT", "widget": {"name": "seconds"}, "link": 279}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [249]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "EmptyAceStep1.5LatentAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [120, 1]}, {"id": 47, "type": "ConditioningZeroOut", "pos": [670, 50], "size": [204.75, 26], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 255}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [119]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 3, "type": "KSampler", "pos": [930, -680], "size": [329.39477481889753, 262], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 175}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 254}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 119}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 249}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": 258}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [256]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "fixed", 8, 1, "euler", "simple", 1]}, {"id": 78, "type": "ModelSamplingAuraFlow", "pos": [930, -810], "size": [329.39477481889753, 60], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 260}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [175]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}, {"id": 18, "type": "VAEDecodeAudio", "pos": [1280, -800], "size": [164.8375, 46], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 256}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 262}], "outputs": [{"localized_name": "AUDIO", "name": "AUDIO", "type": "AUDIO", "links": [263]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "VAEDecodeAudio", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 94, "type": "TextEncodeAceStepAudio1.5", "pos": [270, -790], "size": [611.9184354063266, 679.7643386829468], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 261}, {"localized_name": "tags", "name": "tags", "type": "STRING", "widget": {"name": "tags"}, "link": 264}, {"localized_name": "lyrics", "name": "lyrics", "type": "STRING", "widget": {"name": "lyrics"}, "link": 265}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": 257}, {"localized_name": "bpm", "name": "bpm", "type": "INT", "widget": {"name": "bpm"}, "link": null}, {"localized_name": "duration", "name": "duration", "type": "FLOAT", "widget": {"name": "duration"}, "link": 280}, {"localized_name": "timesignature", "name": "timesignature", "type": "COMBO", "widget": {"name": "timesignature"}, "link": 266}, {"localized_name": "language", "name": "language", "type": "COMBO", "widget": {"name": "language"}, "link": 267}, {"localized_name": "keyscale", "name": "keyscale", "type": "COMBO", "widget": {"name": "keyscale"}, "link": 268}, {"localized_name": "generate_audio_codes", "name": "generate_audio_codes", "type": "BOOLEAN", "widget": {"name": "generate_audio_codes"}, "link": 269}, {"localized_name": "cfg_scale", "name": "cfg_scale", "type": "FLOAT", "widget": {"name": "cfg_scale"}, "link": 270}, {"localized_name": "temperature", "name": "temperature", "type": "FLOAT", "widget": {"name": "temperature"}, "link": null}, {"localized_name": "top_p", "name": "top_p", "type": "FLOAT", "widget": {"name": "top_p"}, "link": null}, {"localized_name": "top_k", "name": "top_k", "type": "INT", "widget": {"name": "top_k"}, "link": null}, {"localized_name": "min_p", "name": "min_p", "type": "FLOAT", "widget": {"name": "min_p"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [254, 255]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "TextEncodeAceStepAudio1.5", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", "", 0, "fixed", 190, 120, "4", "en", "E minor", true, 2, 0.85, 0.9, 0, 0]}, {"id": 104, "type": "UNETLoader", "pos": [-170, -790], "size": [380, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 285}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [260]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.1", "Node name for S&R": "UNETLoader", "models": [{"name": "acestep_v1.5_turbo.safetensors", "url": "https://huggingface.co/Comfy-Org/ace_step_1.5_ComfyUI_files/resolve/main/split_files/diffusion_models/acestep_v1.5_turbo.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["acestep_v1.5_turbo.safetensors", "default"]}, {"id": 102, "type": "PrimitiveNode", "pos": [-120, -130], "size": [268.39945903485034, 82], "flags": {}, "order": 3, "mode": 0, "inputs": [], "outputs": [{"name": "INT", "type": "INT", "widget": {"name": "seed"}, "links": [257, 258]}], "title": "seed", "properties": {"Run widget replace on values": false}, "widgets_values": [0, "randomize"]}, {"id": 110, "type": "PrimitiveFloat", "pos": [-120, -280], "size": [270, 58], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": 284}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [279, 280]}], "title": "Song Duration", "properties": {"cnr_id": "comfy-core", "ver": "0.12.3", "Node name for S&R": "PrimitiveFloat", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [120]}], "groups": [{"id": 1, "title": "Step 1 - Load Models", "bounding": [-180, -860, 405, 461.6], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Step 2 - Duration", "bounding": [-180, -370, 400, 170], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Step3 - Prompt", "bounding": [260, -860, 640, 960], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 255, "origin_id": 94, "origin_slot": 0, "target_id": 47, "target_slot": 0, "type": "CONDITIONING"}, {"id": 175, "origin_id": 78, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 254, "origin_id": 94, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 119, "origin_id": 47, "origin_slot": 0, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 249, "origin_id": 98, "origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 258, "origin_id": 102, "origin_slot": 0, "target_id": 3, "target_slot": 4, "type": "INT"}, {"id": 260, "origin_id": 104, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "MODEL"}, {"id": 256, "origin_id": 3, "origin_slot": 0, "target_id": 18, "target_slot": 0, "type": "LATENT"}, {"id": 262, "origin_id": 106, "origin_slot": 0, "target_id": 18, "target_slot": 1, "type": "VAE"}, {"id": 261, "origin_id": 105, "origin_slot": 0, "target_id": 94, "target_slot": 0, "type": "CLIP"}, {"id": 257, "origin_id": 102, "origin_slot": 0, "target_id": 94, "target_slot": 3, "type": "INT"}, {"id": 263, "origin_id": 18, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "AUDIO"}, {"id": 264, "origin_id": -10, "origin_slot": 0, "target_id": 94, "target_slot": 1, "type": "STRING"}, {"id": 265, "origin_id": -10, "origin_slot": 1, "target_id": 94, "target_slot": 2, "type": "STRING"}, {"id": 266, "origin_id": -10, "origin_slot": 2, "target_id": 94, "target_slot": 6, "type": "COMBO"}, {"id": 267, "origin_id": -10, "origin_slot": 3, "target_id": 94, "target_slot": 7, "type": "COMBO"}, {"id": 268, "origin_id": -10, "origin_slot": 4, "target_id": 94, "target_slot": 8, "type": "COMBO"}, {"id": 269, "origin_id": -10, "origin_slot": 5, "target_id": 94, "target_slot": 9, "type": "BOOLEAN"}, {"id": 270, "origin_id": -10, "origin_slot": 6, "target_id": 94, "target_slot": 10, "type": "FLOAT"}, {"id": 279, "origin_id": 110, "origin_slot": 0, "target_id": 98, "target_slot": 0, "type": "FLOAT"}, {"id": 280, "origin_id": 110, "origin_slot": 0, "target_id": 94, "target_slot": 5, "type": "FLOAT"}, {"id": 284, "origin_id": -10, "origin_slot": 7, "target_id": 110, "target_slot": 0, "type": "FLOAT"}, {"id": 285, "origin_id": -10, "origin_slot": 8, "target_id": 104, "target_slot": 0, "type": "COMBO"}, {"id": 286, "origin_id": -10, "origin_slot": 9, "target_id": 105, "target_slot": 0, "type": "COMBO"}, {"id": 287, "origin_id": -10, "origin_slot": 10, "target_id": 105, "target_slot": 1, "type": "COMBO"}, {"id": 288, "origin_id": -10, "origin_slot": 11, "target_id": 106, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Audio/Music generation"}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 0.9575633843910519, "offset": [-950.8014851321678, 872.1540230582457]}}, "version": 0.4} diff --git a/blueprints/Text to Image (Z-Image-Turbo).json b/blueprints/Text to Image (Z-Image-Turbo).json new file mode 100644 index 000000000..ce25ce1df --- /dev/null +++ b/blueprints/Text to Image (Z-Image-Turbo).json @@ -0,0 +1 @@ +{"id": "1c3eaa76-5cfa-4dc7-8571-97a570324e01", "revision": 0, "last_node_id": 34, "last_link_id": 40, "nodes": [{"id": 5, "type": "dfe9eb32-97c0-43a5-90d5-4fd37768d91b", "pos": [-2.5766491043910378e-05, 1229.999928629805], "size": [400, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": []}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "width"], ["-1", "height"], ["3", "seed"], ["3", "control_after_generate"], ["-1", "unet_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.3.73", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["", 1024, 1024, null, null, "z_image_turbo_bf16.safetensors", "qwen_3_4b.safetensors", "ae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "dfe9eb32-97c0-43a5-90d5-4fd37768d91b", "version": 1, "state": {"lastGroupId": 4, "lastNodeId": 34, "lastLinkId": 40, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Text to Image (Z-Image-Turbo)", "inputNode": {"id": -10, "bounding": [-80, 425, 120, 160]}, "outputNode": {"id": -20, "bounding": [1490, 415, 120, 60]}, "inputs": [{"id": "fb178669-e742-4a53-8a69-7df59834dfd8", "name": "text", "type": "STRING", "linkIds": [34], "label": "prompt", "pos": [20, 445]}, {"id": "dd780b3c-23e9-46ff-8469-156008f42e5a", "name": "width", "type": "INT", "linkIds": [35], "pos": [20, 465]}, {"id": "7b08d546-6bb0-4ef9-82e9-ffae5e1ee6bc", "name": "height", "type": "INT", "linkIds": [36], "pos": [20, 485]}, {"id": "23087d15-8412-4fbd-b71e-9b6d7ef76de1", "name": "unet_name", "type": "COMBO", "linkIds": [38], "pos": [20, 505]}, {"id": "0677f5c3-2a3f-43d4-98ac-a4c56d5efdc0", "name": "clip_name", "type": "COMBO", "linkIds": [39], "pos": [20, 525]}, {"id": "c85c0445-2641-48b1-bbca-95057edf2fcf", "name": "vae_name", "type": "COMBO", "linkIds": [40], "pos": [20, 545]}], "outputs": [{"id": "1fa72a21-ce00-4952-814e-1f2ffbe87d1d", "name": "IMAGE", "type": "IMAGE", "linkIds": [16], "localized_name": "IMAGE", "pos": [1510, 435]}], "widgets": [], "nodes": [{"id": 30, "type": "CLIPLoader", "pos": [109.99997264844609, 329.99999029608756], "size": [269.9869791666667, 106], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 39}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "links": [28]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPLoader", "models": [{"name": "qwen_3_4b.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/text_encoders/qwen_3_4b.safetensors", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["qwen_3_4b.safetensors", "lumina2", "default"]}, {"id": 29, "type": "VAELoader", "pos": [109.99997264844609, 479.9999847172637], "size": [269.9869791666667, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 40}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "links": [27]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "VAELoader", "models": [{"name": "ae.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/vae/ae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["ae.safetensors"]}, {"id": 33, "type": "ConditioningZeroOut", "pos": [639.9999103333332, 620.0000271257795], "size": [204.134765625, 26], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "conditioning", "name": "conditioning", "type": "CONDITIONING", "link": 32}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [33]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "ConditioningZeroOut", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 8, "type": "VAEDecode", "pos": [1219.9999088104782, 160.00009184959066], "size": [209.98697916666669, 46], "flags": {}, "order": 5, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 14}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 27}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [16]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": []}, {"id": 28, "type": "UNETLoader", "pos": [109.99997264844609, 200.0000502647102], "size": [269.9869791666667, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 38}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [26]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "UNETLoader", "models": [{"name": "z_image_turbo_bf16.safetensors", "url": "https://huggingface.co/Comfy-Org/z_image_turbo/resolve/main/split_files/diffusion_models/z_image_turbo_bf16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": ["z_image_turbo_bf16.safetensors", "default"]}, {"id": 27, "type": "CLIPTextEncode", "pos": [429.99997828947767, 200.0000502647102], "size": [409.9869791666667, 319.9869791666667], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 28}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 34}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "links": [30, 32]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.73", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [""]}, {"id": 13, "type": "EmptySD3LatentImage", "pos": [109.99997264844609, 629.9999791384399], "size": [259.9869791666667, 106], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 35}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 36}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [17]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "EmptySD3LatentImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [1024, 1024, 1]}, {"id": 3, "type": "KSampler", "pos": [879.9999615530063, 269.9999774911694], "size": [314.9869791666667, 262], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 13}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 30}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 33}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 17}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [14]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [0, "randomize", 4, 1, "res_multistep", "simple", 1]}, {"id": 11, "type": "ModelSamplingAuraFlow", "pos": [879.9999615530063, 160.00009184959066], "size": [309.9869791666667, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 26}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.64", "Node name for S&R": "ModelSamplingAuraFlow", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65}, "widgets_values": [3]}], "groups": [{"id": 2, "title": "Image size", "bounding": [100, 560, 290, 200], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Prompt", "bounding": [410, 130, 450, 540], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 4, "title": "Models", "bounding": [100, 130, 290, 413.6], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 32, "origin_id": 27, "origin_slot": 0, "target_id": 33, "target_slot": 0, "type": "CONDITIONING"}, {"id": 26, "origin_id": 28, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "MODEL"}, {"id": 14, "origin_id": 3, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 27, "origin_id": 29, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 30, "origin_id": 27, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 33, "origin_id": 33, "origin_slot": 0, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 17, "origin_id": 13, "origin_slot": 0, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 28, "origin_id": 30, "origin_slot": 0, "target_id": 27, "target_slot": 0, "type": "CLIP"}, {"id": 16, "origin_id": 8, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 27, "target_slot": 1, "type": "STRING"}, {"id": 35, "origin_id": -10, "origin_slot": 1, "target_id": 13, "target_slot": 0, "type": "INT"}, {"id": 36, "origin_id": -10, "origin_slot": 2, "target_id": 13, "target_slot": 1, "type": "INT"}, {"id": 38, "origin_id": -10, "origin_slot": 3, "target_id": 28, "target_slot": 0, "type": "COMBO"}, {"id": 39, "origin_id": -10, "origin_slot": 4, "target_id": 30, "target_slot": 0, "type": "COMBO"}, {"id": 40, "origin_id": -10, "origin_slot": 5, "target_id": 29, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image generation and editing/Text to image"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true, "ds": {"scale": 0.8401370345180755, "offset": [940.0587067393087, -830.7121087564725]}}, "version": 0.4} diff --git a/blueprints/Text to Video (Wan 2.2).json b/blueprints/Text to Video (Wan 2.2).json new file mode 100644 index 000000000..9f1b69669 --- /dev/null +++ b/blueprints/Text to Video (Wan 2.2).json @@ -0,0 +1 @@ +{"id": "ec7da562-7e21-4dac-a0d2-f4441e1efd3b", "revision": 0, "last_node_id": 116, "last_link_id": 188, "nodes": [{"id": 114, "type": "59b2f9c7-af11-45c8-a22b-871166f816c0", "pos": [900.0000142553818, 629.999938027585], "size": [400, 394.97395833333337], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "prompt", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}, {"name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}], "outputs": [{"name": "VIDEO", "type": "VIDEO", "links": null}], "properties": {"proxyWidgets": [["-1", "text"], ["-1", "length"], ["-1", "width"], ["-1", "height"], ["81", "noise_seed"], ["81", "control_after_generate"]], "cnr_id": "comfy-core", "ver": "0.11.0"}, "widgets_values": ["", 81, 640, 640]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "59b2f9c7-af11-45c8-a22b-871166f816c0", "version": 1, "state": {"lastGroupId": 15, "lastNodeId": 114, "lastLinkId": 196, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Text to Video (Wan 2.2)", "inputNode": {"id": -10, "bounding": [-99.66668418897854, 621.3333300391974, 120, 120]}, "outputNode": {"id": -20, "bounding": [1661.9927561248032, 500.2133490758798, 120, 60]}, "inputs": [{"id": "3a15ef44-456f-4a3a-ade7-7a0840166830", "name": "text", "type": "STRING", "linkIds": [189], "label": "prompt", "pos": [0.333315811021464, 641.3333300391974]}, {"id": "ec76f1bf-b130-4dc9-a50c-0b10002725d6", "name": "length", "type": "INT", "linkIds": [190], "pos": [0.333315811021464, 661.3333300391974]}, {"id": "1abb6b00-a8b4-4e72-9d87-53f1fc5d281e", "name": "width", "type": "INT", "linkIds": [191], "pos": [0.333315811021464, 681.3333300391974]}, {"id": "0af36ab5-ee95-4ce5-9ad9-26436319a0d2", "name": "height", "type": "INT", "linkIds": [192], "pos": [0.333315811021464, 701.3333300391974]}], "outputs": [{"id": "6bdfda51-5568-48bf-8985-dbad1e11b3d8", "name": "VIDEO", "type": "VIDEO", "linkIds": [196], "pos": [1681.9927561248032, 520.2133490758798]}], "widgets": [], "nodes": [{"id": 71, "type": "CLIPLoader", "pos": [50.33329119280961, 51.33334121884377], "size": [346.38020833333337, 98], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [141, 160]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPLoader", "models": [{"name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors", "directory": "text_encoders"}]}, "widgets_values": ["umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan", "default"]}, {"id": 73, "type": "VAELoader", "pos": [50.33329119280961, 211.33336855035554], "size": [344.7135416666667, 50], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [158]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAELoader", "models": [{"name": "wan_2.1_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", "directory": "vae"}]}, "widgets_values": ["wan_2.1_vae.safetensors"]}, {"id": 76, "type": "UNETLoader", "pos": [50.33329119280961, -78.66666636275716], "size": [346.7447916666667, 74], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [155]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 75, "type": "UNETLoader", "pos": [50.33329119280961, -208.66667394435814], "size": [346.7447916666667, 74], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [153]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", "directory": "diffusion_models"}]}, "widgets_values": ["wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors", "default"]}, {"id": 83, "type": "LoraLoaderModelOnly", "pos": [450.3332425195698, -198.66662836038148], "size": [279.9869791666667, 74], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 153}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [152]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", "directory": "loras"}]}, "widgets_values": ["wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors", 1.0000000000000002]}, {"id": 85, "type": "LoraLoaderModelOnly", "pos": [450.3332425195698, -58.66669219682302], "size": [279.9869791666667, 74], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 155}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [156]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.49", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", "directory": "loras"}]}, "widgets_values": ["wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors", 1.0000000000000002]}, {"id": 86, "type": "ModelSamplingSD3", "pos": [740.3332774326827, -58.66669219682302], "size": [210, 50], "flags": {"collapsed": false}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 156}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [183]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3"}, "widgets_values": [5.000000000000001]}, {"id": 82, "type": "ModelSamplingSD3", "pos": [740.3332774326827, -198.66662836038148], "size": [210, 50], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 152}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [181]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "ModelSamplingSD3"}, "widgets_values": [5.000000000000001]}, {"id": 81, "type": "KSamplerAdvanced", "pos": [990.3333640139272, -248.66668077723608], "size": [300, 440.98958333333337], "flags": {}, "order": 13, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 181}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 149}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 150}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 151}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [145]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSamplerAdvanced"}, "widgets_values": ["enable", 0, "randomize", 4, 1, "euler", "simple", 0, 2, "enable"]}, {"id": 74, "type": "EmptyHunyuanLatentVideo", "pos": [70.33326535874369, 381.33332446382485], "size": [314.9869791666667, 122], "flags": {}, "order": 11, "mode": 0, "inputs": [{"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 191}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 192}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": 190}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [151]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "EmptyHunyuanLatentVideo"}, "widgets_values": [640, 640, 81, 1]}, {"id": 78, "type": "KSamplerAdvanced", "pos": [1310.3334186769505, -248.66668077723608], "size": [304.73958333333337, 440.98958333333337], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 183}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 143}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 144}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 145}, {"localized_name": "add_noise", "name": "add_noise", "type": "COMBO", "widget": {"name": "add_noise"}, "link": null}, {"localized_name": "noise_seed", "name": "noise_seed", "type": "INT", "widget": {"name": "noise_seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "start_at_step", "name": "start_at_step", "type": "INT", "widget": {"name": "start_at_step"}, "link": null}, {"localized_name": "end_at_step", "name": "end_at_step", "type": "INT", "widget": {"name": "end_at_step"}, "link": null}, {"localized_name": "return_with_leftover_noise", "name": "return_with_leftover_noise", "type": "COMBO", "widget": {"name": "return_with_leftover_noise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [157]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "KSamplerAdvanced"}, "widgets_values": ["disable", 0, "fixed", 4, 1, "euler", "simple", 2, 4, "disable"]}, {"id": 114, "type": "CreateVideo", "pos": [1320.333347258908, 441.33336396364655], "size": [269.9869791666667, 70], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 195}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [196]}], "properties": {"cnr_id": "comfy-core", "ver": "0.11.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [16]}, {"id": 112, "type": "Note", "pos": [30.33320002485607, -428.6666237736725], "size": [359.9869791666667, 52], "flags": {}, "order": 4, "mode": 0, "inputs": [], "outputs": [], "title": "About 4 Steps LoRA", "properties": {}, "widgets_values": ["Using the Wan2.2 Lighting LoRA will result in the loss of video dynamics, but it will reduce the generation time. This template provides two workflows, and you can enable one as needed."], "color": "#222", "bgcolor": "#000"}, {"id": 62, "type": "MarkdownNote", "pos": [-489.666771800538, -278.666700527147], "size": [479.9869791666667, 542.1354166666667], "flags": {}, "order": 5, "mode": 0, "inputs": [], "outputs": [], "title": "Model Links", "properties": {}, "widgets_values": ["[Tutorial](https://docs.comfy.org/tutorials/video/wan/wan2_2\n) | [教程](https://docs.comfy.org/zh-CN/tutorials/video/wan/wan2_2\n)\n\n**Diffusion Model** \n- [wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors)\n- [wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/diffusion_models/wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors)\n\n**LoRA**\n\n- [wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors)\n- [wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/loras/wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors)\n\n**VAE**\n- [wan_2.1_vae.safetensors](https://huggingface.co/Comfy-Org/Wan_2.2_ComfyUI_Repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors)\n\n**Text Encoder** \n- [umt5_xxl_fp8_e4m3fn_scaled.safetensors](https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors)\n\n\nFile save location\n\n```\nComfyUI/\n├───📂 models/\n│ ├───📂 diffusion_models/\n│ │ ├─── wan2.2_t2v_low_noise_14B_fp8_scaled.safetensors\n│ │ └─── wan2.2_t2v_high_noise_14B_fp8_scaled.safetensors\n│ ├───📂 loras/\n│ │ ├───wan2.2_t2v_lightx2v_4steps_lora_v1.1_low_noise.safetensors\n│ │ └───wan2.2_t2v_lightx2v_4steps_lora_v1.1_high_noise.safetensors\n│ ├───📂 text_encoders/\n│ │ └─── umt5_xxl_fp8_e4m3fn_scaled.safetensors \n│ └───📂 vae/\n│ └── wan_2.1_vae.safetensors\n```\n"], "color": "#222", "bgcolor": "#000"}, {"id": 87, "type": "VAEDecode", "pos": [1020.3331497597994, 471.3333837135574], "size": [210, 46], "flags": {"collapsed": false}, "order": 14, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 157}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 158}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [195]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "VAEDecode"}, "widgets_values": []}, {"id": 72, "type": "CLIPTextEncode", "pos": [440.3333139376125, 331.3333305479798], "size": [500, 170], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 141}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [144, 150]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": ["色调艳丽,过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,裸露,NSFW"], "color": "#322", "bgcolor": "#533"}, {"id": 89, "type": "CLIPTextEncode", "pos": [440.3333139376125, 131.33323788258042], "size": [510, 170], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 160}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": 189}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [143, 149]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.45", "Node name for S&R": "CLIPTextEncode"}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}], "groups": [{"id": 13, "title": "Wan2.2 T2V fp8_scaled + 4 steps LoRA", "bounding": [31.999982477688036, -317.00000329413615, 1610, 880], "color": "#444", "font_size": 24, "flags": {}}, {"id": 6, "title": "Step3 Prompt", "bounding": [431.99998247768815, 57.99999670586385, 530, 460], "color": "#444", "font_size": 24, "flags": {}}, {"id": 7, "title": "Lightx2v 4steps LoRA", "bounding": [431.99998247768815, -275.33333662746946, 530, 320], "color": "#444", "font_size": 24, "flags": {}}, {"id": 11, "title": "Step 1 - Load models", "bounding": [40.33331581102152, -275.33333662746946, 366.7470703125, 563.5814208984375], "color": "#444", "font_size": 24, "flags": {}}, {"id": 12, "title": "Step 2 - Video size", "bounding": [40.33331581102152, 299.6666633725306, 370, 230], "color": "#444", "font_size": 24, "flags": {}}], "links": [{"id": 153, "origin_id": 75, "origin_slot": 0, "target_id": 83, "target_slot": 0, "type": "MODEL"}, {"id": 155, "origin_id": 76, "origin_slot": 0, "target_id": 85, "target_slot": 0, "type": "MODEL"}, {"id": 156, "origin_id": 85, "origin_slot": 0, "target_id": 86, "target_slot": 0, "type": "MODEL"}, {"id": 152, "origin_id": 83, "origin_slot": 0, "target_id": 82, "target_slot": 0, "type": "MODEL"}, {"id": 160, "origin_id": 71, "origin_slot": 0, "target_id": 89, "target_slot": 0, "type": "CLIP"}, {"id": 181, "origin_id": 82, "origin_slot": 0, "target_id": 81, "target_slot": 0, "type": "MODEL"}, {"id": 149, "origin_id": 89, "origin_slot": 0, "target_id": 81, "target_slot": 1, "type": "CONDITIONING"}, {"id": 150, "origin_id": 72, "origin_slot": 0, "target_id": 81, "target_slot": 2, "type": "CONDITIONING"}, {"id": 151, "origin_id": 74, "origin_slot": 0, "target_id": 81, "target_slot": 3, "type": "LATENT"}, {"id": 157, "origin_id": 78, "origin_slot": 0, "target_id": 87, "target_slot": 0, "type": "LATENT"}, {"id": 158, "origin_id": 73, "origin_slot": 0, "target_id": 87, "target_slot": 1, "type": "VAE"}, {"id": 141, "origin_id": 71, "origin_slot": 0, "target_id": 72, "target_slot": 0, "type": "CLIP"}, {"id": 183, "origin_id": 86, "origin_slot": 0, "target_id": 78, "target_slot": 0, "type": "MODEL"}, {"id": 143, "origin_id": 89, "origin_slot": 0, "target_id": 78, "target_slot": 1, "type": "CONDITIONING"}, {"id": 144, "origin_id": 72, "origin_slot": 0, "target_id": 78, "target_slot": 2, "type": "CONDITIONING"}, {"id": 145, "origin_id": 81, "origin_slot": 0, "target_id": 78, "target_slot": 3, "type": "LATENT"}, {"id": 189, "origin_id": -10, "origin_slot": 0, "target_id": 89, "target_slot": 1, "type": "STRING"}, {"id": 190, "origin_id": -10, "origin_slot": 1, "target_id": 74, "target_slot": 2, "type": "INT"}, {"id": 191, "origin_id": -10, "origin_slot": 2, "target_id": 74, "target_slot": 0, "type": "INT"}, {"id": 192, "origin_id": -10, "origin_slot": 3, "target_id": 74, "target_slot": 1, "type": "INT"}, {"id": 195, "origin_id": 87, "origin_slot": 0, "target_id": 114, "target_slot": 0, "type": "IMAGE"}, {"id": 196, "origin_id": 114, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Text to video"}]}, "config": {}, "extra": {"frontendVersion": "1.37.10", "workflowRendererVersion": "LG", "VHS_latentpreview": false, "VHS_latentpreviewrate": 0, "VHS_MetadataImage": true, "VHS_KeepIntermediate": true}, "version": 0.4} diff --git a/blueprints/Unsharp Mask.json b/blueprints/Unsharp Mask.json new file mode 100644 index 000000000..9363037ef --- /dev/null +++ b/blueprints/Unsharp Mask.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 30, "last_link_id": 0, "nodes": [{"id": 30, "type": "d99ba3f5-8a56-4365-8e45-3f3ea7c572a1", "pos": [4420, -370], "size": [210, 106], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Unsharp Mask", "properties": {"proxyWidgets": [["27", "value"], ["28", "value"], ["29", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "d99ba3f5-8a56-4365-8e45-3f3ea7c572a1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 29, "lastLinkId": 43, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Unsharp Mask", "inputNode": {"id": -10, "bounding": [3920, -405, 120, 60]}, "outputNode": {"id": -20, "bounding": [4930, -405, 120, 60]}, "inputs": [{"id": "75354555-d2f3-46b9-a3dd-b076dcfca561", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image0", "pos": [4020, -385]}], "outputs": [{"id": "04368b94-2a96-46ff-8c07-d0ce3235b40d", "name": "IMAGE0", "type": "IMAGE", "linkIds": [40], "localized_name": "IMAGE0", "pos": [4950, -385]}], "widgets": [], "nodes": [{"id": 27, "type": "PrimitiveFloat", "pos": [4100, -540], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "amount", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [41]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [1]}, {"id": 28, "type": "PrimitiveFloat", "pos": [4100, -430], "size": [270, 58], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "radius", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [42]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 10, "precision": 1, "step": 0.5}, "widgets_values": [3]}, {"id": 29, "type": "PrimitiveFloat", "pos": [4100, -320], "size": [270, 58], "flags": {}, "order": 2, "mode": 0, "inputs": [{"label": "threshold", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [43]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 1, "precision": 2, "step": 0.05}, "widgets_values": [0]}, {"id": 26, "type": "GLSLShader", "pos": [4470, -580], "size": [400, 232], "flags": {}, "order": 3, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 41}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": 42}, {"label": "u_float2", "localized_name": "floats.u_float2", "name": "floats.u_float2", "shape": 7, "type": "FLOAT", "link": 43}, {"label": "u_float3", "localized_name": "floats.u_float3", "name": "floats.u_float3", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [40]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // amount [0.0 - 3.0] typical: 0.5-1.5\nuniform float u_float1; // radius [0.5 - 10.0] blur radius in pixels\nuniform float u_float2; // threshold [0.0 - 0.1] min difference to sharpen\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nfloat gaussian(float x, float sigma) {\n return exp(-(x * x) / (2.0 * sigma * sigma));\n}\n\nfloat getLuminance(vec3 color) {\n return dot(color, vec3(0.2126, 0.7152, 0.0722));\n}\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n float radius = max(u_float1, 0.5);\n float amount = u_float0;\n float threshold = u_float2;\n\n vec4 original = texture(u_image0, v_texCoord);\n\n // Gaussian blur for the \"unsharp\" mask\n int samples = int(ceil(radius));\n float sigma = radius / 2.0;\n\n vec4 blurred = vec4(0.0);\n float totalWeight = 0.0;\n\n for (int x = -samples; x <= samples; x++) {\n for (int y = -samples; y <= samples; y++) {\n vec2 offset = vec2(float(x), float(y)) * texel;\n vec4 sample_color = texture(u_image0, v_texCoord + offset);\n\n float dist = length(vec2(float(x), float(y)));\n float weight = gaussian(dist, sigma);\n blurred += sample_color * weight;\n totalWeight += weight;\n }\n }\n blurred /= totalWeight;\n\n // Unsharp mask = original - blurred\n vec3 mask = original.rgb - blurred.rgb;\n\n // Luminance-based threshold with smooth falloff\n float lumaDelta = abs(getLuminance(original.rgb) - getLuminance(blurred.rgb));\n float thresholdScale = smoothstep(0.0, threshold, lumaDelta);\n mask *= thresholdScale;\n\n // Sharpen: original + mask * amount\n vec3 sharpened = original.rgb + mask * amount;\n\n fragColor0 = vec4(clamp(sharpened, 0.0, 1.0), original.a);\n}\n", "from_input"]}], "groups": [], "links": [{"id": 41, "origin_id": 27, "origin_slot": 0, "target_id": 26, "target_slot": 2, "type": "FLOAT"}, {"id": 42, "origin_id": 28, "origin_slot": 0, "target_id": 26, "target_slot": 3, "type": "FLOAT"}, {"id": 43, "origin_id": 29, "origin_slot": 0, "target_id": 26, "target_slot": 4, "type": "FLOAT"}, {"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 26, "target_slot": 0, "type": "IMAGE"}, {"id": 40, "origin_id": 26, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}} diff --git a/blueprints/Video Captioning (Gemini).json b/blueprints/Video Captioning (Gemini).json new file mode 100644 index 000000000..1d72718a1 --- /dev/null +++ b/blueprints/Video Captioning (Gemini).json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 233, "last_link_id": 0, "nodes": [{"id": 233, "type": "dcf32045-0ee4-4efc-9aca-9f26f3a157be", "pos": [0, 1140], "size": [400, 260], "flags": {}, "order": 7, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"name": "video", "type": "VIDEO", "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": []}], "title": "Video Captioning(Gemini)", "properties": {"proxyWidgets": [["-1", "prompt"], ["-1", "model"], ["1", "seed"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["Describe this video", "gemini-2.5-pro"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "dcf32045-0ee4-4efc-9aca-9f26f3a157be", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 16, "lastLinkId": 17, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Captioning(Gemini)", "inputNode": {"id": -10, "bounding": [-6870, 2530, 120, 100]}, "outputNode": {"id": -20, "bounding": [-6240, 2530, 120, 60]}, "inputs": [{"id": "d8cbd7eb-636a-4d7b-8ff6-b22f1755e26c", "name": "prompt", "type": "STRING", "linkIds": [15], "pos": [-6770, 2550]}, {"id": "b034e26a-d114-4604-aec2-32783e86aa6b", "name": "model", "type": "COMBO", "linkIds": [16], "pos": [-6770, 2570]}, {"id": "f7363f60-a106-4e06-90af-df5f53355b98", "name": "video", "type": "VIDEO", "linkIds": [17], "pos": [-6770, 2590]}], "outputs": [{"id": "e12c6e80-5210-4328-a581-bc8924c53070", "name": "STRING", "type": "STRING", "linkIds": [6], "localized_name": "STRING", "pos": [-6220, 2550]}], "widgets": [], "nodes": [{"id": 1, "type": "GeminiNode", "pos": [-6690, 2360], "size": [390, 430], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": null}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": 17}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 15}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": 16}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [6]}], "properties": {"cnr_id": "comfy-core", "ver": "0.5.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["Describe this video", "gemini-2.5-pro", 511865409297955, "randomize", "- Role: AI Video Analysis and Description Specialist\n- Background: The user requires a prompt that enables AI to analyze videos (including frame sequences, dynamic movements, audio-visual elements) and generate detailed, structured descriptions. These descriptions must be directly usable as video generation prompts to create similar videos, serving core tasks such as video content creation, creative inspiration extraction, and artistic style exploration.\n- Profile: As an AI Video Analysis and Description Specialist, you possess expertise in computer vision, video temporal sequence processing, motion analysis, and multi-modal natural language generation. You excel at interpreting dynamic visual data (frame-by-frame features + continuous motion) and translating it into precise descriptive text that fully guides the creation of new videos with matching style, rhythm, and content.\n- Skills: Proficiency in video frame feature extraction, motion trajectory recognition, temporal rhythm analysis, scene/shot segmentation, color grading detection, camera movement identification (pan/tilt/zoom/dolly), audio-visual element correlation analysis, and descriptive language generation that captures both static visual features and dynamic temporal characteristics. Mastery of artistic elements in video: composition (per frame + dynamic framing), color palette (consistent + transitional), texture (surface details + motion blur), pacing (frame rate, shot duration), and sound style (background music, ambient sound cues).\n- Goals: To analyze the provided video comprehensively, generate a detailed, structured description that captures all key video elements (static visual features + dynamic motion/temporal characteristics + audio-visual style), and ensure this description can directly serve as a high-quality prompt for creating similar videos.\n- Constraints: \n 1. The description must be clear, structured, and specific enough to guide end-to-end video creation (including frame rate, shot duration, camera movement, motion speed, color transitions).\n 2. Avoid ambiguity; focus on the most salient static (per-frame) and dynamic (temporal) features of the video.\n 3. Prioritize video-specific elements: motion trajectory, shot types (close-up/wide shot/etc.), camera movement, frame rate, scene transitions, rhythm/pacing, and temporal color changes.\n 4. The output must only contain the video generation prompt (no extra explanations).\n- OutputFormat: A detailed, hierarchical text description of the video, structured as follows:\n 1. Core Content & Narrative: Brief overview of the video's subject and temporal progression\n 2. Visual Style (Static): Per-frame key elements (objects, colors, composition, lighting, texture)\n 3. Dynamic Elements (Temporal): Motion details (speed, trajectory, direction), camera movement (type, speed, direction), shot duration/frame rate, scene transitions\n 4. Audio-Visual Style: Color grading (consistent/transitional), rhythm/pacing, and implied audio style (if discernible)\n- Workflow:\n 1. Analyze the video to segment shots/scenes, identify frame-by-frame static visual elements (objects, colors, composition) and cross-frame dynamic elements (motion, camera movement, temporal changes).\n 2. Extract video-specific technical features: frame rate, shot duration, scene transition types, motion speed/rhythm.\n 3. Generate a structured, detailed description that captures the essence of the video (static + dynamic + temporal characteristics), ensuring specificity and actionability for video generation.\n 4. Refine the description for clarity, conciseness, and alignment with video generation prompt norms (e.g., including frame rate, camera movement terms, motion speed descriptors)."], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 6, "origin_id": 1, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "*"}, {"id": 15, "origin_id": -10, "origin_slot": 0, "target_id": 1, "target_slot": 4, "type": "STRING"}, {"id": 16, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 5, "type": "COMBO"}, {"id": 17, "origin_id": -10, "origin_slot": 2, "target_id": 1, "target_slot": 2, "type": "VIDEO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Video Captioning"}]}} diff --git a/blueprints/Video Inpaint(Wan2.1 VACE).json b/blueprints/Video Inpaint(Wan2.1 VACE).json new file mode 100644 index 000000000..a7c6db003 --- /dev/null +++ b/blueprints/Video Inpaint(Wan2.1 VACE).json @@ -0,0 +1 @@ +{"id": "2f429c60-2e03-4117-908b-31e1fab04bba", "revision": 0, "last_node_id": 229, "last_link_id": 366, "nodes": [{"id": 229, "type": "53a657f3-c9eb-40f2-9ebd-1ed77d25ed67", "pos": [-230, 160], "size": [400, 480], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "video mask", "localized_name": "mask", "name": "mask", "type": "MASK", "link": null}, {"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "width", "type": "INT", "widget": {"name": "width"}, "link": null}, {"name": "height", "type": "INT", "widget": {"name": "height"}, "link": null}, {"label": "reference image", "name": "reference_image_1", "type": "IMAGE", "link": null}, {"name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": null}, {"name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": null}, {"name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": null}, {"name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["6", "text"], ["-1", "width"], ["-1", "height"], ["3", "seed"], ["3", "control_after_generate"], ["-1", "unet_name"], ["-1", "lora_name"], ["-1", "clip_name"], ["-1", "vae_name"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": [null, 720, 720, null, null, "wan2.1_vace_14B_fp16.safetensors", "Wan21_CausVid_14B_T2V_lora_rank32.safetensors", "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan_2.1_vae.safetensors"]}], "links": [], "groups": [], "definitions": {"subgraphs": [{"id": "53a657f3-c9eb-40f2-9ebd-1ed77d25ed67", "version": 1, "state": {"lastGroupId": 25, "lastNodeId": 229, "lastLinkId": 366, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "local-Video Inpaint(Wan2.1 VACE)", "inputNode": {"id": -10, "bounding": [-970, 800, 132.54296875, 220]}, "outputNode": {"id": -20, "bounding": [1480, 535, 120, 60]}, "inputs": [{"id": "9fdda38d-6aa7-48ad-b425-f493d8aa585c", "name": "mask", "type": "MASK", "linkIds": [351, 335, 345], "localized_name": "mask", "label": "video mask", "pos": [-857.45703125, 820]}, {"id": "8b1788cc-46d2-4f40-8b33-70fd56b4cb24", "name": "video", "type": "VIDEO", "linkIds": [336], "localized_name": "video", "pos": [-857.45703125, 840]}, {"id": "09393f21-257e-4476-bb02-54899a8252b8", "name": "width", "type": "INT", "linkIds": [355], "pos": [-857.45703125, 860]}, {"id": "07a030f7-7eac-4b3f-b8f3-f00ee87b191d", "name": "height", "type": "INT", "linkIds": [356], "pos": [-857.45703125, 880]}, {"id": "255908d3-6cc9-48fc-b76b-ab9fb72695bc", "name": "reference_image_1", "type": "IMAGE", "linkIds": [361], "label": "reference image", "pos": [-857.45703125, 900]}, {"id": "18a5d241-523c-433d-ae05-25b6e69d1e29", "name": "unet_name", "type": "COMBO", "linkIds": [363], "pos": [-857.45703125, 920]}, {"id": "d7576e1b-da5f-402f-81b2-d37f838b1f8f", "name": "lora_name", "type": "COMBO", "linkIds": [364], "pos": [-857.45703125, 940]}, {"id": "41676a3e-c710-4723-821e-f651ad3784b1", "name": "clip_name", "type": "COMBO", "linkIds": [365], "pos": [-857.45703125, 960]}, {"id": "41fc878c-9aa6-4c12-bef3-ceda6b094b7c", "name": "vae_name", "type": "COMBO", "linkIds": [366], "pos": [-857.45703125, 980]}], "outputs": [{"id": "d4861f39-1011-49dc-80fd-ee318b614a8d", "name": "VIDEO", "type": "VIDEO", "linkIds": [129], "localized_name": "VIDEO", "pos": [1500, 555]}], "widgets": [], "nodes": [{"id": 58, "type": "TrimVideoLatent", "pos": [760, 390], "size": [315, 60], "flags": {"collapsed": false}, "order": 13, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 116}, {"localized_name": "trim_amount", "name": "trim_amount", "type": "INT", "widget": {"name": "trim_amount"}, "link": 115}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "links": [117]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "TrimVideoLatent", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {"trim_amount": true}}, "widgets_values": [0]}, {"id": 8, "type": "VAEDecode", "pos": [770, 500], "size": [315, 46], "flags": {"collapsed": false}, "order": 11, "mode": 0, "inputs": [{"localized_name": "samples", "name": "samples", "type": "LATENT", "link": 117}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 76}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "slot_index": 0, "links": [139]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAEDecode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 48, "type": "ModelSamplingSD3", "pos": [400, 50], "size": [315, 58], "flags": {}, "order": 9, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 279}, {"localized_name": "shift", "name": "shift", "type": "FLOAT", "widget": {"name": "shift"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [280]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ModelSamplingSD3", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [5]}, {"id": 219, "type": "InvertMask", "pos": [400, 990], "size": [140, 26], "flags": {}, "order": 24, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 351}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [352]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "InvertMask"}, "widgets_values": []}, {"id": 216, "type": "MaskToImage", "pos": [560, 990], "size": [193.2779296875, 26], "flags": {}, "order": 23, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 352}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [334]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "MaskToImage"}, "widgets_values": []}, {"id": 213, "type": "RebatchImages", "pos": [410, 690], "size": [230, 60], "flags": {}, "order": 21, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 360}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": 340}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "shape": 6, "type": "IMAGE", "links": [333]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "RebatchImages"}, "widgets_values": [1]}, {"id": 68, "type": "CreateVideo", "pos": [1150, 50], "size": [270, 78], "flags": {"collapsed": false}, "order": 14, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 139}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 362}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 353}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [129]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CreateVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [16]}, {"id": 208, "type": "ImageCompositeMasked", "pos": [410, 790], "size": [230, 146], "flags": {}, "order": 18, "mode": 0, "inputs": [{"localized_name": "destination", "name": "destination", "type": "IMAGE", "link": 333}, {"localized_name": "source", "name": "source", "type": "IMAGE", "link": 334}, {"localized_name": "mask", "name": "mask", "shape": 7, "type": "MASK", "link": 335}, {"localized_name": "x", "name": "x", "type": "INT", "widget": {"name": "x"}, "link": null}, {"localized_name": "y", "name": "y", "type": "INT", "widget": {"name": "y"}, "link": null}, {"localized_name": "resize_source", "name": "resize_source", "type": "BOOLEAN", "widget": {"name": "resize_source"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [341, 344]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "ImageCompositeMasked"}, "widgets_values": [0, 0, true]}, {"id": 214, "type": "PreviewImage", "pos": [760, 690], "size": [300, 300], "flags": {}, "order": 22, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 341}], "outputs": [], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "PreviewImage"}, "widgets_values": []}, {"id": 111, "type": "MaskToImage", "pos": [20, 1270], "size": [240, 26], "flags": {}, "order": 15, "mode": 0, "inputs": [{"localized_name": "mask", "name": "mask", "type": "MASK", "link": 345}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [201]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "MaskToImage", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": []}, {"id": 129, "type": "RepeatImageBatch", "pos": [20, 1160], "size": [240, 60], "flags": {}, "order": 16, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 201}, {"localized_name": "amount", "name": "amount", "type": "INT", "widget": {"name": "amount"}, "link": 346}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [202]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "RepeatImageBatch", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {"amount": true}}, "widgets_values": [17]}, {"id": 130, "type": "ImageToMask", "pos": [20, 1050], "size": [240, 60], "flags": {}, "order": 17, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 202}, {"localized_name": "channel", "name": "channel", "type": "COMBO", "widget": {"name": "channel"}, "link": null}], "outputs": [{"localized_name": "MASK", "name": "MASK", "type": "MASK", "links": [349]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "ImageToMask", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["red"]}, {"id": 3, "type": "KSampler", "pos": [770, 50], "size": [315, 262], "flags": {}, "order": 10, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 280}, {"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 98}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 99}, {"localized_name": "latent_image", "name": "latent_image", "type": "LATENT", "link": 160}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "steps", "name": "steps", "type": "INT", "widget": {"name": "steps"}, "link": null}, {"localized_name": "cfg", "name": "cfg", "type": "FLOAT", "widget": {"name": "cfg"}, "link": null}, {"localized_name": "sampler_name", "name": "sampler_name", "type": "COMBO", "widget": {"name": "sampler_name"}, "link": null}, {"localized_name": "scheduler", "name": "scheduler", "type": "COMBO", "widget": {"name": "scheduler"}, "link": null}, {"localized_name": "denoise", "name": "denoise", "type": "FLOAT", "widget": {"name": "denoise"}, "link": null}], "outputs": [{"localized_name": "LATENT", "name": "LATENT", "type": "LATENT", "slot_index": 0, "links": [116]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "KSampler", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [584027519362099, "randomize", 4, 1, "uni_pc", "simple", 1]}, {"id": 224, "type": "MarkdownNote", "pos": [420, -160], "size": [310, 110], "flags": {}, "order": 0, "mode": 0, "inputs": [], "outputs": [], "title": "About Video Size", "properties": {}, "widgets_values": ["| Model | 480P | 720P |\n| ------------------------------------------------------------ | ---- | ---- |\n| [VACE-1.3B](https://huggingface.co/Wan-AI/Wan2.1-VACE-1.3B) | ✅ | ❌ |\n| [VACE-14B](https://huggingface.co/Wan-AI/Wan2.1-VACE-14B) | ✅ | ✅ |"], "color": "#432", "bgcolor": "#000"}, {"id": 223, "type": "MarkdownNote", "pos": [770, -210], "size": [303.90106201171875, 158.5415802001953], "flags": {}, "order": 1, "mode": 0, "inputs": [], "outputs": [], "title": "KSampler Setting", "properties": {}, "widgets_values": ["## Default\n\n- steps:20\n- cfg:6.0\n\n## For CausVid LoRA\n\n- steps: 2-4\n- cfg: 1.0\n\n"], "color": "#432", "bgcolor": "#000"}, {"id": 6, "type": "CLIPTextEncode", "pos": [-80, 60], "size": [420, 280], "flags": {}, "order": 7, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 74}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [96]}], "title": "CLIP Text Encode (Positive Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": [""], "color": "#232", "bgcolor": "#353"}, {"id": 140, "type": "UNETLoader", "pos": [-505.8336486816406, 88.22794342041016], "size": [360, 82], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "unet_name", "name": "unet_name", "type": "COMBO", "widget": {"name": "unet_name"}, "link": 363}, {"localized_name": "weight_dtype", "name": "weight_dtype", "type": "COMBO", "widget": {"name": "weight_dtype"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "slot_index": 0, "links": [248]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "UNETLoader", "models": [{"name": "wan2.1_vace_14B_fp16.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/diffusion_models/wan2.1_vace_14B_fp16.safetensors", "directory": "diffusion_models"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["wan2.1_vace_14B_fp16.safetensors", "fp8_e4m3fn_fast"]}, {"id": 154, "type": "LoraLoaderModelOnly", "pos": [-505.8336486816406, 228.2279510498047], "size": [360, 85.11004638671875], "flags": {}, "order": 6, "mode": 0, "inputs": [{"localized_name": "model", "name": "model", "type": "MODEL", "link": 248}, {"localized_name": "lora_name", "name": "lora_name", "type": "COMBO", "widget": {"name": "lora_name"}, "link": 364}, {"localized_name": "strength_model", "name": "strength_model", "type": "FLOAT", "widget": {"name": "strength_model"}, "link": null}], "outputs": [{"localized_name": "MODEL", "name": "MODEL", "type": "MODEL", "links": [279]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "LoraLoaderModelOnly", "models": [{"name": "Wan21_CausVid_14B_T2V_lora_rank32.safetensors", "url": "https://huggingface.co/Kijai/WanVideo_comfy/resolve/main/Wan21_CausVid_14B_T2V_lora_rank32.safetensors", "directory": "loras"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["Wan21_CausVid_14B_T2V_lora_rank32.safetensors", 0.30000000000000004]}, {"id": 38, "type": "CLIPLoader", "pos": [-499.14141845703125, 368.0911865234375], "size": [360, 106], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "clip_name", "name": "clip_name", "type": "COMBO", "widget": {"name": "clip_name"}, "link": 365}, {"localized_name": "type", "name": "type", "type": "COMBO", "widget": {"name": "type"}, "link": null}, {"localized_name": "device", "name": "device", "shape": 7, "type": "COMBO", "widget": {"name": "device"}, "link": null}], "outputs": [{"localized_name": "CLIP", "name": "CLIP", "type": "CLIP", "slot_index": 0, "links": [74, 75]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CLIPLoader", "models": [{"name": "umt5_xxl_fp8_e4m3fn_scaled.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/text_encoders/umt5_xxl_fp8_e4m3fn_scaled.safetensors?download=true", "directory": "text_encoders"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["umt5_xxl_fp8_e4m3fn_scaled.safetensors", "wan", "default"]}, {"id": 39, "type": "VAELoader", "pos": [-498.5298156738281, 517.2576293945312], "size": [360, 60], "flags": {}, "order": 4, "mode": 0, "inputs": [{"localized_name": "vae_name", "name": "vae_name", "type": "COMBO", "widget": {"name": "vae_name"}, "link": 366}], "outputs": [{"localized_name": "VAE", "name": "VAE", "type": "VAE", "slot_index": 0, "links": [76, 101]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "VAELoader", "models": [{"name": "wan_2.1_vae.safetensors", "url": "https://huggingface.co/Comfy-Org/Wan_2.1_ComfyUI_repackaged/resolve/main/split_files/vae/wan_2.1_vae.safetensors", "directory": "vae"}], "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["wan_2.1_vae.safetensors"]}, {"id": 221, "type": "MarkdownNote", "pos": [380, 1090], "size": [480, 170], "flags": {}, "order": 5, "mode": 0, "inputs": [], "outputs": [], "title": "[EN] About video mask", "properties": {"widget_ue_connectable": {}}, "widgets_values": ["Currently, it's difficult to perfectly draw dynamic masks for different frames using only core nodes. However, to avoid requiring users to install additional custom nodes, our templates only use core nodes. You can refer to this implementation idea to achieve video inpainting.\n\nYou can use KJNode’s Points Editor and Sam2Segmentation to create some dynamic mask functions.\n\nCustom node links:\n- [ComfyUI-KJNodes](https://github.com/kijai/ComfyUI-KJNodes)\n- [ComfyUI-segment-anything-2](https://github.com/kijai/ComfyUI-segment-anything-2)"], "color": "#432", "bgcolor": "#000"}, {"id": 7, "type": "CLIPTextEncode", "pos": [-80, 390], "size": [425.27801513671875, 180.6060791015625], "flags": {}, "order": 8, "mode": 0, "inputs": [{"localized_name": "clip", "name": "clip", "type": "CLIP", "link": 75}, {"localized_name": "text", "name": "text", "type": "STRING", "widget": {"name": "text"}, "link": null}], "outputs": [{"localized_name": "CONDITIONING", "name": "CONDITIONING", "type": "CONDITIONING", "slot_index": 0, "links": [97]}], "title": "CLIP Text Encode (Negative Prompt)", "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "CLIPTextEncode", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {}}, "widgets_values": ["过曝,静态,细节模糊不清,字幕,风格,作品,画作,画面,静止,整体发灰,最差质量,低质量,JPEG压缩残留,丑陋的,残缺的,多余的手指,画得不好的手部,画得不好的脸部,畸形的,毁容的,形态畸形的肢体,手指融合,静止不动的画面,杂乱的背景,三条腿,背景人很多,倒着走,过曝,"], "color": "#223", "bgcolor": "#335"}, {"id": 229, "type": "ImageFromBatch", "pos": [-510, 800], "size": [270, 82], "flags": {}, "order": 25, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 358}, {"localized_name": "batch_index", "name": "batch_index", "type": "INT", "widget": {"name": "batch_index"}, "link": null}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [359, 360]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageFromBatch"}, "widgets_values": [0, 81]}, {"id": 49, "type": "WanVaceToVideo", "pos": [400, 200], "size": [315, 254], "flags": {}, "order": 12, "mode": 0, "inputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "link": 96}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "link": 97}, {"localized_name": "vae", "name": "vae", "type": "VAE", "link": 101}, {"localized_name": "control_video", "name": "control_video", "shape": 7, "type": "IMAGE", "link": 344}, {"localized_name": "control_masks", "name": "control_masks", "shape": 7, "type": "MASK", "link": 349}, {"localized_name": "reference_image", "name": "reference_image", "shape": 7, "type": "IMAGE", "link": 361}, {"localized_name": "width", "name": "width", "type": "INT", "widget": {"name": "width"}, "link": 355}, {"localized_name": "height", "name": "height", "type": "INT", "widget": {"name": "height"}, "link": 356}, {"localized_name": "length", "name": "length", "type": "INT", "widget": {"name": "length"}, "link": null}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "widget": {"name": "batch_size"}, "link": null}, {"localized_name": "strength", "name": "strength", "type": "FLOAT", "widget": {"name": "strength"}, "link": null}], "outputs": [{"localized_name": "positive", "name": "positive", "type": "CONDITIONING", "links": [98]}, {"localized_name": "negative", "name": "negative", "type": "CONDITIONING", "links": [99]}, {"localized_name": "latent", "name": "latent", "type": "LATENT", "links": [160]}, {"localized_name": "trim_latent", "name": "trim_latent", "type": "INT", "links": [115]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.34", "Node name for S&R": "WanVaceToVideo", "enableTabs": false, "tabWidth": 65, "tabXOffset": 10, "hasSecondTab": false, "secondTabText": "Send Back", "secondTabOffset": 80, "secondTabWidth": 65, "widget_ue_connectable": {"width": true, "height": true, "length": true}}, "widgets_values": [720, 720, 81, 1, 1]}, {"id": 211, "type": "GetImageSize", "pos": [70, 800], "size": [190, 66], "flags": {"collapsed": false}, "order": 20, "mode": 0, "inputs": [{"localized_name": "image", "name": "image", "type": "IMAGE", "link": 359}], "outputs": [{"localized_name": "width", "name": "width", "type": "INT", "links": null}, {"localized_name": "height", "name": "height", "type": "INT", "links": null}, {"localized_name": "batch_size", "name": "batch_size", "type": "INT", "links": [340, 346]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "GetImageSize"}, "widgets_values": []}, {"id": 210, "type": "GetVideoComponents", "pos": [-510, 690], "size": [193.530859375, 66], "flags": {}, "order": 19, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 336}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [358]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [362]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [353]}], "properties": {"cnr_id": "comfy-core", "ver": "0.3.40", "Node name for S&R": "GetVideoComponents"}, "widgets_values": []}], "groups": [{"id": 1, "title": "Step1 - Load models here", "bounding": [-540, -30, 430, 620], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 2, "title": "Prompt", "bounding": [-90, -30, 450, 620], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 3, "title": "Sampling & Decoding", "bounding": [380, -30, 720, 620], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 10, "title": "Repeat Mask Batch", "bounding": [-90, 910, 450, 460], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 21, "title": "Get video info", "bounding": [-540, 610, 900, 290], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 22, "title": "Composite video & masks", "bounding": [380, 610, 720, 420], "color": "#3f789e", "font_size": 24, "flags": {}}, {"id": 23, "title": "Step4 - Set video size & length", "bounding": [390, 130, 360, 340], "color": "#A88", "font_size": 24, "flags": {}}, {"id": 25, "title": "14B", "bounding": [-520, 10, 380, 308.7100524902344], "color": "#3f789e", "font_size": 24, "flags": {}}], "links": [{"id": 116, "origin_id": 3, "origin_slot": 0, "target_id": 58, "target_slot": 0, "type": "LATENT"}, {"id": 115, "origin_id": 49, "origin_slot": 3, "target_id": 58, "target_slot": 1, "type": "INT"}, {"id": 117, "origin_id": 58, "origin_slot": 0, "target_id": 8, "target_slot": 0, "type": "LATENT"}, {"id": 76, "origin_id": 39, "origin_slot": 0, "target_id": 8, "target_slot": 1, "type": "VAE"}, {"id": 279, "origin_id": 154, "origin_slot": 0, "target_id": 48, "target_slot": 0, "type": "MODEL"}, {"id": 352, "origin_id": 219, "origin_slot": 0, "target_id": 216, "target_slot": 0, "type": "MASK"}, {"id": 340, "origin_id": 211, "origin_slot": 2, "target_id": 213, "target_slot": 1, "type": "INT"}, {"id": 96, "origin_id": 6, "origin_slot": 0, "target_id": 49, "target_slot": 0, "type": "CONDITIONING"}, {"id": 97, "origin_id": 7, "origin_slot": 0, "target_id": 49, "target_slot": 1, "type": "CONDITIONING"}, {"id": 101, "origin_id": 39, "origin_slot": 0, "target_id": 49, "target_slot": 2, "type": "VAE"}, {"id": 344, "origin_id": 208, "origin_slot": 0, "target_id": 49, "target_slot": 3, "type": "IMAGE"}, {"id": 349, "origin_id": 130, "origin_slot": 0, "target_id": 49, "target_slot": 4, "type": "MASK"}, {"id": 139, "origin_id": 8, "origin_slot": 0, "target_id": 68, "target_slot": 0, "type": "IMAGE"}, {"id": 353, "origin_id": 210, "origin_slot": 2, "target_id": 68, "target_slot": 2, "type": "FLOAT"}, {"id": 333, "origin_id": 213, "origin_slot": 0, "target_id": 208, "target_slot": 0, "type": "IMAGE"}, {"id": 334, "origin_id": 216, "origin_slot": 0, "target_id": 208, "target_slot": 1, "type": "IMAGE"}, {"id": 341, "origin_id": 208, "origin_slot": 0, "target_id": 214, "target_slot": 0, "type": "IMAGE"}, {"id": 201, "origin_id": 111, "origin_slot": 0, "target_id": 129, "target_slot": 0, "type": "IMAGE"}, {"id": 346, "origin_id": 211, "origin_slot": 2, "target_id": 129, "target_slot": 1, "type": "INT"}, {"id": 202, "origin_id": 129, "origin_slot": 0, "target_id": 130, "target_slot": 0, "type": "IMAGE"}, {"id": 280, "origin_id": 48, "origin_slot": 0, "target_id": 3, "target_slot": 0, "type": "MODEL"}, {"id": 98, "origin_id": 49, "origin_slot": 0, "target_id": 3, "target_slot": 1, "type": "CONDITIONING"}, {"id": 99, "origin_id": 49, "origin_slot": 1, "target_id": 3, "target_slot": 2, "type": "CONDITIONING"}, {"id": 160, "origin_id": 49, "origin_slot": 2, "target_id": 3, "target_slot": 3, "type": "LATENT"}, {"id": 74, "origin_id": 38, "origin_slot": 0, "target_id": 6, "target_slot": 0, "type": "CLIP"}, {"id": 248, "origin_id": 140, "origin_slot": 0, "target_id": 154, "target_slot": 0, "type": "MODEL"}, {"id": 75, "origin_id": 38, "origin_slot": 0, "target_id": 7, "target_slot": 0, "type": "CLIP"}, {"id": 351, "origin_id": -10, "origin_slot": 0, "target_id": 219, "target_slot": 0, "type": "MASK"}, {"id": 335, "origin_id": -10, "origin_slot": 0, "target_id": 208, "target_slot": 2, "type": "MASK"}, {"id": 345, "origin_id": -10, "origin_slot": 0, "target_id": 111, "target_slot": 0, "type": "MASK"}, {"id": 336, "origin_id": -10, "origin_slot": 1, "target_id": 210, "target_slot": 0, "type": "VIDEO"}, {"id": 129, "origin_id": 68, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 355, "origin_id": -10, "origin_slot": 2, "target_id": 49, "target_slot": 6, "type": "INT"}, {"id": 356, "origin_id": -10, "origin_slot": 3, "target_id": 49, "target_slot": 7, "type": "INT"}, {"id": 358, "origin_id": 210, "origin_slot": 0, "target_id": 229, "target_slot": 0, "type": "IMAGE"}, {"id": 359, "origin_id": 229, "origin_slot": 0, "target_id": 211, "target_slot": 0, "type": "IMAGE"}, {"id": 360, "origin_id": 229, "origin_slot": 0, "target_id": 213, "target_slot": 0, "type": "IMAGE"}, {"id": 361, "origin_id": -10, "origin_slot": 4, "target_id": 49, "target_slot": 5, "type": "IMAGE"}, {"id": 362, "origin_id": 210, "origin_slot": 1, "target_id": 68, "target_slot": 1, "type": "AUDIO"}, {"id": 363, "origin_id": -10, "origin_slot": 5, "target_id": 140, "target_slot": 0, "type": "COMBO"}, {"id": 364, "origin_id": -10, "origin_slot": 6, "target_id": 154, "target_slot": 1, "type": "COMBO"}, {"id": 365, "origin_id": -10, "origin_slot": 7, "target_id": 38, "target_slot": 0, "type": "COMBO"}, {"id": 366, "origin_id": -10, "origin_slot": 8, "target_id": 39, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Inpaint video"}]}, "config": {}, "extra": {"workflowRendererVersion": "LG", "ds": {"scale": 0.8183828377358485, "offset": [1215.8643989712405, 178.87024992690183]}}, "version": 0.4} diff --git a/blueprints/Video Stitch.json b/blueprints/Video Stitch.json new file mode 100644 index 000000000..11bcf6b7d --- /dev/null +++ b/blueprints/Video Stitch.json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 84, "last_link_id": 0, "nodes": [{"id": 84, "type": "8e8aa94a-647e-436d-8440-8ee4691864de", "pos": [-6100, 2620], "size": [290, 160], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "Before Video", "localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"label": "After Video", "localized_name": "video_1", "name": "video_1", "type": "VIDEO", "link": null}, {"name": "direction", "type": "COMBO", "widget": {"name": "direction"}, "link": null}, {"name": "match_image_size", "type": "BOOLEAN", "widget": {"name": "match_image_size"}, "link": null}, {"name": "spacing_width", "type": "INT", "widget": {"name": "spacing_width"}, "link": null}, {"name": "spacing_color", "type": "COMBO", "widget": {"name": "spacing_color"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "properties": {"proxyWidgets": [["-1", "direction"], ["-1", "match_image_size"], ["-1", "spacing_width"], ["-1", "spacing_color"]], "cnr_id": "comfy-core", "ver": "0.13.0"}, "widgets_values": ["right", true, 0, "white"], "title": "Video Stitch"}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "8e8aa94a-647e-436d-8440-8ee4691864de", "version": 1, "state": {"lastGroupId": 1, "lastNodeId": 84, "lastLinkId": 262, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Stitch", "inputNode": {"id": -10, "bounding": [-6580, 2649, 143.55859375, 160]}, "outputNode": {"id": -20, "bounding": [-5720, 2659, 120, 60]}, "inputs": [{"id": "85555afe-c7a1-4f6e-b073-7c37f7bace7f", "name": "video", "type": "VIDEO", "linkIds": [253], "localized_name": "video", "label": "Before Video", "pos": [-6456.44140625, 2669]}, {"id": "022773ee-6b4f-4e3d-bead-68b3e75e2d20", "name": "video_1", "type": "VIDEO", "linkIds": [254], "localized_name": "video_1", "label": "After Video", "pos": [-6456.44140625, 2689]}, {"id": "7bcd7cbc-e918-472a-a0cf-2e0900545372", "name": "direction", "type": "COMBO", "linkIds": [259], "pos": [-6456.44140625, 2709]}, {"id": "9a00389d-c1c8-40d5-87fe-f41019b61fbc", "name": "match_image_size", "type": "BOOLEAN", "linkIds": [260], "pos": [-6456.44140625, 2729]}, {"id": "b95e0440-3ea8-4ae0-887e-12e75701042a", "name": "spacing_width", "type": "INT", "linkIds": [261], "pos": [-6456.44140625, 2749]}, {"id": "83ab9382-0a70-4169-b26a-66ab026b43c4", "name": "spacing_color", "type": "COMBO", "linkIds": [262], "pos": [-6456.44140625, 2769]}], "outputs": [{"id": "09707f43-7552-4a6e-bd23-d962d31801c2", "name": "VIDEO", "type": "VIDEO", "linkIds": [255], "localized_name": "VIDEO", "pos": [-5700, 2679]}], "widgets": [], "nodes": [{"id": 78, "type": "GetVideoComponents", "pos": [-6390, 2560], "size": [193.530859375, 66], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 254}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [249]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": null}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": null}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 77, "type": "GetVideoComponents", "pos": [-6390, 2420], "size": [193.530859375, 66], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 253}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [248]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [251]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [252]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 79, "type": "ImageStitch", "pos": [-6390, 2700], "size": [270, 150], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "image1", "name": "image1", "type": "IMAGE", "link": 248}, {"localized_name": "image2", "name": "image2", "shape": 7, "type": "IMAGE", "link": 249}, {"localized_name": "direction", "name": "direction", "type": "COMBO", "widget": {"name": "direction"}, "link": 259}, {"localized_name": "match_image_size", "name": "match_image_size", "type": "BOOLEAN", "widget": {"name": "match_image_size"}, "link": 260}, {"localized_name": "spacing_width", "name": "spacing_width", "type": "INT", "widget": {"name": "spacing_width"}, "link": 261}, {"localized_name": "spacing_color", "name": "spacing_color", "type": "COMBO", "widget": {"name": "spacing_color"}, "link": 262}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [250]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "ImageStitch"}, "widgets_values": ["right", true, 0, "white"]}, {"id": 80, "type": "CreateVideo", "pos": [-6040, 2610], "size": [270, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 250}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 251}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 252}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [255]}], "properties": {"cnr_id": "comfy-core", "ver": "0.13.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}], "groups": [], "links": [{"id": 248, "origin_id": 77, "origin_slot": 0, "target_id": 79, "target_slot": 0, "type": "IMAGE"}, {"id": 249, "origin_id": 78, "origin_slot": 0, "target_id": 79, "target_slot": 1, "type": "IMAGE"}, {"id": 250, "origin_id": 79, "origin_slot": 0, "target_id": 80, "target_slot": 0, "type": "IMAGE"}, {"id": 251, "origin_id": 77, "origin_slot": 1, "target_id": 80, "target_slot": 1, "type": "AUDIO"}, {"id": 252, "origin_id": 77, "origin_slot": 2, "target_id": 80, "target_slot": 2, "type": "FLOAT"}, {"id": 253, "origin_id": -10, "origin_slot": 0, "target_id": 77, "target_slot": 0, "type": "VIDEO"}, {"id": 254, "origin_id": -10, "origin_slot": 1, "target_id": 78, "target_slot": 0, "type": "VIDEO"}, {"id": 255, "origin_id": 80, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 259, "origin_id": -10, "origin_slot": 2, "target_id": 79, "target_slot": 2, "type": "COMBO"}, {"id": 260, "origin_id": -10, "origin_slot": 3, "target_id": 79, "target_slot": 3, "type": "BOOLEAN"}, {"id": 261, "origin_id": -10, "origin_slot": 4, "target_id": 79, "target_slot": 4, "type": "INT"}, {"id": 262, "origin_id": -10, "origin_slot": 5, "target_id": 79, "target_slot": 5, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video Tools/Stitch videos"}]}} diff --git a/blueprints/Video Upscale(GAN x4).json b/blueprints/Video Upscale(GAN x4).json new file mode 100644 index 000000000..e80b2e229 --- /dev/null +++ b/blueprints/Video Upscale(GAN x4).json @@ -0,0 +1 @@ +{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": "UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}} diff --git a/comfy/checkpoint_pickle.py b/comfy/checkpoint_pickle.py deleted file mode 100644 index 206551d3c..000000000 --- a/comfy/checkpoint_pickle.py +++ /dev/null @@ -1,13 +0,0 @@ -import pickle - -load = pickle.load - -class Empty: - pass - -class Unpickler(pickle.Unpickler): - def find_class(self, module, name): - #TODO: safe unpickle - if module.startswith("pytorch_lightning"): - return Empty - return super().find_class(module, name) diff --git a/comfy/comfy_types/node_typing.py b/comfy/comfy_types/node_typing.py index 0194b7d70..92b1acbd5 100644 --- a/comfy/comfy_types/node_typing.py +++ b/comfy/comfy_types/node_typing.py @@ -176,6 +176,8 @@ class InputTypeOptions(TypedDict): """COMBO type only. Specifies the configuration for a multi-select widget. Available after ComfyUI frontend v1.13.4 https://github.com/Comfy-Org/ComfyUI_frontend/pull/2987""" + gradient_stops: NotRequired[list[list[float]]] + """Gradient color stops for gradientslider display mode. Each stop is [offset, r, g, b] (``FLOAT``).""" class HiddenInputTypeDict(TypedDict): diff --git a/comfy/conds.py b/comfy/conds.py index 5af3e93ea..55d8cdd78 100644 --- a/comfy/conds.py +++ b/comfy/conds.py @@ -4,6 +4,25 @@ import comfy.utils import logging +def is_equal(x, y): + if torch.is_tensor(x) and torch.is_tensor(y): + return torch.equal(x, y) + elif isinstance(x, dict) and isinstance(y, dict): + if x.keys() != y.keys(): + return False + return all(is_equal(x[k], y[k]) for k in x) + elif isinstance(x, (list, tuple)) and isinstance(y, (list, tuple)): + if type(x) is not type(y) or len(x) != len(y): + return False + return all(is_equal(a, b) for a, b in zip(x, y)) + else: + try: + return x == y + except Exception: + logging.warning("comparison issue with COND") + return False + + class CONDRegular: def __init__(self, cond): self.cond = cond @@ -84,7 +103,7 @@ class CONDConstant(CONDRegular): return self._copy_with(self.cond) def can_concat(self, other): - if self.cond != other.cond: + if not is_equal(self.cond, other.cond): return False return True diff --git a/comfy/controlnet.py b/comfy/controlnet.py index 9e1e704e0..ba670b16d 100644 --- a/comfy/controlnet.py +++ b/comfy/controlnet.py @@ -297,6 +297,30 @@ class ControlNet(ControlBase): self.model_sampling_current = None super().cleanup() + +class QwenFunControlNet(ControlNet): + def get_control(self, x_noisy, t, cond, batched_number, transformer_options): + # Fun checkpoints are more sensitive to high strengths in the generic + # ControlNet merge path. Use a soft response curve so strength=1.0 stays + # unchanged while >1 grows more gently. + original_strength = self.strength + self.strength = math.sqrt(max(self.strength, 0.0)) + try: + return super().get_control(x_noisy, t, cond, batched_number, transformer_options) + finally: + self.strength = original_strength + + def pre_run(self, model, percent_to_timestep_function): + super().pre_run(model, percent_to_timestep_function) + self.set_extra_arg("base_model", model.diffusion_model) + + def copy(self): + c = QwenFunControlNet(None, global_average_pooling=self.global_average_pooling, load_device=self.load_device, manual_cast_dtype=self.manual_cast_dtype) + c.control_model = self.control_model + c.control_model_wrapped = self.control_model_wrapped + self.copy_to(c) + return c + class ControlLoraOps: class Linear(torch.nn.Module, comfy.ops.CastWeightBiasOp): def __init__(self, in_features: int, out_features: int, bias: bool = True, @@ -560,6 +584,7 @@ def load_controlnet_hunyuandit(controlnet_data, model_options={}): def load_controlnet_flux_xlabs_mistoline(sd, mistoline=False, model_options={}): model_config, operations, load_device, unet_dtype, manual_cast_dtype, offload_device = controlnet_config(sd, model_options=model_options) control_model = comfy.ldm.flux.controlnet.ControlNetFlux(mistoline=mistoline, operations=operations, device=offload_device, dtype=unet_dtype, **model_config.unet_config) + sd = model_config.process_unet_state_dict(sd) control_model = controlnet_load_state_dict(control_model, sd) extra_conds = ['y', 'guidance'] control = ControlNet(control_model, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds) @@ -605,6 +630,53 @@ def load_controlnet_qwen_instantx(sd, model_options={}): control = ControlNet(control_model, compression_ratio=1, latent_format=latent_format, concat_mask=concat_mask, load_device=load_device, manual_cast_dtype=manual_cast_dtype, extra_conds=extra_conds) return control + +def load_controlnet_qwen_fun(sd, model_options={}): + load_device = comfy.model_management.get_torch_device() + weight_dtype = comfy.utils.weight_dtype(sd) + unet_dtype = model_options.get("dtype", weight_dtype) + manual_cast_dtype = comfy.model_management.unet_manual_cast(unet_dtype, load_device) + + operations = model_options.get("custom_operations", None) + if operations is None: + operations = comfy.ops.pick_operations(unet_dtype, manual_cast_dtype, disable_fast_fp8=True) + + in_features = sd["control_img_in.weight"].shape[1] + inner_dim = sd["control_img_in.weight"].shape[0] + + block_weight = sd["control_blocks.0.attn.to_q.weight"] + attention_head_dim = sd["control_blocks.0.attn.norm_q.weight"].shape[0] + num_attention_heads = max(1, block_weight.shape[0] // max(1, attention_head_dim)) + + model = comfy.ldm.qwen_image.controlnet.QwenImageFunControlNetModel( + control_in_features=in_features, + inner_dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + num_control_blocks=5, + main_model_double=60, + injection_layers=(0, 12, 24, 36, 48), + operations=operations, + device=comfy.model_management.unet_offload_device(), + dtype=unet_dtype, + ) + model = controlnet_load_state_dict(model, sd) + + latent_format = comfy.latent_formats.Wan21() + control = QwenFunControlNet( + model, + compression_ratio=1, + latent_format=latent_format, + # Fun checkpoints already expect their own 33-channel context handling. + # Enabling generic concat_mask injects an extra mask channel at apply-time + # and breaks the intended fallback packing path. + concat_mask=False, + load_device=load_device, + manual_cast_dtype=manual_cast_dtype, + extra_conds=[], + ) + return control + def convert_mistoline(sd): return comfy.utils.state_dict_prefix_replace(sd, {"single_controlnet_blocks.": "controlnet_single_blocks."}) @@ -682,6 +754,8 @@ def load_controlnet_state_dict(state_dict, model=None, model_options={}): return load_controlnet_qwen_instantx(controlnet_data, model_options=model_options) elif "controlnet_x_embedder.weight" in controlnet_data: return load_controlnet_flux_instantx(controlnet_data, model_options=model_options) + elif "control_blocks.0.after_proj.weight" in controlnet_data and "control_img_in.weight" in controlnet_data: + return load_controlnet_qwen_fun(controlnet_data, model_options=model_options) elif "controlnet_blocks.0.linear.weight" in controlnet_data: #mistoline flux return load_controlnet_flux_xlabs_mistoline(convert_mistoline(controlnet_data), mistoline=True, model_options=model_options) diff --git a/comfy/k_diffusion/sampling.py b/comfy/k_diffusion/sampling.py index c0c51d51a..6978eb717 100644 --- a/comfy/k_diffusion/sampling.py +++ b/comfy/k_diffusion/sampling.py @@ -1,12 +1,11 @@ import math -import time from functools import partial from scipy import integrate import torch from torch import nn import torchsde -from tqdm.auto import trange as trange_, tqdm +from tqdm.auto import tqdm from . import utils from . import deis @@ -15,34 +14,7 @@ import comfy.model_patcher import comfy.model_sampling import comfy.memory_management - - -def trange(*args, **kwargs): - if comfy.memory_management.aimdo_allocator is None: - return trange_(*args, **kwargs) - - pbar = trange_(*args, **kwargs, smoothing=1.0) - pbar._i = 0 - pbar.set_postfix_str(" Model Initializing ... ") - - _update = pbar.update - - def warmup_update(n=1): - pbar._i += 1 - if pbar._i == 1: - pbar.i1_time = time.time() - pbar.set_postfix_str(" Model Initialization complete! ") - elif pbar._i == 2: - #bring forward the effective start time based the the diff between first and second iteration - #to attempt to remove load overhead from the final step rate estimate. - pbar.start_t = pbar.i1_time - (time.time() - pbar.i1_time) - pbar.set_postfix_str("") - - _update(n) - - pbar.update = warmup_update - return pbar - +from comfy.utils import model_trange as trange def append_zero(x): return torch.cat([x, x.new_zeros([1])]) diff --git a/comfy/latent_formats.py b/comfy/latent_formats.py index 4b3a3798c..f59999af6 100644 --- a/comfy/latent_formats.py +++ b/comfy/latent_formats.py @@ -755,6 +755,10 @@ class ACEAudio(LatentFormat): latent_channels = 8 latent_dimensions = 2 +class ACEAudio15(LatentFormat): + latent_channels = 64 + latent_dimensions = 1 + class ChromaRadiance(LatentFormat): latent_channels = 3 spacial_downscale_ratio = 1 diff --git a/comfy/ldm/ace/ace_step15.py b/comfy/ldm/ace/ace_step15.py new file mode 100644 index 000000000..1d7dc59a8 --- /dev/null +++ b/comfy/ldm/ace/ace_step15.py @@ -0,0 +1,1155 @@ +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import itertools +from comfy.ldm.modules.attention import optimized_attention +import comfy.model_management +from comfy.ldm.flux.layers import timestep_embedding + +def get_silence_latent(length, device): + head = torch.tensor([[[ 0.5707, 0.0982, 0.6909, -0.5658, 0.6266, 0.6996, -0.1365, -0.1291, + -0.0776, -0.1171, -0.2743, -0.8422, -0.1168, 1.5539, -4.6936, 0.7436, + -1.1846, -0.2637, 0.6933, -6.7266, 0.0966, -0.1187, -0.3501, -1.1736, + 0.0587, -2.0517, -1.3651, 0.7508, -0.2490, -1.3548, -0.1290, -0.7261, + 1.1132, -0.3249, 0.2337, 0.3004, 0.6605, -0.0298, -0.1989, -0.4041, + 0.2843, -1.0963, -0.5519, 0.2639, -1.0436, -0.1183, 0.0640, 0.4460, + -1.1001, -0.6172, -1.3241, 1.1379, 0.5623, -0.1507, -0.1963, -0.4742, + -2.4697, 0.5302, 0.5381, 0.4636, -0.1782, -0.0687, 1.0333, 0.4202], + [ 0.3040, -0.1367, 0.6200, 0.0665, -0.0642, 0.4655, -0.1187, -0.0440, + 0.2941, -0.2753, 0.0173, -0.2421, -0.0147, 1.5603, -2.7025, 0.7907, + -0.9736, -0.0682, 0.1294, -5.0707, -0.2167, 0.3302, -0.1513, -0.8100, + -0.3894, -0.2884, -0.3149, 0.8660, -0.3817, -1.7061, 0.5824, -0.4840, + 0.6938, 0.1859, 0.1753, 0.3081, 0.0195, 0.1403, -0.0754, -0.2091, + 0.1251, -0.1578, -0.4968, -0.1052, -0.4554, -0.0320, 0.1284, 0.4974, + -1.1889, -0.0344, -0.8313, 0.2953, 0.5445, -0.6249, -0.1595, -0.0682, + -3.1412, 0.0484, 0.4153, 0.8260, -0.1526, -0.0625, 0.5366, 0.8473], + [ 5.3524e-02, -1.7534e-01, 5.4443e-01, -4.3501e-01, -2.1317e-03, + 3.7200e-01, -4.0143e-03, -1.5516e-01, -1.2968e-01, -1.5375e-01, + -7.7107e-02, -2.0593e-01, -3.2780e-01, 1.5142e+00, -2.6101e+00, + 5.8698e-01, -1.2716e+00, -2.4773e-01, -2.7933e-02, -5.0799e+00, + 1.1601e-01, 4.0987e-01, -2.2030e-02, -6.6495e-01, -2.0995e-01, + -6.3474e-01, -1.5893e-01, 8.2745e-01, -2.2992e-01, -1.6816e+00, + 5.4440e-01, -4.9579e-01, 5.5128e-01, 3.0477e-01, 8.3052e-02, + -6.1782e-02, 5.9036e-03, 2.9553e-01, -8.0645e-02, -1.0060e-01, + 1.9144e-01, -3.8124e-01, -7.2949e-01, 2.4520e-02, -5.0814e-01, + 2.3977e-01, 9.2943e-02, 3.9256e-01, -1.1993e+00, -3.2752e-01, + -7.2707e-01, 2.9476e-01, 4.3542e-01, -8.8597e-01, -4.1686e-01, + -8.5390e-02, -2.9018e+00, 6.4988e-02, 5.3945e-01, 9.1988e-01, + 5.8762e-02, -7.0098e-02, 6.4772e-01, 8.9118e-01], + [-3.2225e-02, -1.3195e-01, 5.6411e-01, -5.4766e-01, -5.2170e-03, + 3.1425e-01, -5.4367e-02, -1.9419e-01, -1.3059e-01, -1.3660e-01, + -9.0984e-02, -1.9540e-01, -2.5590e-01, 1.5440e+00, -2.6349e+00, + 6.8273e-01, -1.2532e+00, -1.9810e-01, -2.2793e-02, -5.0506e+00, + 1.8818e-01, 5.0109e-01, 7.3546e-03, -6.8771e-01, -3.0676e-01, + -7.3257e-01, -1.6687e-01, 9.2232e-01, -1.8987e-01, -1.7267e+00, + 5.3355e-01, -5.3179e-01, 4.4953e-01, 2.8820e-01, 1.3012e-01, + -2.0943e-01, -1.1348e-01, 3.3929e-01, -1.5069e-01, -1.2919e-01, + 1.8929e-01, -3.6166e-01, -8.0756e-01, 6.6387e-02, -5.8867e-01, + 1.6978e-01, 1.0134e-01, 3.3877e-01, -1.2133e+00, -3.2492e-01, + -8.1237e-01, 3.8101e-01, 4.3765e-01, -8.0596e-01, -4.4531e-01, + -4.7513e-02, -2.9266e+00, 1.1741e-03, 4.5123e-01, 9.3075e-01, + 5.3688e-02, -1.9621e-01, 6.4530e-01, 9.3870e-01]]], device=device).movedim(-1, 1) + + silence_latent = torch.tensor([[[-1.3672e-01, -1.5820e-01, 5.8594e-01, -5.7422e-01, 3.0273e-02, + 2.7930e-01, -2.5940e-03, -2.0703e-01, -1.6113e-01, -1.4746e-01, + -2.7710e-02, -1.8066e-01, -2.9688e-01, 1.6016e+00, -2.6719e+00, + 7.7734e-01, -1.3516e+00, -1.9434e-01, -7.1289e-02, -5.0938e+00, + 2.4316e-01, 4.7266e-01, 4.6387e-02, -6.6406e-01, -2.1973e-01, + -6.7578e-01, -1.5723e-01, 9.5312e-01, -2.0020e-01, -1.7109e+00, + 5.8984e-01, -5.7422e-01, 5.1562e-01, 2.8320e-01, 1.4551e-01, + -1.8750e-01, -5.9814e-02, 3.6719e-01, -1.0059e-01, -1.5723e-01, + 2.0605e-01, -4.3359e-01, -8.2812e-01, 4.5654e-02, -6.6016e-01, + 1.4844e-01, 9.4727e-02, 3.8477e-01, -1.2578e+00, -3.3203e-01, + -8.5547e-01, 4.3359e-01, 4.2383e-01, -8.9453e-01, -5.0391e-01, + -5.6152e-02, -2.9219e+00, -2.4658e-02, 5.0391e-01, 9.8438e-01, + 7.2754e-02, -2.1582e-01, 6.3672e-01, 1.0000e+00]]], device=device).movedim(-1, 1).repeat(1, 1, length) + silence_latent[:, :, :head.shape[-1]] = head + return silence_latent + + +def get_layer_class(operations, layer_name): + if operations is not None and hasattr(operations, layer_name): + return getattr(operations, layer_name) + return getattr(nn, layer_name) + +class RotaryEmbedding(nn.Module): + def __init__(self, dim, max_position_embeddings=32768, base=1000000.0, dtype=None, device=None, operations=None): + super().__init__() + self.dim = dim + self.base = base + self.max_position_embeddings = max_position_embeddings + + inv_freq = 1.0 / (self.base ** (torch.arange(0, self.dim, 2, dtype=torch.float32, device=device) / self.dim)) + self.register_buffer("inv_freq", inv_freq, persistent=False) + self._set_cos_sin_cache(max_position_embeddings, device=device, dtype=torch.get_default_dtype() if dtype is None else dtype) + + def _set_cos_sin_cache(self, seq_len, device, dtype): + self.max_seq_len_cached = seq_len + t = torch.arange(self.max_seq_len_cached, device=device, dtype=torch.float32) + freqs = torch.outer(t, self.inv_freq) + emb = torch.cat((freqs, freqs), dim=-1) + self.register_buffer("cos_cached", emb.cos().to(dtype), persistent=False) + self.register_buffer("sin_cached", emb.sin().to(dtype), persistent=False) + + def forward(self, x, seq_len=None): + if seq_len > self.max_seq_len_cached: + self._set_cos_sin_cache(seq_len, x.device, x.dtype) + return ( + self.cos_cached[:seq_len].to(dtype=x.dtype, device=x.device), + self.sin_cached[:seq_len].to(dtype=x.dtype, device=x.device), + ) + +def rotate_half(x): + x1 = x[..., : x.shape[-1] // 2] + x2 = x[..., x.shape[-1] // 2 :] + return torch.cat((-x2, x1), dim=-1) + +def apply_rotary_pos_emb(q, k, cos, sin): + cos = cos.unsqueeze(0).unsqueeze(0) + sin = sin.unsqueeze(0).unsqueeze(0) + q_embed = (q * cos) + (rotate_half(q) * sin) + k_embed = (k * cos) + (rotate_half(k) * sin) + return q_embed, k_embed + +class MLP(nn.Module): + def __init__(self, hidden_size, intermediate_size, dtype=None, device=None, operations=None): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.gate_proj = Linear(hidden_size, intermediate_size, bias=False, dtype=dtype, device=device) + self.up_proj = Linear(hidden_size, intermediate_size, bias=False, dtype=dtype, device=device) + self.down_proj = Linear(intermediate_size, hidden_size, bias=False, dtype=dtype, device=device) + self.act_fn = nn.SiLU() + + def forward(self, x): + return self.down_proj(self.act_fn(self.gate_proj(x)) * self.up_proj(x)) + +class TimestepEmbedding(nn.Module): + def __init__(self, in_channels: int, time_embed_dim: int, scale: float = 1000, dtype=None, device=None, operations=None): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.linear_1 = Linear(in_channels, time_embed_dim, bias=True, dtype=dtype, device=device) + self.act1 = nn.SiLU() + self.linear_2 = Linear(time_embed_dim, time_embed_dim, bias=True, dtype=dtype, device=device) + self.in_channels = in_channels + self.act2 = nn.SiLU() + self.time_proj = Linear(time_embed_dim, time_embed_dim * 6, dtype=dtype, device=device) + self.scale = scale + + def forward(self, t, dtype=None): + t_freq = timestep_embedding(t, self.in_channels, time_factor=self.scale) + temb = self.linear_1(t_freq.to(dtype=dtype)) + temb = self.act1(temb) + temb = self.linear_2(temb) + timestep_proj = self.time_proj(self.act2(temb)).view(-1, 6, temb.shape[-1]) + return temb, timestep_proj + +class AceStepAttention(nn.Module): + def __init__( + self, + hidden_size, + num_heads, + num_kv_heads, + head_dim, + rms_norm_eps=1e-6, + is_cross_attention=False, + sliding_window=None, + dtype=None, + device=None, + operations=None + ): + super().__init__() + self.hidden_size = hidden_size + self.num_heads = num_heads + self.num_kv_heads = num_kv_heads + self.head_dim = head_dim + self.is_cross_attention = is_cross_attention + self.sliding_window = sliding_window + + Linear = get_layer_class(operations, "Linear") + + self.q_proj = Linear(hidden_size, num_heads * head_dim, bias=False, dtype=dtype, device=device) + self.k_proj = Linear(hidden_size, num_kv_heads * head_dim, bias=False, dtype=dtype, device=device) + self.v_proj = Linear(hidden_size, num_kv_heads * head_dim, bias=False, dtype=dtype, device=device) + self.o_proj = Linear(num_heads * head_dim, hidden_size, bias=False, dtype=dtype, device=device) + + self.q_norm = operations.RMSNorm(head_dim, eps=rms_norm_eps, dtype=dtype, device=device) + self.k_norm = operations.RMSNorm(head_dim, eps=rms_norm_eps, dtype=dtype, device=device) + + def forward( + self, + hidden_states, + encoder_hidden_states=None, + attention_mask=None, + position_embeddings=None, + ): + bsz, q_len, _ = hidden_states.size() + + query_states = self.q_proj(hidden_states) + query_states = query_states.view(bsz, q_len, self.num_heads, self.head_dim) + query_states = self.q_norm(query_states) + query_states = query_states.transpose(1, 2) + + if self.is_cross_attention and encoder_hidden_states is not None: + bsz_enc, kv_len, _ = encoder_hidden_states.size() + key_states = self.k_proj(encoder_hidden_states) + value_states = self.v_proj(encoder_hidden_states) + + key_states = key_states.view(bsz_enc, kv_len, self.num_kv_heads, self.head_dim) + key_states = self.k_norm(key_states) + value_states = value_states.view(bsz_enc, kv_len, self.num_kv_heads, self.head_dim) + + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + else: + kv_len = q_len + key_states = self.k_proj(hidden_states) + value_states = self.v_proj(hidden_states) + + key_states = key_states.view(bsz, q_len, self.num_kv_heads, self.head_dim) + key_states = self.k_norm(key_states) + value_states = value_states.view(bsz, q_len, self.num_kv_heads, self.head_dim) + + key_states = key_states.transpose(1, 2) + value_states = value_states.transpose(1, 2) + + if position_embeddings is not None: + cos, sin = position_embeddings + query_states, key_states = apply_rotary_pos_emb(query_states, key_states, cos, sin) + + n_rep = self.num_heads // self.num_kv_heads + if n_rep > 1: + key_states = key_states.repeat_interleave(n_rep, dim=1) + value_states = value_states.repeat_interleave(n_rep, dim=1) + + attn_bias = None + if self.sliding_window is not None and not self.is_cross_attention: + indices = torch.arange(q_len, device=query_states.device) + diff = indices.unsqueeze(1) - indices.unsqueeze(0) + in_window = torch.abs(diff) <= self.sliding_window + + window_bias = torch.zeros((q_len, kv_len), device=query_states.device, dtype=query_states.dtype) + min_value = torch.finfo(query_states.dtype).min + window_bias.masked_fill_(~in_window, min_value) + + window_bias = window_bias.unsqueeze(0).unsqueeze(0) + + if attn_bias is not None: + if attn_bias.dtype == torch.bool: + base_bias = torch.zeros_like(window_bias) + base_bias.masked_fill_(~attn_bias, min_value) + attn_bias = base_bias + window_bias + else: + attn_bias = attn_bias + window_bias + else: + attn_bias = window_bias + + attn_output = optimized_attention(query_states, key_states, value_states, self.num_heads, attn_bias, skip_reshape=True, low_precision_attention=False) + attn_output = self.o_proj(attn_output) + + return attn_output + +class AceStepDiTLayer(nn.Module): + def __init__( + self, + hidden_size, + num_heads, + num_kv_heads, + head_dim, + intermediate_size, + rms_norm_eps=1e-6, + layer_type="full_attention", + sliding_window=128, + dtype=None, + device=None, + operations=None + ): + super().__init__() + + self_attn_window = sliding_window if layer_type == "sliding_attention" else None + + self.self_attn_norm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + self.self_attn = AceStepAttention( + hidden_size, num_heads, num_kv_heads, head_dim, rms_norm_eps, + is_cross_attention=False, sliding_window=self_attn_window, + dtype=dtype, device=device, operations=operations + ) + + self.cross_attn_norm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + self.cross_attn = AceStepAttention( + hidden_size, num_heads, num_kv_heads, head_dim, rms_norm_eps, + is_cross_attention=True, dtype=dtype, device=device, operations=operations + ) + + self.mlp_norm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + self.mlp = MLP(hidden_size, intermediate_size, dtype=dtype, device=device, operations=operations) + + self.scale_shift_table = nn.Parameter(torch.empty(1, 6, hidden_size, dtype=dtype, device=device)) + + def forward( + self, + hidden_states, + temb, + encoder_hidden_states, + position_embeddings, + attention_mask=None, + encoder_attention_mask=None + ): + modulation = comfy.model_management.cast_to(self.scale_shift_table, dtype=temb.dtype, device=temb.device) + temb + shift_msa, scale_msa, gate_msa, c_shift_msa, c_scale_msa, c_gate_msa = modulation.chunk(6, dim=1) + + norm_hidden = self.self_attn_norm(hidden_states) + norm_hidden = norm_hidden * (1 + scale_msa) + shift_msa + + attn_out = self.self_attn( + norm_hidden, + position_embeddings=position_embeddings, + attention_mask=attention_mask + ) + hidden_states = hidden_states + attn_out * gate_msa + + norm_hidden = self.cross_attn_norm(hidden_states) + attn_out = self.cross_attn( + norm_hidden, + encoder_hidden_states=encoder_hidden_states, + attention_mask=encoder_attention_mask + ) + hidden_states = hidden_states + attn_out + + norm_hidden = self.mlp_norm(hidden_states) + norm_hidden = norm_hidden * (1 + c_scale_msa) + c_shift_msa + + mlp_out = self.mlp(norm_hidden) + hidden_states = hidden_states + mlp_out * c_gate_msa + + return hidden_states + +class AceStepEncoderLayer(nn.Module): + def __init__( + self, + hidden_size, + num_heads, + num_kv_heads, + head_dim, + intermediate_size, + rms_norm_eps=1e-6, + dtype=None, + device=None, + operations=None + ): + super().__init__() + self.self_attn = AceStepAttention( + hidden_size, num_heads, num_kv_heads, head_dim, rms_norm_eps, + is_cross_attention=False, dtype=dtype, device=device, operations=operations + ) + self.input_layernorm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + self.post_attention_layernorm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + self.mlp = MLP(hidden_size, intermediate_size, dtype=dtype, device=device, operations=operations) + + def forward(self, hidden_states, position_embeddings, attention_mask=None): + residual = hidden_states + hidden_states = self.input_layernorm(hidden_states) + hidden_states = self.self_attn( + hidden_states=hidden_states, + position_embeddings=position_embeddings, + attention_mask=attention_mask + ) + hidden_states = residual + hidden_states + + residual = hidden_states + hidden_states = self.post_attention_layernorm(hidden_states) + hidden_states = self.mlp(hidden_states) + hidden_states = residual + hidden_states + return hidden_states + +class AceStepLyricEncoder(nn.Module): + def __init__( + self, + text_hidden_dim, + hidden_size, + num_layers, + num_heads, + num_kv_heads, + head_dim, + intermediate_size, + rms_norm_eps=1e-6, + dtype=None, + device=None, + operations=None + ): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.embed_tokens = Linear(text_hidden_dim, hidden_size, dtype=dtype, device=device) + self.norm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + + self.rotary_emb = RotaryEmbedding( + head_dim, + base=1000000.0, + dtype=dtype, + device=device, + operations=operations + ) + + self.layers = nn.ModuleList([ + AceStepEncoderLayer( + hidden_size, num_heads, num_kv_heads, head_dim, intermediate_size, rms_norm_eps, + dtype=dtype, device=device, operations=operations + ) + for _ in range(num_layers) + ]) + + def forward(self, inputs_embeds, attention_mask=None): + hidden_states = self.embed_tokens(inputs_embeds) + seq_len = hidden_states.shape[1] + cos, sin = self.rotary_emb(hidden_states, seq_len=seq_len) + position_embeddings = (cos, sin) + + for layer in self.layers: + hidden_states = layer( + hidden_states, + position_embeddings=position_embeddings, + attention_mask=attention_mask + ) + + hidden_states = self.norm(hidden_states) + return hidden_states + +class AceStepTimbreEncoder(nn.Module): + def __init__( + self, + timbre_hidden_dim, + hidden_size, + num_layers, + num_heads, + num_kv_heads, + head_dim, + intermediate_size, + rms_norm_eps=1e-6, + dtype=None, + device=None, + operations=None + ): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.embed_tokens = Linear(timbre_hidden_dim, hidden_size, dtype=dtype, device=device) + self.norm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + + self.rotary_emb = RotaryEmbedding( + head_dim, + base=1000000.0, + dtype=dtype, + device=device, + operations=operations + ) + + self.layers = nn.ModuleList([ + AceStepEncoderLayer( + hidden_size, num_heads, num_kv_heads, head_dim, intermediate_size, rms_norm_eps, + dtype=dtype, device=device, operations=operations + ) + for _ in range(num_layers) + ]) + self.special_token = nn.Parameter(torch.empty(1, 1, hidden_size, device=device, dtype=dtype)) + + def unpack_timbre_embeddings(self, timbre_embs_packed, refer_audio_order_mask): + N, d = timbre_embs_packed.shape + device = timbre_embs_packed.device + B = N + counts = torch.bincount(refer_audio_order_mask, minlength=B) + max_count = counts.max().item() + + sorted_indices = torch.argsort( + refer_audio_order_mask * N + torch.arange(N, device=device), + stable=True + ) + sorted_batch_ids = refer_audio_order_mask[sorted_indices] + + positions = torch.arange(N, device=device) + batch_starts = torch.cat([torch.tensor([0], device=device), torch.cumsum(counts, dim=0)[:-1]]) + positions_in_sorted = positions - batch_starts[sorted_batch_ids] + + inverse_indices = torch.empty_like(sorted_indices) + inverse_indices[sorted_indices] = torch.arange(N, device=device) + positions_in_batch = positions_in_sorted[inverse_indices] + + indices_2d = refer_audio_order_mask * max_count + positions_in_batch + one_hot = F.one_hot(indices_2d, num_classes=B * max_count).to(timbre_embs_packed.dtype) + + timbre_embs_flat = one_hot.t() @ timbre_embs_packed + timbre_embs_unpack = timbre_embs_flat.view(B, max_count, d) + + mask_flat = (one_hot.sum(dim=0) > 0).long() + new_mask = mask_flat.view(B, max_count) + return timbre_embs_unpack, new_mask + + def forward(self, refer_audio_acoustic_hidden_states_packed, refer_audio_order_mask, attention_mask=None): + hidden_states = self.embed_tokens(refer_audio_acoustic_hidden_states_packed) + if hidden_states.dim() == 2: + hidden_states = hidden_states.unsqueeze(0) + + seq_len = hidden_states.shape[1] + cos, sin = self.rotary_emb(hidden_states, seq_len=seq_len) + + for layer in self.layers: + hidden_states = layer( + hidden_states, + position_embeddings=(cos, sin), + attention_mask=attention_mask + ) + hidden_states = self.norm(hidden_states) + + flat_states = hidden_states[:, 0, :] + unpacked_embs, unpacked_mask = self.unpack_timbre_embeddings(flat_states, refer_audio_order_mask) + return unpacked_embs, unpacked_mask + + +def pack_sequences(hidden1, hidden2, mask1, mask2): + hidden_cat = torch.cat([hidden1, hidden2], dim=1) + B, L, D = hidden_cat.shape + + if mask1 is not None and mask2 is not None: + mask_cat = torch.cat([mask1, mask2], dim=1) + sort_idx = mask_cat.argsort(dim=1, descending=True, stable=True) + gather_idx = sort_idx.unsqueeze(-1).expand(B, L, D) + hidden_sorted = torch.gather(hidden_cat, 1, gather_idx) + lengths = mask_cat.sum(dim=1) + new_mask = (torch.arange(L, device=hidden_cat.device).unsqueeze(0) < lengths.unsqueeze(1)) + else: + new_mask = None + hidden_sorted = hidden_cat + + return hidden_sorted, new_mask + +class AceStepConditionEncoder(nn.Module): + def __init__( + self, + text_hidden_dim, + timbre_hidden_dim, + hidden_size, + num_lyric_layers, + num_timbre_layers, + num_heads, + num_kv_heads, + head_dim, + intermediate_size, + rms_norm_eps=1e-6, + dtype=None, + device=None, + operations=None + ): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.text_projector = Linear(text_hidden_dim, hidden_size, bias=False, dtype=dtype, device=device) + + self.lyric_encoder = AceStepLyricEncoder( + text_hidden_dim=text_hidden_dim, + hidden_size=hidden_size, + num_layers=num_lyric_layers, + num_heads=num_heads, + num_kv_heads=num_kv_heads, + head_dim=head_dim, + intermediate_size=intermediate_size, + rms_norm_eps=rms_norm_eps, + dtype=dtype, + device=device, + operations=operations + ) + + self.timbre_encoder = AceStepTimbreEncoder( + timbre_hidden_dim=timbre_hidden_dim, + hidden_size=hidden_size, + num_layers=num_timbre_layers, + num_heads=num_heads, + num_kv_heads=num_kv_heads, + head_dim=head_dim, + intermediate_size=intermediate_size, + rms_norm_eps=rms_norm_eps, + dtype=dtype, + device=device, + operations=operations + ) + + def forward( + self, + text_hidden_states=None, + text_attention_mask=None, + lyric_hidden_states=None, + lyric_attention_mask=None, + refer_audio_acoustic_hidden_states_packed=None, + refer_audio_order_mask=None + ): + text_emb = self.text_projector(text_hidden_states) + + lyric_emb = self.lyric_encoder( + inputs_embeds=lyric_hidden_states, + attention_mask=lyric_attention_mask + ) + + timbre_emb, timbre_mask = self.timbre_encoder( + refer_audio_acoustic_hidden_states_packed, + refer_audio_order_mask + ) + + merged_emb, merged_mask = pack_sequences(lyric_emb, timbre_emb, lyric_attention_mask, timbre_mask) + final_emb, final_mask = pack_sequences(merged_emb, text_emb, merged_mask, text_attention_mask) + + return final_emb, final_mask + +# -------------------------------------------------------------------------------- +# Main Diffusion Model (DiT) +# -------------------------------------------------------------------------------- + +class AceStepDiTModel(nn.Module): + def __init__( + self, + in_channels, + hidden_size, + num_layers, + num_heads, + num_kv_heads, + head_dim, + intermediate_size, + patch_size, + audio_acoustic_hidden_dim, + layer_types=None, + sliding_window=128, + rms_norm_eps=1e-6, + dtype=None, + device=None, + operations=None + ): + super().__init__() + self.patch_size = patch_size + self.rotary_emb = RotaryEmbedding( + head_dim, + base=1000000.0, + dtype=dtype, + device=device, + operations=operations + ) + + Conv1d = get_layer_class(operations, "Conv1d") + ConvTranspose1d = get_layer_class(operations, "ConvTranspose1d") + Linear = get_layer_class(operations, "Linear") + + self.proj_in = nn.Sequential( + nn.Identity(), + Conv1d( + in_channels, hidden_size, kernel_size=patch_size, stride=patch_size, + dtype=dtype, device=device)) + + self.time_embed = TimestepEmbedding(256, hidden_size, dtype=dtype, device=device, operations=operations) + self.time_embed_r = TimestepEmbedding(256, hidden_size, dtype=dtype, device=device, operations=operations) + self.condition_embedder = Linear(hidden_size, hidden_size, dtype=dtype, device=device) + + if layer_types is None: + layer_types = ["full_attention"] * num_layers + + if len(layer_types) < num_layers: + layer_types = list(itertools.islice(itertools.cycle(layer_types), num_layers)) + + self.layers = nn.ModuleList([ + AceStepDiTLayer( + hidden_size, num_heads, num_kv_heads, head_dim, intermediate_size, rms_norm_eps, + layer_type=layer_types[i], + sliding_window=sliding_window, + dtype=dtype, device=device, operations=operations + ) for i in range(num_layers) + ]) + + self.norm_out = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + self.proj_out = nn.Sequential( + nn.Identity(), + ConvTranspose1d(hidden_size, audio_acoustic_hidden_dim, kernel_size=patch_size, stride=patch_size, dtype=dtype, device=device) + ) + + self.scale_shift_table = nn.Parameter(torch.empty(1, 2, hidden_size, dtype=dtype, device=device)) + + def forward( + self, + hidden_states, + timestep, + timestep_r, + attention_mask, + encoder_hidden_states, + encoder_attention_mask, + context_latents + ): + temb_t, proj_t = self.time_embed(timestep, dtype=hidden_states.dtype) + temb_r, proj_r = self.time_embed_r(timestep - timestep_r, dtype=hidden_states.dtype) + temb = temb_t + temb_r + timestep_proj = proj_t + proj_r + + x = torch.cat([context_latents, hidden_states], dim=-1) + original_seq_len = x.shape[1] + + pad_length = 0 + if x.shape[1] % self.patch_size != 0: + pad_length = self.patch_size - (x.shape[1] % self.patch_size) + x = F.pad(x, (0, 0, 0, pad_length), mode='constant', value=0) + + x = x.transpose(1, 2) + x = self.proj_in(x) + x = x.transpose(1, 2) + + encoder_hidden_states = self.condition_embedder(encoder_hidden_states) + + seq_len = x.shape[1] + cos, sin = self.rotary_emb(x, seq_len=seq_len) + + for layer in self.layers: + x = layer( + hidden_states=x, + temb=timestep_proj, + encoder_hidden_states=encoder_hidden_states, + position_embeddings=(cos, sin), + attention_mask=None, + encoder_attention_mask=None + ) + + shift, scale = (comfy.model_management.cast_to(self.scale_shift_table, dtype=temb.dtype, device=temb.device) + temb.unsqueeze(1)).chunk(2, dim=1) + x = self.norm_out(x) * (1 + scale) + shift + + x = x.transpose(1, 2) + x = self.proj_out(x) + x = x.transpose(1, 2) + + x = x[:, :original_seq_len, :] + return x + + +class AttentionPooler(nn.Module): + def __init__(self, hidden_size, num_layers, head_dim, rms_norm_eps, dtype=None, device=None, operations=None): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.embed_tokens = Linear(hidden_size, hidden_size, dtype=dtype, device=device) + self.norm = operations.RMSNorm(hidden_size, eps=rms_norm_eps, dtype=dtype, device=device) + self.rotary_emb = RotaryEmbedding(head_dim, dtype=dtype, device=device, operations=operations) + self.special_token = nn.Parameter(torch.empty(1, 1, hidden_size, dtype=dtype, device=device)) + + self.layers = nn.ModuleList([ + AceStepEncoderLayer( + hidden_size, 16, 8, head_dim, hidden_size * 3, rms_norm_eps, + dtype=dtype, device=device, operations=operations + ) + for _ in range(num_layers) + ]) + + def forward(self, x): + B, T, P, D = x.shape + x = self.embed_tokens(x) + special = comfy.model_management.cast_to(self.special_token, device=x.device, dtype=x.dtype).expand(B, T, 1, -1) + x = torch.cat([special, x], dim=2) + x = x.view(B * T, P + 1, D) + + cos, sin = self.rotary_emb(x, seq_len=P + 1) + for layer in self.layers: + x = layer(x, (cos, sin)) + + x = self.norm(x) + return x[:, 0, :].view(B, T, D) + + +class FSQ(nn.Module): + def __init__( + self, + levels, + dim=None, + device=None, + dtype=None, + operations=None + ): + super().__init__() + + _levels = torch.tensor(levels, dtype=torch.int32, device=device) + self.register_buffer('_levels', _levels, persistent=False) + + _basis = torch.cumprod(torch.tensor([1] + levels[:-1], dtype=torch.int32, device=device), dim=0) + self.register_buffer('_basis', _basis, persistent=False) + + self.codebook_dim = len(levels) + self.dim = dim if dim is not None else self.codebook_dim + + requires_projection = self.dim != self.codebook_dim + if requires_projection: + self.project_in = operations.Linear(self.dim, self.codebook_dim, device=device, dtype=dtype) + self.project_out = operations.Linear(self.codebook_dim, self.dim, device=device, dtype=dtype) + else: + self.project_in = nn.Identity() + self.project_out = nn.Identity() + + self.codebook_size = self._levels.prod().item() + + indices = torch.arange(self.codebook_size, device=device) + implicit_codebook = self._indices_to_codes(indices) + + if dtype is not None: + implicit_codebook = implicit_codebook.to(dtype) + + self.register_buffer('implicit_codebook', implicit_codebook, persistent=False) + + def bound(self, z): + levels_minus_1 = (comfy.model_management.cast_to(self._levels, device=z.device, dtype=z.dtype) - 1) + scale = 2. / levels_minus_1 + bracket = (levels_minus_1 * (torch.tanh(z) + 1) / 2.) + 0.5 + + zhat = bracket.floor() + bracket_ste = bracket + (zhat - bracket).detach() + + return scale * bracket_ste - 1. + + def _indices_to_codes(self, indices): + indices = indices.unsqueeze(-1) + codes_non_centered = (indices // self._basis) % self._levels + return codes_non_centered.float() * (2. / (self._levels.float() - 1)) - 1. + + def codes_to_indices(self, zhat): + zhat_normalized = (zhat + 1.) / (2. / (comfy.model_management.cast_to(self._levels, device=zhat.device, dtype=zhat.dtype) - 1)) + return (zhat_normalized * comfy.model_management.cast_to(self._basis, device=zhat.device, dtype=zhat.dtype)).sum(dim=-1).round().to(torch.int32) + + def forward(self, z): + orig_dtype = z.dtype + z = self.project_in(z) + + codes = self.bound(z) + indices = self.codes_to_indices(codes) + + out = self.project_out(codes) + return out.to(orig_dtype), indices + + +class ResidualFSQ(nn.Module): + def __init__( + self, + levels, + num_quantizers, + dim=None, + bound_hard_clamp=True, + device=None, + dtype=None, + operations=None, + **kwargs + ): + super().__init__() + + codebook_dim = len(levels) + dim = dim if dim is not None else codebook_dim + + requires_projection = codebook_dim != dim + if requires_projection: + self.project_in = operations.Linear(dim, codebook_dim, device=device, dtype=dtype) + self.project_out = operations.Linear(codebook_dim, dim, device=device, dtype=dtype) + else: + self.project_in = nn.Identity() + self.project_out = nn.Identity() + + self.layers = nn.ModuleList() + levels_tensor = torch.tensor(levels, device=device) + scales = [] + + for ind in range(num_quantizers): + scale_val = levels_tensor.float() ** -ind + scales.append(scale_val) + + self.layers.append(FSQ( + levels=levels, + dim=codebook_dim, + device=device, + dtype=dtype, + operations=operations + )) + + scales_tensor = torch.stack(scales) + if dtype is not None: + scales_tensor = scales_tensor.to(dtype) + self.register_buffer('scales', scales_tensor, persistent=False) + + if bound_hard_clamp: + val = 1 + (1 / (levels_tensor.float() - 1)) + if dtype is not None: + val = val.to(dtype) + self.register_buffer('soft_clamp_input_value', val, persistent=False) + + def get_output_from_indices(self, indices, dtype=torch.float32): + if indices.dim() == 2: + indices = indices.unsqueeze(-1) + + all_codes = [] + for i, layer in enumerate(self.layers): + idx = indices[..., i].long() + codes = F.embedding(idx, comfy.model_management.cast_to(layer.implicit_codebook, device=idx.device, dtype=dtype)) + all_codes.append(codes * comfy.model_management.cast_to(self.scales[i], device=idx.device, dtype=dtype)) + + codes_summed = torch.stack(all_codes).sum(dim=0) + return self.project_out(codes_summed) + + def forward(self, x): + x = self.project_in(x) + + if hasattr(self, 'soft_clamp_input_value'): + sc_val = comfy.model_management.cast_to(self.soft_clamp_input_value, device=x.device, dtype=x.dtype) + x = (x / sc_val).tanh() * sc_val + + quantized_out = torch.tensor(0., device=x.device, dtype=x.dtype) + residual = x + all_indices = [] + + for layer, scale in zip(self.layers, self.scales): + scale = comfy.model_management.cast_to(scale, device=x.device, dtype=x.dtype) + + quantized, indices = layer(residual / scale) + quantized = quantized * scale + + residual = residual - quantized.detach() + quantized_out = quantized_out + quantized + all_indices.append(indices) + + quantized_out = self.project_out(quantized_out) + all_indices = torch.stack(all_indices, dim=-1) + + return quantized_out, all_indices + + +class AceStepAudioTokenizer(nn.Module): + def __init__( + self, + audio_acoustic_hidden_dim, + hidden_size, + pool_window_size, + fsq_dim, + fsq_levels, + fsq_input_num_quantizers, + num_layers, + head_dim, + rms_norm_eps, + dtype=None, + device=None, + operations=None + ): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.audio_acoustic_proj = Linear(audio_acoustic_hidden_dim, hidden_size, dtype=dtype, device=device) + self.attention_pooler = AttentionPooler( + hidden_size, num_layers, head_dim, rms_norm_eps, dtype=dtype, device=device, operations=operations + ) + self.pool_window_size = pool_window_size + self.fsq_dim = fsq_dim + self.quantizer = ResidualFSQ( + dim=fsq_dim, + levels=fsq_levels, + num_quantizers=fsq_input_num_quantizers, + bound_hard_clamp=True, + dtype=dtype, device=device, operations=operations + ) + + def forward(self, hidden_states): + hidden_states = self.audio_acoustic_proj(hidden_states) + hidden_states = self.attention_pooler(hidden_states) + quantized, indices = self.quantizer(hidden_states) + return quantized, indices + + def tokenize(self, x): + B, T, D = x.shape + P = self.pool_window_size + + if T % P != 0: + pad = P - (T % P) + x = F.pad(x, (0, 0, 0, pad)) + T = x.shape[1] + + T_patch = T // P + x = x.view(B, T_patch, P, D) + + quantized, indices = self.forward(x) + return quantized, indices + + +class AudioTokenDetokenizer(nn.Module): + def __init__( + self, + hidden_size, + pool_window_size, + audio_acoustic_hidden_dim, + num_layers, + head_dim, + dtype=None, + device=None, + operations=None + ): + super().__init__() + Linear = get_layer_class(operations, "Linear") + self.pool_window_size = pool_window_size + self.embed_tokens = Linear(hidden_size, hidden_size, dtype=dtype, device=device) + self.special_tokens = nn.Parameter(torch.empty(1, pool_window_size, hidden_size, dtype=dtype, device=device)) + self.rotary_emb = RotaryEmbedding(head_dim, dtype=dtype, device=device, operations=operations) + self.layers = nn.ModuleList([ + AceStepEncoderLayer( + hidden_size, 16, 8, head_dim, hidden_size * 3, 1e-6, + dtype=dtype, device=device, operations=operations + ) + for _ in range(num_layers) + ]) + self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device) + self.proj_out = Linear(hidden_size, audio_acoustic_hidden_dim, dtype=dtype, device=device) + + def forward(self, x): + B, T, D = x.shape + x = self.embed_tokens(x) + x = x.unsqueeze(2).repeat(1, 1, self.pool_window_size, 1) + x = x + comfy.model_management.cast_to(self.special_tokens.expand(B, T, -1, -1), device=x.device, dtype=x.dtype) + x = x.view(B * T, self.pool_window_size, D) + + cos, sin = self.rotary_emb(x, seq_len=self.pool_window_size) + for layer in self.layers: + x = layer(x, (cos, sin)) + + x = self.norm(x) + x = self.proj_out(x) + return x.view(B, T * self.pool_window_size, -1) + + +class AceStepConditionGenerationModel(nn.Module): + def __init__( + self, + in_channels=192, + hidden_size=2048, + text_hidden_dim=1024, + timbre_hidden_dim=64, + audio_acoustic_hidden_dim=64, + num_dit_layers=24, + num_lyric_layers=8, + num_timbre_layers=4, + num_tokenizer_layers=2, + num_heads=16, + num_kv_heads=8, + head_dim=128, + intermediate_size=6144, + patch_size=2, + pool_window_size=5, + rms_norm_eps=1e-06, + timestep_mu=-0.4, + timestep_sigma=1.0, + data_proportion=0.5, + sliding_window=128, + layer_types=None, + fsq_dim=2048, + fsq_levels=[8, 8, 8, 5, 5, 5], + fsq_input_num_quantizers=1, + audio_model=None, + dtype=None, + device=None, + operations=None + ): + super().__init__() + self.dtype = dtype + self.timestep_mu = timestep_mu + self.timestep_sigma = timestep_sigma + self.data_proportion = data_proportion + self.pool_window_size = pool_window_size + + if layer_types is None: + layer_types = [] + for i in range(num_dit_layers): + layer_types.append("sliding_attention" if i % 2 == 0 else "full_attention") + + self.decoder = AceStepDiTModel( + in_channels, hidden_size, num_dit_layers, num_heads, num_kv_heads, head_dim, + intermediate_size, patch_size, audio_acoustic_hidden_dim, + layer_types=layer_types, sliding_window=sliding_window, rms_norm_eps=rms_norm_eps, + dtype=dtype, device=device, operations=operations + ) + self.encoder = AceStepConditionEncoder( + text_hidden_dim, timbre_hidden_dim, hidden_size, num_lyric_layers, num_timbre_layers, + num_heads, num_kv_heads, head_dim, intermediate_size, rms_norm_eps, + dtype=dtype, device=device, operations=operations + ) + self.tokenizer = AceStepAudioTokenizer( + audio_acoustic_hidden_dim, hidden_size, pool_window_size, fsq_dim=fsq_dim, fsq_levels=fsq_levels, fsq_input_num_quantizers=fsq_input_num_quantizers, num_layers=num_tokenizer_layers, head_dim=head_dim, rms_norm_eps=rms_norm_eps, + dtype=dtype, device=device, operations=operations + ) + self.detokenizer = AudioTokenDetokenizer( + hidden_size, pool_window_size, audio_acoustic_hidden_dim, num_layers=2, head_dim=head_dim, + dtype=dtype, device=device, operations=operations + ) + self.null_condition_emb = nn.Parameter(torch.empty(1, 1, hidden_size, dtype=dtype, device=device)) + + def prepare_condition( + self, + text_hidden_states, text_attention_mask, + lyric_hidden_states, lyric_attention_mask, + refer_audio_acoustic_hidden_states_packed, refer_audio_order_mask, + src_latents, chunk_masks, is_covers, + precomputed_lm_hints_25Hz=None, + audio_codes=None + ): + encoder_hidden, encoder_mask = self.encoder( + text_hidden_states, text_attention_mask, + lyric_hidden_states, lyric_attention_mask, + refer_audio_acoustic_hidden_states_packed, refer_audio_order_mask + ) + + if precomputed_lm_hints_25Hz is not None: + lm_hints = precomputed_lm_hints_25Hz + else: + if audio_codes is not None: + if audio_codes.shape[1] * 5 < src_latents.shape[1]: + audio_codes = torch.nn.functional.pad(audio_codes, (0, math.ceil(src_latents.shape[1] / 5) - audio_codes.shape[1]), "constant", 35847) + lm_hints_5Hz = self.tokenizer.quantizer.get_output_from_indices(audio_codes, dtype=text_hidden_states.dtype) + else: + lm_hints_5Hz, indices = self.tokenizer.tokenize(refer_audio_acoustic_hidden_states_packed) + + lm_hints = self.detokenizer(lm_hints_5Hz) + + lm_hints = lm_hints[:, :src_latents.shape[1], :] + if is_covers is None or is_covers is True: + src_latents = lm_hints + elif is_covers is False: + src_latents = refer_audio_acoustic_hidden_states_packed + + context_latents = torch.cat([src_latents, chunk_masks.to(src_latents.dtype)], dim=-1) + + return encoder_hidden, encoder_mask, context_latents + + def forward(self, x, timestep, context, lyric_embed=None, refer_audio=None, audio_codes=None, is_covers=None, replace_with_null_embeds=False, **kwargs): + text_attention_mask = None + lyric_attention_mask = None + refer_audio_order_mask = None + attention_mask = None + chunk_masks = None + src_latents = None + precomputed_lm_hints_25Hz = None + lyric_hidden_states = lyric_embed + text_hidden_states = context + refer_audio_acoustic_hidden_states_packed = refer_audio.movedim(-1, -2) + + x = x.movedim(-1, -2) + + if refer_audio_order_mask is None: + refer_audio_order_mask = torch.zeros((x.shape[0],), device=x.device, dtype=torch.long) + + if src_latents is None: + src_latents = x + + if chunk_masks is None: + chunk_masks = torch.ones_like(x) + + enc_hidden, enc_mask, context_latents = self.prepare_condition( + text_hidden_states, text_attention_mask, + lyric_hidden_states, lyric_attention_mask, + refer_audio_acoustic_hidden_states_packed, refer_audio_order_mask, + src_latents, chunk_masks, is_covers, precomputed_lm_hints_25Hz=precomputed_lm_hints_25Hz, audio_codes=audio_codes + ) + + if replace_with_null_embeds: + enc_hidden[:] = self.null_condition_emb.to(enc_hidden) + + out = self.decoder(hidden_states=x, + timestep=timestep, + timestep_r=timestep, + attention_mask=attention_mask, + encoder_hidden_states=enc_hidden, + encoder_attention_mask=enc_mask, + context_latents=context_latents + ) + + return out.movedim(-1, -2) diff --git a/comfy/ldm/anima/model.py b/comfy/ldm/anima/model.py index 2e6ed58fa..6fcf8df90 100644 --- a/comfy/ldm/anima/model.py +++ b/comfy/ldm/anima/model.py @@ -179,8 +179,8 @@ class LLMAdapter(nn.Module): if source_attention_mask.ndim == 2: source_attention_mask = source_attention_mask.unsqueeze(1).unsqueeze(1) - x = self.in_proj(self.embed(target_input_ids)) context = source_hidden_states + x = self.in_proj(self.embed(target_input_ids, out_dtype=context.dtype)) position_ids = torch.arange(x.shape[1], device=x.device).unsqueeze(0) position_ids_context = torch.arange(context.shape[1], device=x.device).unsqueeze(0) position_embeddings = self.rotary_emb(x, position_ids) @@ -195,8 +195,20 @@ class Anima(MiniTrainDIT): super().__init__(*args, **kwargs) self.llm_adapter = LLMAdapter(device=kwargs.get("device"), dtype=kwargs.get("dtype"), operations=kwargs.get("operations")) - def preprocess_text_embeds(self, text_embeds, text_ids): + def preprocess_text_embeds(self, text_embeds, text_ids, t5xxl_weights=None): if text_ids is not None: - return self.llm_adapter(text_embeds, text_ids) + out = self.llm_adapter(text_embeds, text_ids) + if t5xxl_weights is not None: + out = out * t5xxl_weights + + if out.shape[1] < 512: + out = torch.nn.functional.pad(out, (0, 0, 0, 512 - out.shape[1])) + return out else: return text_embeds + + def forward(self, x, timesteps, context, **kwargs): + t5xxl_ids = kwargs.pop("t5xxl_ids", None) + if t5xxl_ids is not None: + context = self.preprocess_text_embeds(context, t5xxl_ids, t5xxl_weights=kwargs.pop("t5xxl_weights", None)) + return super().forward(x, timesteps, context, **kwargs) diff --git a/comfy/ldm/chroma/layers.py b/comfy/ldm/chroma/layers.py index 2d5684348..df348a8ed 100644 --- a/comfy/ldm/chroma/layers.py +++ b/comfy/ldm/chroma/layers.py @@ -3,7 +3,6 @@ from torch import Tensor, nn from comfy.ldm.flux.layers import ( MLPEmbedder, - RMSNorm, ModulationOut, ) @@ -29,7 +28,7 @@ class Approximator(nn.Module): super().__init__() self.in_proj = operations.Linear(in_dim, hidden_dim, bias=True, dtype=dtype, device=device) self.layers = nn.ModuleList([MLPEmbedder(hidden_dim, hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)]) - self.norms = nn.ModuleList([RMSNorm(hidden_dim, dtype=dtype, device=device, operations=operations) for x in range( n_layers)]) + self.norms = nn.ModuleList([operations.RMSNorm(hidden_dim, dtype=dtype, device=device) for x in range( n_layers)]) self.out_proj = operations.Linear(hidden_dim, out_dim, dtype=dtype, device=device) @property diff --git a/comfy/ldm/chroma/model.py b/comfy/ldm/chroma/model.py index 2e8ef0687..9fd865f20 100644 --- a/comfy/ldm/chroma/model.py +++ b/comfy/ldm/chroma/model.py @@ -152,6 +152,7 @@ class Chroma(nn.Module): transformer_options={}, attn_mask: Tensor = None, ) -> Tensor: + transformer_options = transformer_options.copy() patches_replace = transformer_options.get("patches_replace", {}) # running on sequences img @@ -228,6 +229,7 @@ class Chroma(nn.Module): transformer_options["total_blocks"] = len(self.single_blocks) transformer_options["block_type"] = "single" + transformer_options["img_slice"] = [txt.shape[1], img.shape[1]] for i, block in enumerate(self.single_blocks): transformer_options["block_index"] = i if i not in self.skip_dit: diff --git a/comfy/ldm/chroma_radiance/layers.py b/comfy/ldm/chroma_radiance/layers.py index 3c7bc9b6b..08d31e0ba 100644 --- a/comfy/ldm/chroma_radiance/layers.py +++ b/comfy/ldm/chroma_radiance/layers.py @@ -4,8 +4,6 @@ from functools import lru_cache import torch from torch import nn -from comfy.ldm.flux.layers import RMSNorm - class NerfEmbedder(nn.Module): """ @@ -145,7 +143,7 @@ class NerfGLUBlock(nn.Module): # We now need to generate parameters for 3 matrices. total_params = 3 * hidden_size_x**2 * mlp_ratio self.param_generator = operations.Linear(hidden_size_s, total_params, dtype=dtype, device=device) - self.norm = RMSNorm(hidden_size_x, dtype=dtype, device=device, operations=operations) + self.norm = operations.RMSNorm(hidden_size_x, dtype=dtype, device=device) self.mlp_ratio = mlp_ratio @@ -178,7 +176,7 @@ class NerfGLUBlock(nn.Module): class NerfFinalLayer(nn.Module): def __init__(self, hidden_size, out_channels, dtype=None, device=None, operations=None): super().__init__() - self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations) + self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device) self.linear = operations.Linear(hidden_size, out_channels, dtype=dtype, device=device) def forward(self, x: torch.Tensor) -> torch.Tensor: @@ -190,7 +188,7 @@ class NerfFinalLayer(nn.Module): class NerfFinalLayerConv(nn.Module): def __init__(self, hidden_size: int, out_channels: int, dtype=None, device=None, operations=None): super().__init__() - self.norm = RMSNorm(hidden_size, dtype=dtype, device=device, operations=operations) + self.norm = operations.RMSNorm(hidden_size, dtype=dtype, device=device) self.conv = operations.Conv2d( in_channels=hidden_size, out_channels=out_channels, diff --git a/comfy/ldm/cosmos/predict2.py b/comfy/ldm/cosmos/predict2.py index c270e6333..2268bff38 100644 --- a/comfy/ldm/cosmos/predict2.py +++ b/comfy/ldm/cosmos/predict2.py @@ -335,7 +335,7 @@ class FinalLayer(nn.Module): device=None, dtype=None, operations=None ): super().__init__() - self.layer_norm = nn.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) + self.layer_norm = operations.LayerNorm(hidden_size, elementwise_affine=False, eps=1e-6) self.linear = operations.Linear( hidden_size, spatial_patch_size * spatial_patch_size * temporal_patch_size * out_channels, bias=False, device=device, dtype=dtype ) @@ -463,6 +463,8 @@ class Block(nn.Module): extra_per_block_pos_emb: Optional[torch.Tensor] = None, transformer_options: Optional[dict] = {}, ) -> torch.Tensor: + residual_dtype = x_B_T_H_W_D.dtype + compute_dtype = emb_B_T_D.dtype if extra_per_block_pos_emb is not None: x_B_T_H_W_D = x_B_T_H_W_D + extra_per_block_pos_emb @@ -512,7 +514,7 @@ class Block(nn.Module): result_B_T_H_W_D = rearrange( self.self_attn( # normalized_x_B_T_HW_D, - rearrange(normalized_x_B_T_H_W_D, "b t h w d -> b (t h w) d"), + rearrange(normalized_x_B_T_H_W_D.to(compute_dtype), "b t h w d -> b (t h w) d"), None, rope_emb=rope_emb_L_1_1_D, transformer_options=transformer_options, @@ -522,7 +524,7 @@ class Block(nn.Module): h=H, w=W, ) - x_B_T_H_W_D = x_B_T_H_W_D + gate_self_attn_B_T_1_1_D * result_B_T_H_W_D + x_B_T_H_W_D = x_B_T_H_W_D + gate_self_attn_B_T_1_1_D.to(residual_dtype) * result_B_T_H_W_D.to(residual_dtype) def _x_fn( _x_B_T_H_W_D: torch.Tensor, @@ -536,7 +538,7 @@ class Block(nn.Module): ) _result_B_T_H_W_D = rearrange( self.cross_attn( - rearrange(_normalized_x_B_T_H_W_D, "b t h w d -> b (t h w) d"), + rearrange(_normalized_x_B_T_H_W_D.to(compute_dtype), "b t h w d -> b (t h w) d"), crossattn_emb, rope_emb=rope_emb_L_1_1_D, transformer_options=transformer_options, @@ -555,7 +557,7 @@ class Block(nn.Module): shift_cross_attn_B_T_1_1_D, transformer_options=transformer_options, ) - x_B_T_H_W_D = result_B_T_H_W_D * gate_cross_attn_B_T_1_1_D + x_B_T_H_W_D + x_B_T_H_W_D = result_B_T_H_W_D.to(residual_dtype) * gate_cross_attn_B_T_1_1_D.to(residual_dtype) + x_B_T_H_W_D normalized_x_B_T_H_W_D = _fn( x_B_T_H_W_D, @@ -563,8 +565,8 @@ class Block(nn.Module): scale_mlp_B_T_1_1_D, shift_mlp_B_T_1_1_D, ) - result_B_T_H_W_D = self.mlp(normalized_x_B_T_H_W_D) - x_B_T_H_W_D = x_B_T_H_W_D + gate_mlp_B_T_1_1_D * result_B_T_H_W_D + result_B_T_H_W_D = self.mlp(normalized_x_B_T_H_W_D.to(compute_dtype)) + x_B_T_H_W_D = x_B_T_H_W_D + gate_mlp_B_T_1_1_D.to(residual_dtype) * result_B_T_H_W_D.to(residual_dtype) return x_B_T_H_W_D @@ -876,6 +878,14 @@ class MiniTrainDIT(nn.Module): "extra_per_block_pos_emb": extra_pos_emb_B_T_H_W_D_or_T_H_W_B_D, "transformer_options": kwargs.get("transformer_options", {}), } + + # The residual stream for this model has large values. To make fp16 compute_dtype work, we keep the residual stream + # in fp32, but run attention and MLP modules in fp16. + # An alternate method that clamps fp16 values "works" in the sense that it makes coherent images, but there is noticeable + # quality degradation and visual artifacts. + if x_B_T_H_W_D.dtype == torch.float16: + x_B_T_H_W_D = x_B_T_H_W_D.float() + for block in self.blocks: x_B_T_H_W_D = block( x_B_T_H_W_D, @@ -884,6 +894,6 @@ class MiniTrainDIT(nn.Module): **block_kwargs, ) - x_B_T_H_W_O = self.final_layer(x_B_T_H_W_D, t_embedding_B_T_D, adaln_lora_B_T_3D=adaln_lora_B_T_3D) + x_B_T_H_W_O = self.final_layer(x_B_T_H_W_D.to(crossattn_emb.dtype), t_embedding_B_T_D, adaln_lora_B_T_3D=adaln_lora_B_T_3D) x_B_C_Tt_Hp_Wp = self.unpatchify(x_B_T_H_W_O)[:, :, :orig_shape[-3], :orig_shape[-2], :orig_shape[-1]] return x_B_C_Tt_Hp_Wp diff --git a/comfy/ldm/flux/layers.py b/comfy/ldm/flux/layers.py index 60f2bdae2..8b3f500d7 100644 --- a/comfy/ldm/flux/layers.py +++ b/comfy/ldm/flux/layers.py @@ -5,9 +5,9 @@ import torch from torch import Tensor, nn from .math import attention, rope -import comfy.ops -import comfy.ldm.common_dit +# Fix import for some custom nodes, TODO: delete eventually. +RMSNorm = None class EmbedND(nn.Module): def __init__(self, dim: int, theta: int, axes_dim: list): @@ -87,20 +87,12 @@ def build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=False, yak_mlp=False, dt operations.Linear(mlp_hidden_dim, hidden_size, bias=True, dtype=dtype, device=device), ) -class RMSNorm(torch.nn.Module): - def __init__(self, dim: int, dtype=None, device=None, operations=None): - super().__init__() - self.scale = nn.Parameter(torch.empty((dim), dtype=dtype, device=device)) - - def forward(self, x: Tensor): - return comfy.ldm.common_dit.rms_norm(x, self.scale, 1e-6) - class QKNorm(torch.nn.Module): def __init__(self, dim: int, dtype=None, device=None, operations=None): super().__init__() - self.query_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations) - self.key_norm = RMSNorm(dim, dtype=dtype, device=device, operations=operations) + self.query_norm = operations.RMSNorm(dim, dtype=dtype, device=device) + self.key_norm = operations.RMSNorm(dim, dtype=dtype, device=device) def forward(self, q: Tensor, k: Tensor, v: Tensor) -> tuple: q = self.query_norm(q) @@ -169,7 +161,7 @@ class SiLUActivation(nn.Module): class DoubleStreamBlock(nn.Module): - def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, flipped_img_txt=False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None): + def __init__(self, hidden_size: int, num_heads: int, mlp_ratio: float, qkv_bias: bool = False, modulation=True, mlp_silu_act=False, proj_bias=True, yak_mlp=False, dtype=None, device=None, operations=None): super().__init__() mlp_hidden_dim = int(hidden_size * mlp_ratio) @@ -197,8 +189,6 @@ class DoubleStreamBlock(nn.Module): self.txt_mlp = build_mlp(hidden_size, mlp_hidden_dim, mlp_silu_act=mlp_silu_act, yak_mlp=yak_mlp, dtype=dtype, device=device, operations=operations) - self.flipped_img_txt = flipped_img_txt - def forward(self, img: Tensor, txt: Tensor, vec: Tensor, pe: Tensor, attn_mask=None, modulation_dims_img=None, modulation_dims_txt=None, transformer_options={}): if self.modulation: img_mod1, img_mod2 = self.img_mod(vec) @@ -206,6 +196,9 @@ class DoubleStreamBlock(nn.Module): else: (img_mod1, img_mod2), (txt_mod1, txt_mod2) = vec + transformer_patches = transformer_options.get("patches", {}) + extra_options = transformer_options.copy() + # prepare image for attention img_modulated = self.img_norm1(img) img_modulated = apply_mod(img_modulated, (1 + img_mod1.scale), img_mod1.shift, modulation_dims_img) @@ -224,32 +217,23 @@ class DoubleStreamBlock(nn.Module): del txt_qkv txt_q, txt_k = self.txt_attn.norm(txt_q, txt_k, txt_v) - if self.flipped_img_txt: - q = torch.cat((img_q, txt_q), dim=2) - del img_q, txt_q - k = torch.cat((img_k, txt_k), dim=2) - del img_k, txt_k - v = torch.cat((img_v, txt_v), dim=2) - del img_v, txt_v - # run actual attention - attn = attention(q, k, v, - pe=pe, mask=attn_mask, transformer_options=transformer_options) - del q, k, v + q = torch.cat((txt_q, img_q), dim=2) + del txt_q, img_q + k = torch.cat((txt_k, img_k), dim=2) + del txt_k, img_k + v = torch.cat((txt_v, img_v), dim=2) + del txt_v, img_v + # run actual attention + attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options) + del q, k, v - img_attn, txt_attn = attn[:, : img.shape[1]], attn[:, img.shape[1]:] - else: - q = torch.cat((txt_q, img_q), dim=2) - del txt_q, img_q - k = torch.cat((txt_k, img_k), dim=2) - del txt_k, img_k - v = torch.cat((txt_v, img_v), dim=2) - del txt_v, img_v - # run actual attention - attn = attention(q, k, v, - pe=pe, mask=attn_mask, transformer_options=transformer_options) - del q, k, v + if "attn1_output_patch" in transformer_patches: + extra_options["img_slice"] = [txt.shape[1], attn.shape[1]] + patch = transformer_patches["attn1_output_patch"] + for p in patch: + attn = p(attn, extra_options) - txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:] + txt_attn, img_attn = attn[:, : txt.shape[1]], attn[:, txt.shape[1]:] # calculate the img bloks img += apply_mod(self.img_attn.proj(img_attn), img_mod1.gate, None, modulation_dims_img) @@ -328,6 +312,9 @@ class SingleStreamBlock(nn.Module): else: mod = vec + transformer_patches = transformer_options.get("patches", {}) + extra_options = transformer_options.copy() + qkv, mlp = torch.split(self.linear1(apply_mod(self.pre_norm(x), (1 + mod.scale), mod.shift, modulation_dims)), [3 * self.hidden_size, self.mlp_hidden_dim_first], dim=-1) q, k, v = qkv.view(qkv.shape[0], qkv.shape[1], 3, self.num_heads, -1).permute(2, 0, 3, 1, 4) @@ -337,6 +324,12 @@ class SingleStreamBlock(nn.Module): # compute attention attn = attention(q, k, v, pe=pe, mask=attn_mask, transformer_options=transformer_options) del q, k, v + + if "attn1_output_patch" in transformer_patches: + patch = transformer_patches["attn1_output_patch"] + for p in patch: + attn = p(attn, extra_options) + # compute activation in mlp stream, cat again and run second linear layer if self.yak_mlp: mlp = self.mlp_act(mlp[..., self.mlp_hidden_dim_first // 2:]) * mlp[..., :self.mlp_hidden_dim_first // 2] diff --git a/comfy/ldm/flux/math.py b/comfy/ldm/flux/math.py index f9597de5b..5e764bb46 100644 --- a/comfy/ldm/flux/math.py +++ b/comfy/ldm/flux/math.py @@ -29,19 +29,34 @@ def rope(pos: Tensor, dim: int, theta: int) -> Tensor: return out.to(dtype=torch.float32, device=pos.device) +def _apply_rope1(x: Tensor, freqs_cis: Tensor): + x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2) + + x_out = freqs_cis[..., 0] * x_[..., 0] + x_out.addcmul_(freqs_cis[..., 1], x_[..., 1]) + + return x_out.reshape(*x.shape).type_as(x) + + +def _apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor): + return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis) + + try: import comfy.quant_ops - apply_rope = comfy.quant_ops.ck.apply_rope - apply_rope1 = comfy.quant_ops.ck.apply_rope1 + q_apply_rope = comfy.quant_ops.ck.apply_rope + q_apply_rope1 = comfy.quant_ops.ck.apply_rope1 + def apply_rope(xq, xk, freqs_cis): + if comfy.model_management.in_training: + return _apply_rope(xq, xk, freqs_cis) + else: + return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis) + def apply_rope1(x, freqs_cis): + if comfy.model_management.in_training: + return _apply_rope1(x, freqs_cis) + else: + return q_apply_rope1(x, freqs_cis) except: logging.warning("No comfy kitchen, using old apply_rope functions.") - def apply_rope1(x: Tensor, freqs_cis: Tensor): - x_ = x.to(dtype=freqs_cis.dtype).reshape(*x.shape[:-1], -1, 1, 2) - - x_out = freqs_cis[..., 0] * x_[..., 0] - x_out.addcmul_(freqs_cis[..., 1], x_[..., 1]) - - return x_out.reshape(*x.shape).type_as(x) - - def apply_rope(xq: Tensor, xk: Tensor, freqs_cis: Tensor): - return apply_rope1(xq, freqs_cis), apply_rope1(xk, freqs_cis) + apply_rope = _apply_rope + apply_rope1 = _apply_rope1 diff --git a/comfy/ldm/flux/model.py b/comfy/ldm/flux/model.py index f40c2a7a9..ef4dcf7c5 100644 --- a/comfy/ldm/flux/model.py +++ b/comfy/ldm/flux/model.py @@ -16,7 +16,6 @@ from .layers import ( SingleStreamBlock, timestep_embedding, Modulation, - RMSNorm ) @dataclass @@ -81,7 +80,7 @@ class Flux(nn.Module): self.txt_in = operations.Linear(params.context_in_dim, self.hidden_size, bias=params.ops_bias, dtype=dtype, device=device) if params.txt_norm: - self.txt_norm = RMSNorm(params.context_in_dim, dtype=dtype, device=device, operations=operations) + self.txt_norm = operations.RMSNorm(params.context_in_dim, dtype=dtype, device=device) else: self.txt_norm = None @@ -143,6 +142,7 @@ class Flux(nn.Module): attn_mask: Tensor = None, ) -> Tensor: + transformer_options = transformer_options.copy() patches = transformer_options.get("patches", {}) patches_replace = transformer_options.get("patches_replace", {}) if img.ndim != 3 or txt.ndim != 3: @@ -232,6 +232,7 @@ class Flux(nn.Module): transformer_options["total_blocks"] = len(self.single_blocks) transformer_options["block_type"] = "single" + transformer_options["img_slice"] = [txt.shape[1], img.shape[1]] for i, block in enumerate(self.single_blocks): transformer_options["block_index"] = i if ("single_block", i) in blocks_replace: diff --git a/comfy/ldm/hunyuan_video/model.py b/comfy/ldm/hunyuan_video/model.py index 55ab550f8..b94cdfa87 100644 --- a/comfy/ldm/hunyuan_video/model.py +++ b/comfy/ldm/hunyuan_video/model.py @@ -241,7 +241,6 @@ class HunyuanVideo(nn.Module): self.num_heads, mlp_ratio=params.mlp_ratio, qkv_bias=params.qkv_bias, - flipped_img_txt=True, dtype=dtype, device=device, operations=operations ) for _ in range(params.depth) @@ -305,6 +304,7 @@ class HunyuanVideo(nn.Module): control=None, transformer_options={}, ) -> Tensor: + transformer_options = transformer_options.copy() patches_replace = transformer_options.get("patches_replace", {}) initial_shape = list(img.shape) @@ -378,14 +378,14 @@ class HunyuanVideo(nn.Module): extra_txt_ids = torch.zeros((txt_ids.shape[0], txt_vision_states.shape[1], txt_ids.shape[-1]), device=txt_ids.device, dtype=txt_ids.dtype) txt_ids = torch.cat((txt_ids, extra_txt_ids), dim=1) - ids = torch.cat((img_ids, txt_ids), dim=1) + ids = torch.cat((txt_ids, img_ids), dim=1) pe = self.pe_embedder(ids) img_len = img.shape[1] if txt_mask is not None: attn_mask_len = img_len + txt.shape[1] attn_mask = torch.zeros((1, 1, attn_mask_len), dtype=img.dtype, device=img.device) - attn_mask[:, 0, img_len:] = txt_mask + attn_mask[:, 0, :txt.shape[1]] = txt_mask else: attn_mask = None @@ -413,10 +413,11 @@ class HunyuanVideo(nn.Module): if add is not None: img += add - img = torch.cat((img, txt), 1) + img = torch.cat((txt, img), 1) transformer_options["total_blocks"] = len(self.single_blocks) transformer_options["block_type"] = "single" + transformer_options["img_slice"] = [txt.shape[1], img.shape[1]] for i, block in enumerate(self.single_blocks): transformer_options["block_index"] = i if ("single_block", i) in blocks_replace: @@ -435,9 +436,9 @@ class HunyuanVideo(nn.Module): if i < len(control_o): add = control_o[i] if add is not None: - img[:, : img_len] += add + img[:, txt.shape[1]: img_len + txt.shape[1]] += add - img = img[:, : img_len] + img = img[:, txt.shape[1]: img_len + txt.shape[1]] if ref_latent is not None: img = img[:, ref_latent.shape[1]:] diff --git a/comfy/ldm/lightricks/av_model.py b/comfy/ldm/lightricks/av_model.py index 2c6954ecd..553fd5b38 100644 --- a/comfy/ldm/lightricks/av_model.py +++ b/comfy/ldm/lightricks/av_model.py @@ -9,6 +9,7 @@ from comfy.ldm.lightricks.model import ( LTXVModel, ) from comfy.ldm.lightricks.symmetric_patchifier import AudioPatchifier +from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector import comfy.ldm.common_dit class CompressedTimestep: @@ -217,7 +218,7 @@ class BasicAVTransformerBlock(nn.Module): def forward( self, x: Tuple[torch.Tensor, torch.Tensor], v_context=None, a_context=None, attention_mask=None, v_timestep=None, a_timestep=None, v_pe=None, a_pe=None, v_cross_pe=None, a_cross_pe=None, v_cross_scale_shift_timestep=None, a_cross_scale_shift_timestep=None, - v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None, + v_cross_gate_timestep=None, a_cross_gate_timestep=None, transformer_options=None, self_attention_mask=None, ) -> Tuple[torch.Tensor, torch.Tensor]: run_vx = transformer_options.get("run_vx", True) run_ax = transformer_options.get("run_ax", True) @@ -233,7 +234,7 @@ class BasicAVTransformerBlock(nn.Module): vshift_msa, vscale_msa = (self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(0, 2))) norm_vx = comfy.ldm.common_dit.rms_norm(vx) * (1 + vscale_msa) + vshift_msa del vshift_msa, vscale_msa - attn1_out = self.attn1(norm_vx, pe=v_pe, transformer_options=transformer_options) + attn1_out = self.attn1(norm_vx, pe=v_pe, mask=self_attention_mask, transformer_options=transformer_options) del norm_vx # video cross-attention vgate_msa = self.get_ada_values(self.scale_shift_table, vx.shape[0], v_timestep, slice(2, 3))[0] @@ -450,6 +451,29 @@ class LTXAVModel(LTXVModel): operations=self.operations, ) + self.audio_embeddings_connector = Embeddings1DConnector( + split_rope=True, + double_precision_rope=True, + dtype=dtype, + device=device, + operations=self.operations, + ) + + self.video_embeddings_connector = Embeddings1DConnector( + split_rope=True, + double_precision_rope=True, + dtype=dtype, + device=device, + operations=self.operations, + ) + + def preprocess_text_embeds(self, context): + if context.shape[-1] == self.caption_channels * 2: + return context + out_vid = self.video_embeddings_connector(context)[0] + out_audio = self.audio_embeddings_connector(context)[0] + return torch.concat((out_vid, out_audio), dim=-1) + def _init_transformer_blocks(self, device, dtype, **kwargs): """Initialize transformer blocks for LTXAV.""" self.transformer_blocks = nn.ModuleList( @@ -702,7 +726,7 @@ class LTXAVModel(LTXVModel): return [(v_pe, av_cross_video_freq_cis), (a_pe, av_cross_audio_freq_cis)] def _process_transformer_blocks( - self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs + self, x, context, attention_mask, timestep, pe, transformer_options={}, self_attention_mask=None, **kwargs ): vx = x[0] ax = x[1] @@ -746,6 +770,7 @@ class LTXAVModel(LTXVModel): v_cross_gate_timestep=args["v_cross_gate_timestep"], a_cross_gate_timestep=args["a_cross_gate_timestep"], transformer_options=args["transformer_options"], + self_attention_mask=args.get("self_attention_mask"), ) return out @@ -766,6 +791,7 @@ class LTXAVModel(LTXVModel): "v_cross_gate_timestep": av_ca_a2v_gate_noise_timestep, "a_cross_gate_timestep": av_ca_v2a_gate_noise_timestep, "transformer_options": transformer_options, + "self_attention_mask": self_attention_mask, }, {"original_block": block_wrap}, ) @@ -787,6 +813,7 @@ class LTXAVModel(LTXVModel): v_cross_gate_timestep=av_ca_a2v_gate_noise_timestep, a_cross_gate_timestep=av_ca_v2a_gate_noise_timestep, transformer_options=transformer_options, + self_attention_mask=self_attention_mask, ) return [vx, ax] diff --git a/comfy/ldm/lightricks/embeddings_connector.py b/comfy/ldm/lightricks/embeddings_connector.py index 06f5ada89..33adb9671 100644 --- a/comfy/ldm/lightricks/embeddings_connector.py +++ b/comfy/ldm/lightricks/embeddings_connector.py @@ -157,11 +157,9 @@ class Embeddings1DConnector(nn.Module): self.num_learnable_registers = num_learnable_registers if self.num_learnable_registers: self.learnable_registers = nn.Parameter( - torch.rand( + torch.empty( self.num_learnable_registers, inner_dim, dtype=dtype, device=device ) - * 2.0 - - 1.0 ) def get_fractional_positions(self, indices_grid): @@ -234,7 +232,7 @@ class Embeddings1DConnector(nn.Module): return indices - def precompute_freqs_cis(self, indices_grid, spacing="exp"): + def precompute_freqs_cis(self, indices_grid, spacing="exp", out_dtype=None): dim = self.inner_dim n_elem = 2 # 2 because of cos and sin freqs = self.precompute_freqs(indices_grid, spacing) @@ -247,7 +245,7 @@ class Embeddings1DConnector(nn.Module): ) else: cos_freq, sin_freq = interleaved_freqs_cis(freqs, dim % n_elem) - return cos_freq.to(self.dtype), sin_freq.to(self.dtype), self.split_rope + return cos_freq.to(dtype=out_dtype), sin_freq.to(dtype=out_dtype), self.split_rope def forward( self, @@ -288,7 +286,7 @@ class Embeddings1DConnector(nn.Module): hidden_states.shape[1], dtype=torch.float32, device=hidden_states.device ) indices_grid = indices_grid[None, None, :] - freqs_cis = self.precompute_freqs_cis(indices_grid) + freqs_cis = self.precompute_freqs_cis(indices_grid, out_dtype=hidden_states.dtype) # 2. Blocks for block_idx, block in enumerate(self.transformer_1d_blocks): diff --git a/comfy/ldm/lightricks/model.py b/comfy/ldm/lightricks/model.py index d61e19d6e..60d760d29 100644 --- a/comfy/ldm/lightricks/model.py +++ b/comfy/ldm/lightricks/model.py @@ -1,6 +1,7 @@ from abc import ABC, abstractmethod from enum import Enum import functools +import logging import math from typing import Dict, Optional, Tuple @@ -14,6 +15,8 @@ import comfy.ldm.common_dit from .symmetric_patchifier import SymmetricPatchifier, latent_to_pixel_coords +logger = logging.getLogger(__name__) + def _log_base(x, base): return np.log(x) / np.log(base) @@ -415,12 +418,12 @@ class BasicTransformerBlock(nn.Module): self.scale_shift_table = nn.Parameter(torch.empty(6, dim, device=device, dtype=dtype)) - def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None, transformer_options={}): + def forward(self, x, context=None, attention_mask=None, timestep=None, pe=None, transformer_options={}, self_attention_mask=None): shift_msa, scale_msa, gate_msa, shift_mlp, scale_mlp, gate_mlp = (self.scale_shift_table[None, None].to(device=x.device, dtype=x.dtype) + timestep.reshape(x.shape[0], timestep.shape[1], self.scale_shift_table.shape[0], -1)).unbind(dim=2) attn1_input = comfy.ldm.common_dit.rms_norm(x) attn1_input = torch.addcmul(attn1_input, attn1_input, scale_msa).add_(shift_msa) - attn1_input = self.attn1(attn1_input, pe=pe, transformer_options=transformer_options) + attn1_input = self.attn1(attn1_input, pe=pe, mask=self_attention_mask, transformer_options=transformer_options) x.addcmul_(attn1_input, gate_msa) del attn1_input @@ -638,8 +641,16 @@ class LTXBaseModel(torch.nn.Module, ABC): """Process input data. Must be implemented by subclasses.""" pass + def _build_guide_self_attention_mask(self, x, transformer_options, merged_args): + """Build self-attention mask for per-guide attention attenuation. + + Base implementation returns None (no attenuation). Subclasses that + support guide-based attention control should override this. + """ + return None + @abstractmethod - def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, **kwargs): + def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, self_attention_mask=None, **kwargs): """Process transformer blocks. Must be implemented by subclasses.""" pass @@ -788,9 +799,17 @@ class LTXBaseModel(torch.nn.Module, ABC): attention_mask = self._prepare_attention_mask(attention_mask, input_dtype) pe = self._prepare_positional_embeddings(pixel_coords, frame_rate, input_dtype) + # Build self-attention mask for per-guide attenuation + self_attention_mask = self._build_guide_self_attention_mask( + x, transformer_options, merged_args + ) + # Process transformer blocks x = self._process_transformer_blocks( - x, context, attention_mask, timestep, pe, transformer_options=transformer_options, **merged_args + x, context, attention_mask, timestep, pe, + transformer_options=transformer_options, + self_attention_mask=self_attention_mask, + **merged_args, ) # Process output @@ -890,13 +909,243 @@ class LTXVModel(LTXBaseModel): pixel_coords = pixel_coords[:, :, grid_mask, ...] kf_grid_mask = grid_mask[-keyframe_idxs.shape[2]:] + + # Compute per-guide surviving token counts from guide_attention_entries. + # Each entry tracks one guide reference; they are appended in order and + # their pre_filter_counts partition the kf_grid_mask. + guide_entries = kwargs.get("guide_attention_entries", None) + if guide_entries: + total_pfc = sum(e["pre_filter_count"] for e in guide_entries) + if total_pfc != len(kf_grid_mask): + raise ValueError( + f"guide pre_filter_counts ({total_pfc}) != " + f"keyframe grid mask length ({len(kf_grid_mask)})" + ) + resolved_entries = [] + offset = 0 + for entry in guide_entries: + pfc = entry["pre_filter_count"] + entry_mask = kf_grid_mask[offset:offset + pfc] + surviving = int(entry_mask.sum().item()) + resolved_entries.append({ + **entry, + "surviving_count": surviving, + }) + offset += pfc + additional_args["resolved_guide_entries"] = resolved_entries + keyframe_idxs = keyframe_idxs[..., kf_grid_mask, :] pixel_coords[:, :, -keyframe_idxs.shape[2]:, :] = keyframe_idxs + # Total surviving guide tokens (all guides) + additional_args["num_guide_tokens"] = keyframe_idxs.shape[2] + x = self.patchify_proj(x) return x, pixel_coords, additional_args - def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, **kwargs): + def _build_guide_self_attention_mask(self, x, transformer_options, merged_args): + """Build self-attention mask for per-guide attention attenuation. + + Reads resolved_guide_entries from merged_args (computed in _process_input) + to build a log-space additive bias mask that attenuates noisy ↔ guide + attention for each guide reference independently. + + Returns None if no attenuation is needed (all strengths == 1.0 and no + spatial masks, or no guide tokens). + """ + if isinstance(x, list): + # AV model: x = [vx, ax]; use vx for token count and device + total_tokens = x[0].shape[1] + device = x[0].device + dtype = x[0].dtype + else: + total_tokens = x.shape[1] + device = x.device + dtype = x.dtype + + num_guide_tokens = merged_args.get("num_guide_tokens", 0) + if num_guide_tokens == 0: + return None + + resolved_entries = merged_args.get("resolved_guide_entries", None) + if not resolved_entries: + return None + + # Check if any attenuation is actually needed + needs_attenuation = any( + e["strength"] < 1.0 or e.get("pixel_mask") is not None + for e in resolved_entries + ) + if not needs_attenuation: + return None + + # Build per-guide-token weights for all tracked guide tokens. + # Guides are appended in order at the end of the sequence. + guide_start = total_tokens - num_guide_tokens + all_weights = [] + total_tracked = 0 + + for entry in resolved_entries: + surviving = entry["surviving_count"] + if surviving == 0: + continue + + strength = entry["strength"] + pixel_mask = entry.get("pixel_mask") + latent_shape = entry.get("latent_shape") + + if pixel_mask is not None and latent_shape is not None: + f_lat, h_lat, w_lat = latent_shape + per_token = self._downsample_mask_to_latent( + pixel_mask.to(device=device, dtype=dtype), + f_lat, h_lat, w_lat, + ) + # per_token shape: (B, f_lat*h_lat*w_lat). + # Collapse batch dim — the mask is assumed identical across the + # batch; validate and take the first element to get (1, tokens). + if per_token.shape[0] > 1: + ref = per_token[0] + for bi in range(1, per_token.shape[0]): + if not torch.equal(ref, per_token[bi]): + logger.warning( + "pixel_mask differs across batch elements; " + "using first element only." + ) + break + per_token = per_token[:1] + # `surviving` is the post-grid_mask token count. + # Clamp to surviving to handle any mismatch safely. + n_weights = min(per_token.shape[1], surviving) + weights = per_token[:, :n_weights] * strength # (1, n_weights) + else: + weights = torch.full( + (1, surviving), strength, device=device, dtype=dtype + ) + + all_weights.append(weights) + total_tracked += weights.shape[1] + + if not all_weights: + return None + + # Concatenate per-token weights for all tracked guides + tracked_weights = torch.cat(all_weights, dim=1) # (1, total_tracked) + + # Check if any weight is actually < 1.0 (otherwise no attenuation needed) + if (tracked_weights >= 1.0).all(): + return None + + # Build the mask: guide tokens are at the end of the sequence. + # Tracked guides come first (in order), untracked follow. + return self._build_self_attention_mask( + total_tokens, num_guide_tokens, total_tracked, + tracked_weights, guide_start, device, dtype, + ) + + @staticmethod + def _downsample_mask_to_latent(mask, f_lat, h_lat, w_lat): + """Downsample a pixel-space mask to per-token latent weights. + + Args: + mask: (B, 1, F_pix, H_pix, W_pix) pixel-space mask with values in [0, 1]. + f_lat: Number of latent frames (pre-dilation original count). + h_lat: Latent height (pre-dilation original height). + w_lat: Latent width (pre-dilation original width). + + Returns: + (B, F_lat * H_lat * W_lat) flattened per-token weights. + """ + b = mask.shape[0] + f_pix = mask.shape[2] + + # Spatial downsampling: area interpolation per frame + spatial_down = torch.nn.functional.interpolate( + rearrange(mask, "b 1 f h w -> (b f) 1 h w"), + size=(h_lat, w_lat), + mode="area", + ) + spatial_down = rearrange(spatial_down, "(b f) 1 h w -> b 1 f h w", b=b) + + # Temporal downsampling: first pixel frame maps to first latent frame, + # remaining pixel frames are averaged in groups for causal temporal structure. + first_frame = spatial_down[:, :, :1, :, :] + if f_pix > 1 and f_lat > 1: + remaining_pix = f_pix - 1 + remaining_lat = f_lat - 1 + t = remaining_pix // remaining_lat + if t < 1: + # Fewer pixel frames than latent frames — upsample by repeating + # the available pixel frames via nearest interpolation. + rest_flat = rearrange( + spatial_down[:, :, 1:, :, :], + "b 1 f h w -> (b h w) 1 f", + ) + rest_up = torch.nn.functional.interpolate( + rest_flat, size=remaining_lat, mode="nearest", + ) + rest = rearrange( + rest_up, "(b h w) 1 f -> b 1 f h w", + b=b, h=h_lat, w=w_lat, + ) + else: + # Trim trailing pixel frames that don't fill a complete group + usable = remaining_lat * t + rest = rearrange( + spatial_down[:, :, 1:1 + usable, :, :], + "b 1 (f t) h w -> b 1 f t h w", + t=t, + ) + rest = rest.mean(dim=3) + latent_mask = torch.cat([first_frame, rest], dim=2) + elif f_lat > 1: + # Single pixel frame but multiple latent frames — repeat the + # single frame across all latent frames. + latent_mask = first_frame.expand(-1, -1, f_lat, -1, -1) + else: + latent_mask = first_frame + + return rearrange(latent_mask, "b 1 f h w -> b (f h w)") + + @staticmethod + def _build_self_attention_mask(total_tokens, num_guide_tokens, tracked_count, + tracked_weights, guide_start, device, dtype): + """Build a log-space additive self-attention bias mask. + + Attenuates attention between noisy tokens and tracked guide tokens. + Untracked guide tokens (at the end of the guide portion) keep full attention. + + Args: + total_tokens: Total sequence length. + num_guide_tokens: Total guide tokens (all guides) at end of sequence. + tracked_count: Number of tracked guide tokens (first in the guide portion). + tracked_weights: (1, tracked_count) tensor, values in [0, 1]. + guide_start: Index where guide tokens begin in the sequence. + device: Target device. + dtype: Target dtype. + + Returns: + (1, 1, total_tokens, total_tokens) additive bias mask. + 0.0 = full attention, negative = attenuated, finfo.min = effectively fully masked. + """ + finfo = torch.finfo(dtype) + mask = torch.zeros((1, 1, total_tokens, total_tokens), device=device, dtype=dtype) + tracked_end = guide_start + tracked_count + + # Convert weights to log-space bias + w = tracked_weights.to(device=device, dtype=dtype) # (1, tracked_count) + log_w = torch.full_like(w, finfo.min) + positive_mask = w > 0 + if positive_mask.any(): + log_w[positive_mask] = torch.log(w[positive_mask].clamp(min=finfo.tiny)) + + # noisy → tracked guides: each noisy row gets the same per-guide weight + mask[:, :, :guide_start, guide_start:tracked_end] = log_w.view(1, 1, 1, -1) + # tracked guides → noisy: each guide row broadcasts its weight across noisy cols + mask[:, :, guide_start:tracked_end, :guide_start] = log_w.view(1, 1, -1, 1) + + return mask + + def _process_transformer_blocks(self, x, context, attention_mask, timestep, pe, transformer_options={}, self_attention_mask=None, **kwargs): """Process transformer blocks for LTXV.""" patches_replace = transformer_options.get("patches_replace", {}) blocks_replace = patches_replace.get("dit", {}) @@ -906,10 +1155,10 @@ class LTXVModel(LTXBaseModel): def block_wrap(args): out = {} - out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"], transformer_options=args["transformer_options"]) + out["img"] = block(args["img"], context=args["txt"], attention_mask=args["attention_mask"], timestep=args["vec"], pe=args["pe"], transformer_options=args["transformer_options"], self_attention_mask=args.get("self_attention_mask")) return out - out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe, "transformer_options": transformer_options}, {"original_block": block_wrap}) + out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "attention_mask": attention_mask, "vec": timestep, "pe": pe, "transformer_options": transformer_options, "self_attention_mask": self_attention_mask}, {"original_block": block_wrap}) x = out["img"] else: x = block( @@ -919,6 +1168,7 @@ class LTXVModel(LTXBaseModel): timestep=timestep, pe=pe, transformer_options=transformer_options, + self_attention_mask=self_attention_mask, ) return x diff --git a/comfy/ldm/modules/attention.py b/comfy/ldm/modules/attention.py index ccf690945..10d051325 100644 --- a/comfy/ldm/modules/attention.py +++ b/comfy/ldm/modules/attention.py @@ -524,6 +524,9 @@ def attention_pytorch(q, k, v, heads, mask=None, attn_precision=None, skip_resha @wrap_attn def attention_sage(q, k, v, heads, mask=None, attn_precision=None, skip_reshape=False, skip_output_reshape=False, **kwargs): + if kwargs.get("low_precision_attention", True) is False: + return attention_pytorch(q, k, v, heads, mask=mask, skip_reshape=skip_reshape, skip_output_reshape=skip_output_reshape, **kwargs) + exception_fallback = False if skip_reshape: b, _, _, dim_head = q.shape diff --git a/comfy/ldm/modules/diffusionmodules/model.py b/comfy/ldm/modules/diffusionmodules/model.py index 5a22ef030..805592aa5 100644 --- a/comfy/ldm/modules/diffusionmodules/model.py +++ b/comfy/ldm/modules/diffusionmodules/model.py @@ -102,19 +102,7 @@ class VideoConv3d(nn.Module): return self.conv(x) def interpolate_up(x, scale_factor): - try: - return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest") - except: #operation not implemented for bf16 - orig_shape = list(x.shape) - out_shape = orig_shape[:2] - for i in range(len(orig_shape) - 2): - out_shape.append(round(orig_shape[i + 2] * scale_factor[i])) - out = torch.empty(out_shape, dtype=x.dtype, layout=x.layout, device=x.device) - split = 8 - l = out.shape[1] // split - for i in range(0, out.shape[1], l): - out[:,i:i+l] = torch.nn.functional.interpolate(x[:,i:i+l].to(torch.float32), scale_factor=scale_factor, mode="nearest").to(x.dtype) - return out + return torch.nn.functional.interpolate(x, scale_factor=scale_factor, mode="nearest") class Upsample(nn.Module): def __init__(self, in_channels, with_conv, conv_op=ops.Conv2d, scale_factor=2.0): diff --git a/comfy/ldm/modules/diffusionmodules/openaimodel.py b/comfy/ldm/modules/diffusionmodules/openaimodel.py index 4c8d53cac..295310df6 100644 --- a/comfy/ldm/modules/diffusionmodules/openaimodel.py +++ b/comfy/ldm/modules/diffusionmodules/openaimodel.py @@ -18,6 +18,8 @@ import comfy.patcher_extension import comfy.ops ops = comfy.ops.disable_weight_init +from ..sdpose import HeatmapHead + class TimestepBlock(nn.Module): """ Any module where forward() takes timestep embeddings as a second argument. @@ -441,6 +443,7 @@ class UNetModel(nn.Module): disable_temporal_crossattention=False, max_ddpm_temb_period=10000, attn_precision=None, + heatmap_head=False, device=None, operations=ops, ): @@ -827,6 +830,9 @@ class UNetModel(nn.Module): #nn.LogSoftmax(dim=1) # change to cross_entropy and produce non-normalized logits ) + if heatmap_head: + self.heatmap_head = HeatmapHead(device=device, dtype=self.dtype, operations=operations) + def forward(self, x, timesteps=None, context=None, y=None, control=None, transformer_options={}, **kwargs): return comfy.patcher_extension.WrapperExecutor.new_class_executor( self._forward, diff --git a/comfy/ldm/modules/sdpose.py b/comfy/ldm/modules/sdpose.py new file mode 100644 index 000000000..d67b60b76 --- /dev/null +++ b/comfy/ldm/modules/sdpose.py @@ -0,0 +1,130 @@ +import torch +import numpy as np +from scipy.ndimage import gaussian_filter + +class HeatmapHead(torch.nn.Module): + def __init__( + self, + in_channels=640, + out_channels=133, + input_size=(768, 1024), + heatmap_scale=4, + deconv_out_channels=(640,), + deconv_kernel_sizes=(4,), + conv_out_channels=(640,), + conv_kernel_sizes=(1,), + final_layer_kernel_size=1, + device=None, dtype=None, operations=None + ): + super().__init__() + + self.heatmap_size = (input_size[0] // heatmap_scale, input_size[1] // heatmap_scale) + self.scale_factor = ((np.array(input_size) - 1) / (np.array(self.heatmap_size) - 1)).astype(np.float32) + + # Deconv layers + if deconv_out_channels: + deconv_layers = [] + for out_ch, kernel_size in zip(deconv_out_channels, deconv_kernel_sizes): + if kernel_size == 4: + padding, output_padding = 1, 0 + elif kernel_size == 3: + padding, output_padding = 1, 1 + elif kernel_size == 2: + padding, output_padding = 0, 0 + else: + raise ValueError(f'Unsupported kernel size {kernel_size}') + + deconv_layers.extend([ + operations.ConvTranspose2d(in_channels, out_ch, kernel_size, + stride=2, padding=padding, output_padding=output_padding, bias=False, device=device, dtype=dtype), + torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype), + torch.nn.SiLU(inplace=True) + ]) + in_channels = out_ch + self.deconv_layers = torch.nn.Sequential(*deconv_layers) + else: + self.deconv_layers = torch.nn.Identity() + + # Conv layers + if conv_out_channels: + conv_layers = [] + for out_ch, kernel_size in zip(conv_out_channels, conv_kernel_sizes): + padding = (kernel_size - 1) // 2 + conv_layers.extend([ + operations.Conv2d(in_channels, out_ch, kernel_size, + stride=1, padding=padding, device=device, dtype=dtype), + torch.nn.InstanceNorm2d(out_ch, device=device, dtype=dtype), + torch.nn.SiLU(inplace=True) + ]) + in_channels = out_ch + self.conv_layers = torch.nn.Sequential(*conv_layers) + else: + self.conv_layers = torch.nn.Identity() + + self.final_layer = operations.Conv2d(in_channels, out_channels, kernel_size=final_layer_kernel_size, padding=final_layer_kernel_size // 2, device=device, dtype=dtype) + + def forward(self, x): # Decode heatmaps to keypoints + heatmaps = self.final_layer(self.conv_layers(self.deconv_layers(x))) + heatmaps_np = heatmaps.float().cpu().numpy() # (B, K, H, W) + B, K, H, W = heatmaps_np.shape + + batch_keypoints = [] + batch_scores = [] + + for b in range(B): + hm = heatmaps_np[b].copy() # (K, H, W) + + # --- vectorised argmax --- + flat = hm.reshape(K, -1) + idx = np.argmax(flat, axis=1) + scores = flat[np.arange(K), idx].copy() + y_locs, x_locs = np.unravel_index(idx, (H, W)) + keypoints = np.stack([x_locs, y_locs], axis=-1).astype(np.float32) # (K, 2) in heatmap space + invalid = scores <= 0. + keypoints[invalid] = -1 + + # --- DARK sub-pixel refinement (UDP) --- + # 1. Gaussian blur with max-preserving normalisation + border = 5 # (kernel-1)//2 for kernel=11 + for k in range(K): + origin_max = np.max(hm[k]) + dr = np.zeros((H + 2 * border, W + 2 * border), dtype=np.float32) + dr[border:-border, border:-border] = hm[k].copy() + dr = gaussian_filter(dr, sigma=2.0) + hm[k] = dr[border:-border, border:-border].copy() + cur_max = np.max(hm[k]) + if cur_max > 0: + hm[k] *= origin_max / cur_max + # 2. Log-space for Taylor expansion + np.clip(hm, 1e-3, 50., hm) + np.log(hm, hm) + # 3. Hessian-based Newton step + hm_pad = np.pad(hm, ((0, 0), (1, 1), (1, 1)), mode='edge').flatten() + index = keypoints[:, 0] + 1 + (keypoints[:, 1] + 1) * (W + 2) + index += (W + 2) * (H + 2) * np.arange(0, K) + index = index.astype(int).reshape(-1, 1) + i_ = hm_pad[index] + ix1 = hm_pad[index + 1] + iy1 = hm_pad[index + W + 2] + ix1y1 = hm_pad[index + W + 3] + ix1_y1_ = hm_pad[index - W - 3] + ix1_ = hm_pad[index - 1] + iy1_ = hm_pad[index - 2 - W] + dx = 0.5 * (ix1 - ix1_) + dy = 0.5 * (iy1 - iy1_) + derivative = np.concatenate([dx, dy], axis=1).reshape(K, 2, 1) + dxx = ix1 - 2 * i_ + ix1_ + dyy = iy1 - 2 * i_ + iy1_ + dxy = 0.5 * (ix1y1 - ix1 - iy1 + i_ + i_ - ix1_ - iy1_ + ix1_y1_) + hessian = np.concatenate([dxx, dxy, dxy, dyy], axis=1).reshape(K, 2, 2) + hessian = np.linalg.inv(hessian + np.finfo(np.float32).eps * np.eye(2)) + keypoints -= np.einsum('imn,ink->imk', hessian, derivative).squeeze(axis=-1) + + # --- restore to input image space --- + keypoints = keypoints * self.scale_factor + keypoints[invalid] = -1 + + batch_keypoints.append(keypoints) + batch_scores.append(scores) + + return batch_keypoints, batch_scores diff --git a/comfy/ldm/qwen_image/controlnet.py b/comfy/ldm/qwen_image/controlnet.py index a6d408104..c0aae9240 100644 --- a/comfy/ldm/qwen_image/controlnet.py +++ b/comfy/ldm/qwen_image/controlnet.py @@ -2,6 +2,196 @@ import torch import math from .model import QwenImageTransformer2DModel +from .model import QwenImageTransformerBlock + + +class QwenImageFunControlBlock(QwenImageTransformerBlock): + def __init__(self, dim, num_attention_heads, attention_head_dim, has_before_proj=False, dtype=None, device=None, operations=None): + super().__init__( + dim=dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + dtype=dtype, + device=device, + operations=operations, + ) + self.has_before_proj = has_before_proj + if has_before_proj: + self.before_proj = operations.Linear(dim, dim, device=device, dtype=dtype) + self.after_proj = operations.Linear(dim, dim, device=device, dtype=dtype) + + +class QwenImageFunControlNetModel(torch.nn.Module): + def __init__( + self, + control_in_features=132, + inner_dim=3072, + num_attention_heads=24, + attention_head_dim=128, + num_control_blocks=5, + main_model_double=60, + injection_layers=(0, 12, 24, 36, 48), + dtype=None, + device=None, + operations=None, + ): + super().__init__() + self.dtype = dtype + self.main_model_double = main_model_double + self.injection_layers = tuple(injection_layers) + # Keep base hint scaling at 1.0 so user-facing strength behaves similarly + # to the reference Gen2/VideoX implementation around strength=1. + self.hint_scale = 1.0 + self.control_img_in = operations.Linear(control_in_features, inner_dim, device=device, dtype=dtype) + + self.control_blocks = torch.nn.ModuleList([]) + for i in range(num_control_blocks): + self.control_blocks.append( + QwenImageFunControlBlock( + dim=inner_dim, + num_attention_heads=num_attention_heads, + attention_head_dim=attention_head_dim, + has_before_proj=(i == 0), + dtype=dtype, + device=device, + operations=operations, + ) + ) + + def _process_hint_tokens(self, hint): + if hint is None: + return None + if hint.ndim == 4: + hint = hint.unsqueeze(2) + + # Fun checkpoints are trained with 33 latent channels before 2x2 packing: + # [control_latent(16), mask(1), inpaint_latent(16)] -> 132 features. + # Default behavior (no inpaint input in stock Apply ControlNet) should use + # zeros for mask/inpaint branches, matching VideoX fallback semantics. + expected_c = self.control_img_in.weight.shape[1] // 4 + if hint.shape[1] == 16 and expected_c == 33: + zeros_mask = torch.zeros_like(hint[:, :1]) + zeros_inpaint = torch.zeros_like(hint) + hint = torch.cat([hint, zeros_mask, zeros_inpaint], dim=1) + + bs, c, t, h, w = hint.shape + hidden_states = torch.nn.functional.pad(hint, (0, w % 2, 0, h % 2)) + orig_shape = hidden_states.shape + hidden_states = hidden_states.view( + orig_shape[0], + orig_shape[1], + orig_shape[-3], + orig_shape[-2] // 2, + 2, + orig_shape[-1] // 2, + 2, + ) + hidden_states = hidden_states.permute(0, 2, 3, 5, 1, 4, 6) + hidden_states = hidden_states.reshape( + bs, + t * ((h + 1) // 2) * ((w + 1) // 2), + c * 4, + ) + + expected_in = self.control_img_in.weight.shape[1] + cur_in = hidden_states.shape[-1] + if cur_in < expected_in: + pad = torch.zeros( + (hidden_states.shape[0], hidden_states.shape[1], expected_in - cur_in), + device=hidden_states.device, + dtype=hidden_states.dtype, + ) + hidden_states = torch.cat([hidden_states, pad], dim=-1) + elif cur_in > expected_in: + hidden_states = hidden_states[:, :, :expected_in] + + return hidden_states + + def forward( + self, + x, + timesteps, + context, + attention_mask=None, + guidance: torch.Tensor = None, + hint=None, + transformer_options={}, + base_model=None, + **kwargs, + ): + if base_model is None: + raise RuntimeError("Qwen Fun ControlNet requires a QwenImage base model at runtime.") + + encoder_hidden_states_mask = attention_mask + # Keep attention mask disabled inside Fun control blocks to mirror + # VideoX behavior (they rely on seq lengths for RoPE, not masked attention). + encoder_hidden_states_mask = None + + hidden_states, img_ids, _ = base_model.process_img(x) + hint_tokens = self._process_hint_tokens(hint) + if hint_tokens is None: + raise RuntimeError("Qwen Fun ControlNet requires a control hint image.") + + if hint_tokens.shape[1] != hidden_states.shape[1]: + max_tokens = min(hint_tokens.shape[1], hidden_states.shape[1]) + hint_tokens = hint_tokens[:, :max_tokens] + hidden_states = hidden_states[:, :max_tokens] + img_ids = img_ids[:, :max_tokens] + + txt_start = round( + max( + ((x.shape[-1] + (base_model.patch_size // 2)) // base_model.patch_size) // 2, + ((x.shape[-2] + (base_model.patch_size // 2)) // base_model.patch_size) // 2, + ) + ) + txt_ids = torch.arange(txt_start, txt_start + context.shape[1], device=x.device).reshape(1, -1, 1).repeat(x.shape[0], 1, 3) + ids = torch.cat((txt_ids, img_ids), dim=1) + image_rotary_emb = base_model.pe_embedder(ids).to(x.dtype).contiguous() + + hidden_states = base_model.img_in(hidden_states) + encoder_hidden_states = base_model.txt_norm(context) + encoder_hidden_states = base_model.txt_in(encoder_hidden_states) + + if guidance is not None: + guidance = guidance * 1000 + + temb = ( + base_model.time_text_embed(timesteps, hidden_states) + if guidance is None + else base_model.time_text_embed(timesteps, guidance, hidden_states) + ) + + c = self.control_img_in(hint_tokens) + + for i, block in enumerate(self.control_blocks): + if i == 0: + c_in = block.before_proj(c) + hidden_states + all_c = [] + else: + all_c = list(torch.unbind(c, dim=0)) + c_in = all_c.pop(-1) + + encoder_hidden_states, c_out = block( + hidden_states=c_in, + encoder_hidden_states=encoder_hidden_states, + encoder_hidden_states_mask=encoder_hidden_states_mask, + temb=temb, + image_rotary_emb=image_rotary_emb, + transformer_options=transformer_options, + ) + + c_skip = block.after_proj(c_out) * self.hint_scale + all_c += [c_skip, c_out] + c = torch.stack(all_c, dim=0) + + hints = torch.unbind(c, dim=0)[:-1] + + controlnet_block_samples = [None] * self.main_model_double + for local_idx, base_idx in enumerate(self.injection_layers): + if local_idx < len(hints) and base_idx < len(controlnet_block_samples): + controlnet_block_samples[base_idx] = hints[local_idx] + + return {"input": controlnet_block_samples} class QwenImageControlNetModel(QwenImageTransformer2DModel): diff --git a/comfy/ldm/wan/vae.py b/comfy/ldm/wan/vae.py index fd125ceed..7903c7690 100644 --- a/comfy/ldm/wan/vae.py +++ b/comfy/ldm/wan/vae.py @@ -459,6 +459,7 @@ class WanVAE(nn.Module): attn_scales=[], temperal_downsample=[True, True, False], image_channels=3, + conv_out_channels=3, dropout=0.0): super().__init__() self.dim = dim @@ -474,7 +475,7 @@ class WanVAE(nn.Module): attn_scales, self.temperal_downsample, dropout) self.conv1 = CausalConv3d(z_dim * 2, z_dim * 2, 1) self.conv2 = CausalConv3d(z_dim, z_dim, 1) - self.decoder = Decoder3d(dim, z_dim, image_channels, dim_mult, num_res_blocks, + self.decoder = Decoder3d(dim, z_dim, conv_out_channels, dim_mult, num_res_blocks, attn_scales, self.temperal_upsample, dropout) def encode(self, x): diff --git a/comfy/lora.py b/comfy/lora.py index 7b31d055c..f36ddb046 100644 --- a/comfy/lora.py +++ b/comfy/lora.py @@ -332,6 +332,13 @@ def model_lora_keys_unet(model, key_map={}): key_map["{}".format(key_lora)] = k key_map["transformer.{}".format(key_lora)] = k + if isinstance(model, comfy.model_base.ACEStep15): + for k in sdk: + if k.startswith("diffusion_model.decoder.") and k.endswith(".weight"): + key_lora = k[len("diffusion_model.decoder."):-len(".weight")] + key_map["base_model.model.{}".format(key_lora)] = k # Official base model loras + key_map["lycoris_{}".format(key_lora.replace(".", "_"))] = k # LyCORIS/LoKR format + return key_map @@ -368,6 +375,31 @@ def pad_tensor_to_shape(tensor: torch.Tensor, new_shape: list[int]) -> torch.Ten return padded_tensor +def calculate_shape(patches, weight, key, original_weights=None): + current_shape = weight.shape + + for p in patches: + v = p[1] + offset = p[3] + + # Offsets restore the old shape; lists force a diff without metadata + if offset is not None or isinstance(v, list): + continue + + if isinstance(v, weight_adapter.WeightAdapterBase): + adapter_shape = v.calculate_shape(key) + if adapter_shape is not None: + current_shape = adapter_shape + continue + + # Standard diff logic with padding + if len(v) == 2: + patch_type, patch_data = v[0], v[1] + if patch_type == "diff" and len(patch_data) > 1 and patch_data[1]['pad_weight']: + current_shape = patch_data[0].shape + + return current_shape + def calculate_weight(patches, weight, key, intermediate_dtype=torch.float32, original_weights=None): for p in patches: strength = p[0] diff --git a/comfy/lora_convert.py b/comfy/lora_convert.py index 9d8d21efe..749e81df3 100644 --- a/comfy/lora_convert.py +++ b/comfy/lora_convert.py @@ -5,7 +5,7 @@ import comfy.utils def convert_lora_bfl_control(sd): #BFL loras for Flux sd_out = {} for k in sd: - k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.scale.set_weight")) + k_to = "diffusion_model.{}".format(k.replace(".lora_B.bias", ".diff_b").replace("_norm.scale", "_norm.set_weight")) sd_out[k_to] = sd[k] sd_out["diffusion_model.img_in.reshape_weight"] = torch.tensor([sd["img_in.lora_B.weight"].shape[0], sd["img_in.lora_A.weight"].shape[1]]) diff --git a/comfy/memory_management.py b/comfy/memory_management.py index 858bd4cc7..0b7da2852 100644 --- a/comfy/memory_management.py +++ b/comfy/memory_management.py @@ -78,4 +78,4 @@ def interpret_gathered_like(tensors, gathered): return dest_views -aimdo_allocator = None +aimdo_enabled = False diff --git a/comfy/model_base.py b/comfy/model_base.py index 85acdb66a..8f852e3c6 100644 --- a/comfy/model_base.py +++ b/comfy/model_base.py @@ -50,6 +50,7 @@ import comfy.ldm.omnigen.omnigen2 import comfy.ldm.qwen_image.model import comfy.ldm.kandinsky5.model import comfy.ldm.anima.model +import comfy.ldm.ace.ace_step15 import comfy.model_management import comfy.patcher_extension @@ -75,6 +76,7 @@ class ModelType(Enum): FLUX = 8 IMG_TO_IMG = 9 FLOW_COSMOS = 10 + IMG_TO_IMG_FLOW = 11 def model_sampling(model_config, model_type): @@ -107,6 +109,8 @@ def model_sampling(model_config, model_type): elif model_type == ModelType.FLOW_COSMOS: c = comfy.model_sampling.COSMOS_RFLOW s = comfy.model_sampling.ModelSamplingCosmosRFlow + elif model_type == ModelType.IMG_TO_IMG_FLOW: + c = comfy.model_sampling.IMG_TO_IMG_FLOW class ModelSampling(s, c): pass @@ -146,11 +150,11 @@ class BaseModel(torch.nn.Module): self.diffusion_model.to(memory_format=torch.channels_last) logging.debug("using channels last mode for diffusion model") logging.info("model weight dtype {}, manual cast: {}".format(self.get_dtype(), self.manual_cast_dtype)) + comfy.model_management.archive_model_dtypes(self.diffusion_model) + self.model_type = model_type self.model_sampling = model_sampling(model_config, model_type) - comfy.model_management.archive_model_dtypes(self.diffusion_model) - self.adm_channels = unet_config.get("adm_in_channels", None) if self.adm_channels is None: self.adm_channels = 0 @@ -177,10 +181,7 @@ class BaseModel(torch.nn.Module): xc = torch.cat([xc] + [comfy.model_management.cast_to_device(c_concat, xc.device, xc.dtype)], dim=1) context = c_crossattn - dtype = self.get_dtype() - - if self.manual_cast_dtype is not None: - dtype = self.manual_cast_dtype + dtype = self.get_dtype_inference() xc = xc.to(dtype) device = xc.device @@ -217,6 +218,13 @@ class BaseModel(torch.nn.Module): def get_dtype(self): return self.diffusion_model.dtype + def get_dtype_inference(self): + dtype = self.get_dtype() + + if self.manual_cast_dtype is not None: + dtype = self.manual_cast_dtype + return dtype + def encode_adm(self, **kwargs): return None @@ -371,9 +379,7 @@ class BaseModel(torch.nn.Module): input_shapes += shape if comfy.model_management.xformers_enabled() or comfy.model_management.pytorch_attention_flash_attention(): - dtype = self.get_dtype() - if self.manual_cast_dtype is not None: - dtype = self.manual_cast_dtype + dtype = self.get_dtype_inference() #TODO: this needs to be tweaked area = sum(map(lambda input_shape: input_shape[0] * math.prod(input_shape[2:]), input_shapes)) return (area * comfy.model_management.dtype_size(dtype) * 0.01 * self.memory_usage_factor) * (1024 * 1024) @@ -968,6 +974,10 @@ class LTXV(BaseModel): if keyframe_idxs is not None: out['keyframe_idxs'] = comfy.conds.CONDRegular(keyframe_idxs) + guide_attention_entries = kwargs.get("guide_attention_entries", None) + if guide_attention_entries is not None: + out['guide_attention_entries'] = comfy.conds.CONDConstant(guide_attention_entries) + return out def process_timestep(self, timestep, x, denoise_mask=None, **kwargs): @@ -985,10 +995,14 @@ class LTXAV(BaseModel): def extra_conds(self, **kwargs): out = super().extra_conds(**kwargs) attention_mask = kwargs.get("attention_mask", None) + device = kwargs["device"] + if attention_mask is not None: out['attention_mask'] = comfy.conds.CONDRegular(attention_mask) cross_attn = kwargs.get("cross_attn", None) if cross_attn is not None: + if hasattr(self.diffusion_model, "preprocess_text_embeds"): + cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference())) out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) out['frame_rate'] = comfy.conds.CONDConstant(kwargs.get("frame_rate", 25)) @@ -1016,6 +1030,10 @@ class LTXAV(BaseModel): if latent_shapes is not None: out['latent_shapes'] = comfy.conds.CONDConstant(latent_shapes) + guide_attention_entries = kwargs.get("guide_attention_entries", None) + if guide_attention_entries is not None: + out['guide_attention_entries'] = comfy.conds.CONDConstant(guide_attention_entries) + return out def process_timestep(self, timestep, x, denoise_mask=None, audio_denoise_mask=None, **kwargs): @@ -1159,12 +1177,16 @@ class Anima(BaseModel): device = kwargs["device"] if cross_attn is not None: if t5xxl_ids is not None: - cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype()), t5xxl_ids.unsqueeze(0).to(device=device)) if t5xxl_weights is not None: - cross_attn *= t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn) + t5xxl_weights = t5xxl_weights.unsqueeze(0).unsqueeze(-1).to(cross_attn) + t5xxl_ids = t5xxl_ids.unsqueeze(0) + + if torch.is_inference_mode_enabled(): # if not we are training + cross_attn = self.diffusion_model.preprocess_text_embeds(cross_attn.to(device=device, dtype=self.get_dtype_inference()), t5xxl_ids.to(device=device), t5xxl_weights=t5xxl_weights.to(device=device, dtype=self.get_dtype_inference())) + else: + out['t5xxl_ids'] = comfy.conds.CONDRegular(t5xxl_ids) + out['t5xxl_weights'] = comfy.conds.CONDRegular(t5xxl_weights) - if cross_attn.shape[1] < 512: - cross_attn = torch.nn.functional.pad(cross_attn, (0, 0, 0, 512 - cross_attn.shape[1])) out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) return out @@ -1455,6 +1477,12 @@ class WAN22(WAN21): def scale_latent_inpaint(self, sigma, noise, latent_image, **kwargs): return latent_image +class WAN21_FlowRVS(WAN21): + def __init__(self, model_config, model_type=ModelType.IMG_TO_IMG_FLOW, image_to_video=False, device=None): + model_config.unet_config["model_type"] = "t2v" + super(WAN21, self).__init__(model_config, model_type, device=device, unet_model=comfy.ldm.wan.model.WanModel) + self.image_to_video = image_to_video + class Hunyuan3Dv2(BaseModel): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.hunyuan3d.model.Hunyuan3Dv2) @@ -1540,6 +1568,49 @@ class ACEStep(BaseModel): out['lyrics_strength'] = comfy.conds.CONDConstant(kwargs.get("lyrics_strength", 1.0)) return out +class ACEStep15(BaseModel): + def __init__(self, model_config, model_type=ModelType.FLOW, device=None): + super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.ace.ace_step15.AceStepConditionGenerationModel) + + def extra_conds(self, **kwargs): + out = super().extra_conds(**kwargs) + device = kwargs["device"] + noise = kwargs["noise"] + + cross_attn = kwargs.get("cross_attn", None) + if cross_attn is not None: + if torch.count_nonzero(cross_attn) == 0: + out['replace_with_null_embeds'] = comfy.conds.CONDConstant(True) + out['c_crossattn'] = comfy.conds.CONDRegular(cross_attn) + + conditioning_lyrics = kwargs.get("conditioning_lyrics", None) + if cross_attn is not None: + out['lyric_embed'] = comfy.conds.CONDRegular(conditioning_lyrics) + + refer_audio = kwargs.get("reference_audio_timbre_latents", None) + if refer_audio is None or len(refer_audio) == 0: + refer_audio = comfy.ldm.ace.ace_step15.get_silence_latent(noise.shape[2], device) + pass_audio_codes = True + else: + refer_audio = refer_audio[-1][:, :, :noise.shape[2]] + out['is_covers'] = comfy.conds.CONDConstant(True) + pass_audio_codes = False + + if pass_audio_codes: + audio_codes = kwargs.get("audio_codes", None) + if audio_codes is not None: + out['audio_codes'] = comfy.conds.CONDRegular(torch.tensor(audio_codes, device=device)) + refer_audio = refer_audio[:, :, :750] + else: + out['is_covers'] = comfy.conds.CONDConstant(False) + + if refer_audio.shape[2] < noise.shape[2]: + pad = comfy.ldm.ace.ace_step15.get_silence_latent(noise.shape[2], device) + refer_audio = torch.cat([refer_audio.to(pad), pad[:, :, refer_audio.shape[2]:]], dim=2) + + out['refer_audio'] = comfy.conds.CONDRegular(refer_audio) + return out + class Omnigen2(BaseModel): def __init__(self, model_config, model_type=ModelType.FLOW, device=None): super().__init__(model_config, model_type, device=device, unet_model=comfy.ldm.omnigen.omnigen2.OmniGen2Transformer2DModel) diff --git a/comfy/model_detection.py b/comfy/model_detection.py index 8cea16e50..b4b51b200 100644 --- a/comfy/model_detection.py +++ b/comfy/model_detection.py @@ -19,6 +19,12 @@ def count_blocks(state_dict_keys, prefix_string): count += 1 return count +def any_suffix_in(keys, prefix, main, suffix_list=[]): + for x in suffix_list: + if "{}{}{}".format(prefix, main, x) in keys: + return True + return False + def calculate_transformer_depth(prefix, state_dict_keys, state_dict): context_dim = None use_linear_in_transformer = False @@ -186,7 +192,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["meanflow_sum"] = False return dit_config - if '{}double_blocks.0.img_attn.norm.key_norm.scale'.format(key_prefix) in state_dict_keys and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or f"{key_prefix}distilled_guidance_layer.norms.0.scale" in state_dict_keys): #Flux, Chroma or Chroma Radiance (has no img_in.weight) + if any_suffix_in(state_dict_keys, key_prefix, 'double_blocks.0.img_attn.norm.key_norm.', ["weight", "scale"]) and ('{}img_in.weight'.format(key_prefix) in state_dict_keys or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"])): #Flux, Chroma or Chroma Radiance (has no img_in.weight) dit_config = {} if '{}double_stream_modulation_img.lin.weight'.format(key_prefix) in state_dict_keys: dit_config["image_model"] = "flux2" @@ -241,7 +247,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["depth"] = count_blocks(state_dict_keys, '{}double_blocks.'.format(key_prefix) + '{}.') dit_config["depth_single_blocks"] = count_blocks(state_dict_keys, '{}single_blocks.'.format(key_prefix) + '{}.') - if '{}distilled_guidance_layer.0.norms.0.scale'.format(key_prefix) in state_dict_keys or '{}distilled_guidance_layer.norms.0.scale'.format(key_prefix) in state_dict_keys: #Chroma + + if any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.0.norms.0.', ["weight", "scale"]) or any_suffix_in(state_dict_keys, key_prefix, 'distilled_guidance_layer.norms.0.', ["weight", "scale"]): #Chroma dit_config["image_model"] = "chroma" dit_config["in_channels"] = 64 dit_config["out_channels"] = 64 @@ -249,7 +256,8 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["out_dim"] = 3072 dit_config["hidden_dim"] = 5120 dit_config["n_layers"] = 5 - if f"{key_prefix}nerf_blocks.0.norm.scale" in state_dict_keys: #Chroma Radiance + + if any_suffix_in(state_dict_keys, key_prefix, 'nerf_blocks.0.norm.', ["weight", "scale"]): #Chroma Radiance dit_config["image_model"] = "chroma_radiance" dit_config["in_channels"] = 3 dit_config["out_channels"] = 3 @@ -259,7 +267,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["nerf_depth"] = 4 dit_config["nerf_max_freqs"] = 8 dit_config["nerf_tile_size"] = 512 - dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear" + dit_config["nerf_final_head_type"] = "conv" if any_suffix_in(state_dict_keys, key_prefix, 'nerf_final_layer_conv.norm.', ["weight", "scale"]) else "linear" dit_config["nerf_embedder_dtype"] = torch.float32 if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred dit_config["use_x0"] = True @@ -268,7 +276,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): else: dit_config["guidance_embed"] = "{}guidance_in.in_layer.weight".format(key_prefix) in state_dict_keys dit_config["yak_mlp"] = '{}double_blocks.0.img_mlp.gate_proj.weight'.format(key_prefix) in state_dict_keys - dit_config["txt_norm"] = "{}txt_norm.scale".format(key_prefix) in state_dict_keys + dit_config["txt_norm"] = any_suffix_in(state_dict_keys, key_prefix, 'txt_norm.', ["weight", "scale"]) if dit_config["yak_mlp"] and dit_config["txt_norm"]: # Ovis model dit_config["txt_ids_dims"] = [1, 2] @@ -501,6 +509,9 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): if ref_conv_weight is not None: dit_config["in_dim_ref_conv"] = ref_conv_weight.shape[1] + if metadata is not None and "config" in metadata: + dit_config.update(json.loads(metadata["config"]).get("transformer", {})) + return dit_config if '{}latent_in.weight'.format(key_prefix) in state_dict_keys: # Hunyuan 3D @@ -655,6 +666,11 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): dit_config["num_visual_blocks"] = count_blocks(state_dict_keys, '{}visual_transformer_blocks.'.format(key_prefix) + '{}.') return dit_config + if '{}encoder.lyric_encoder.layers.0.input_layernorm.weight'.format(key_prefix) in state_dict_keys: + dit_config = {} + dit_config["audio_model"] = "ace1.5" + return dit_config + if '{}input_blocks.0.0.weight'.format(key_prefix) not in state_dict_keys: return None @@ -779,6 +795,10 @@ def detect_unet_config(state_dict, key_prefix, metadata=None): unet_config["use_temporal_resblock"] = False unet_config["use_temporal_attention"] = False + heatmap_key = '{}heatmap_head.conv_layers.0.weight'.format(key_prefix) + if heatmap_key in state_dict_keys: + unet_config["heatmap_head"] = True + return unet_config def model_config_from_unet_config(unet_config, state_dict=None): @@ -999,7 +1019,7 @@ def unet_config_from_diffusers_unet(state_dict, dtype=None): LotusD = {'use_checkpoint': False, 'image_size': 32, 'out_channels': 4, 'use_spatial_transformer': True, 'legacy': False, 'adm_in_channels': 4, 'dtype': dtype, 'in_channels': 4, 'model_channels': 320, 'num_res_blocks': [2, 2, 2, 2], 'transformer_depth': [1, 1, 1, 1, 1, 1, 0, 0], - 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_heads': 8, + 'channel_mult': [1, 2, 4, 4], 'transformer_depth_middle': 1, 'use_linear_in_transformer': True, 'context_dim': 1024, 'num_head_channels': 64, 'transformer_depth_output': [1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0], 'use_temporal_attention': False, 'use_temporal_resblock': False} diff --git a/comfy/model_management.py b/comfy/model_management.py index 758e718e8..f73613f17 100644 --- a/comfy/model_management.py +++ b/comfy/model_management.py @@ -20,6 +20,7 @@ import psutil import logging from enum import Enum from comfy.cli_args import args, PerformanceFeature +import threading import torch import sys import platform @@ -54,6 +55,11 @@ cpu_state = CPUState.GPU total_vram = 0 + +# Training Related State +in_training = False + + def get_supported_float8_types(): float8_types = [] try: @@ -344,7 +350,7 @@ AMD_ENABLE_MIOPEN_ENV = 'COMFYUI_ENABLE_MIOPEN' try: if is_amd(): - arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName + arch = torch.cuda.get_device_properties(get_torch_device()).gcnArchName.split(':')[0] if not (any((a in arch) for a in AMD_RDNA2_AND_OLDER_ARCH)): if os.getenv(AMD_ENABLE_MIOPEN_ENV) != '1': torch.backends.cudnn.enabled = False # Seems to improve things a lot on AMD @@ -372,7 +378,7 @@ try: if args.use_split_cross_attention == False and args.use_quad_cross_attention == False: if aotriton_supported(arch): # AMD efficient attention implementation depends on aotriton. if torch_version_numeric >= (2, 7): # works on 2.6 but doesn't actually seem to improve much - if any((a in arch) for a in ["gfx90a", "gfx942", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950 + if any((a in arch) for a in ["gfx90a", "gfx942", "gfx950", "gfx1100", "gfx1101", "gfx1151"]): # TODO: more arches, TODO: gfx950 ENABLE_PYTORCH_ATTENTION = True if rocm_version >= (7, 0): if any((a in arch) for a in ["gfx1200", "gfx1201"]): @@ -830,7 +836,7 @@ def unet_inital_load_device(parameters, dtype): mem_dev = get_free_memory(torch_dev) mem_cpu = get_free_memory(cpu_dev) - if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_allocator is None: + if mem_dev > mem_cpu and model_size < mem_dev and comfy.memory_management.aimdo_enabled: return torch_dev else: return cpu_dev @@ -1112,11 +1118,10 @@ def get_cast_buffer(offload_stream, device, size, ref): return None if cast_buffer is not None and cast_buffer.numel() > 50 * (1024 ** 2): #I want my wrongly sized 50MB+ of VRAM back from the caching allocator right now - torch.cuda.synchronize() + synchronize() del STREAM_CAST_BUFFERS[offload_stream] del cast_buffer - #FIXME: This doesn't work in Aimdo because mempool cant clear cache - torch.cuda.empty_cache() + soft_empty_cache() with wf_context: cast_buffer = torch.empty((size), dtype=torch.int8, device=device) STREAM_CAST_BUFFERS[offload_stream] = cast_buffer @@ -1132,9 +1137,7 @@ def reset_cast_buffers(): for offload_stream in STREAM_CAST_BUFFERS: offload_stream.synchronize() STREAM_CAST_BUFFERS.clear() - if comfy.memory_management.aimdo_allocator is None: - #Pytorch 2.7 and earlier crashes if you try and empty_cache when mempools exist - torch.cuda.empty_cache() + soft_empty_cache() def get_offload_stream(device): stream_counter = stream_counters.get(device, 0) @@ -1202,27 +1205,35 @@ def cast_to(weight, dtype=None, device=None, non_blocking=False, copy=False, str assert r is None assert stream is None - r = torch.empty_like(weight, dtype=weight._model_dtype, device=device) + cast_geometry = comfy.memory_management.tensors_to_geometries([ weight ]) + + if dtype is None: + dtype = weight._model_dtype signature = comfy_aimdo.model_vbar.vbar_fault(weight._v) if signature is not None: - raw_tensor = comfy_aimdo.torch.aimdo_to_tensor(weight._v, device) - v_tensor = comfy.memory_management.interpret_gathered_like([r], raw_tensor)[0] + if comfy_aimdo.model_vbar.vbar_signature_compare(signature, weight._v_signature): + v_tensor = weight._v_tensor + else: + raw_tensor = comfy_aimdo.torch.aimdo_to_tensor(weight._v, device) + v_tensor = comfy.memory_management.interpret_gathered_like(cast_geometry, raw_tensor)[0] + weight._v_tensor = v_tensor + weight._v_signature = signature + #Send it over + v_tensor.copy_(weight, non_blocking=non_blocking) + return v_tensor.to(dtype=dtype) - if comfy_aimdo.model_vbar.vbar_signature_compare(signature, weight._v_signature): - #always take a deep copy even if _v is good, as we have no reasonable point to unpin - #a non comfy weight - r.copy_(v_tensor) - comfy_aimdo.model_vbar.vbar_unpin(weight._v) - return r + r = torch.empty_like(weight, dtype=dtype, device=device) + if weight.dtype != r.dtype and weight.dtype != weight._model_dtype: + #Offloaded casting could skip this, however it would make the quantizations + #inconsistent between loaded and offloaded weights. So force the double casting + #that would happen in regular flow to make offload deterministic. + cast_buffer = torch.empty_like(weight, dtype=weight._model_dtype, device=device) + cast_buffer.copy_(weight, non_blocking=non_blocking) + weight = cast_buffer r.copy_(weight, non_blocking=non_blocking) - if signature is not None: - weight._v_signature = signature - v_tensor.copy_(r) - comfy_aimdo.model_vbar.vbar_unpin(weight._v) - return r if device is None or weight.device == device: @@ -1275,7 +1286,7 @@ def discard_cuda_async_error(): a = torch.tensor([1], dtype=torch.uint8, device=get_torch_device()) b = torch.tensor([1], dtype=torch.uint8, device=get_torch_device()) _ = a + b - torch.cuda.synchronize() + synchronize() except torch.AcceleratorError: #Dump it! We already know about it from the synchronous return pass @@ -1679,6 +1690,12 @@ def lora_compute_dtype(device): LORA_COMPUTE_DTYPES[device] = dtype return dtype +def synchronize(): + if is_intel_xpu(): + torch.xpu.synchronize() + elif torch.cuda.is_available(): + torch.cuda.synchronize() + def soft_empty_cache(force=False): global cpu_state if cpu_state == CPUState.MPS: @@ -1690,11 +1707,9 @@ def soft_empty_cache(force=False): elif is_mlu(): torch.mlu.empty_cache() elif torch.cuda.is_available(): - if comfy.memory_management.aimdo_allocator is None: - #Pytorch 2.7 and earlier crashes if you try and empty_cache when mempools exist - torch.cuda.synchronize() - torch.cuda.empty_cache() - torch.cuda.ipc_collect() + torch.cuda.synchronize() + torch.cuda.empty_cache() + torch.cuda.ipc_collect() def unload_all_models(): free_memory(1e30, get_torch_device()) @@ -1704,9 +1719,6 @@ def debug_memory_summary(): return torch.cuda.memory.memory_summary() return "" -#TODO: might be cleaner to put this somewhere else -import threading - class InterruptProcessingException(Exception): pass diff --git a/comfy/model_patcher.py b/comfy/model_patcher.py index 57b53d8c5..1c9ba8096 100644 --- a/comfy/model_patcher.py +++ b/comfy/model_patcher.py @@ -19,7 +19,6 @@ from __future__ import annotations import collections -import copy import inspect import logging import math @@ -111,6 +110,10 @@ def move_weight_functions(m, device): memory += f.move_to(device=device) return memory +def string_to_seed(data): + logging.warning("WARNING: string_to_seed has moved from comfy.model_patcher to comfy.utils") + return comfy.utils.string_to_seed(data) + class LowVramPatch: def __init__(self, key, patches, convert_func=None, set_func=None): self.key = key @@ -157,6 +160,11 @@ def get_key_weight(model, key): return weight, set_func, convert_func +def key_param_name_to_key(key, param): + if len(key) == 0: + return param + return "{}.{}".format(key, param) + class AutoPatcherEjector: def __init__(self, model: 'ModelPatcher', skip_and_inject_on_exit_only=False): self.model = model @@ -263,6 +271,7 @@ class ModelPatcher: self.is_clip = False self.hook_mode = comfy.hooks.EnumHookMode.MaxSpeed + self.cached_patcher_init: tuple[Callable, tuple] | None = None if not hasattr(self.model, 'model_loaded_weight_memory'): self.model.model_loaded_weight_memory = 0 @@ -299,8 +308,15 @@ class ModelPatcher: def get_free_memory(self, device): return comfy.model_management.get_free_memory(device) - def clone(self): - n = self.__class__(self.model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update) + def clone(self, disable_dynamic=False): + class_ = self.__class__ + model = self.model + if self.is_dynamic() and disable_dynamic: + class_ = ModelPatcher + temp_model_patcher = self.cached_patcher_init[0](*self.cached_patcher_init[1], disable_dynamic=True) + model = temp_model_patcher.model + + n = class_(model, self.load_device, self.offload_device, self.model_size(), weight_inplace_update=self.weight_inplace_update) n.patches = {} for k in self.patches: n.patches[k] = self.patches[k][:] @@ -308,7 +324,7 @@ class ModelPatcher: n.object_patches = self.object_patches.copy() n.weight_wrapper_patches = self.weight_wrapper_patches.copy() - n.model_options = copy.deepcopy(self.model_options) + n.model_options = comfy.utils.deepcopy_list_dict(self.model_options) n.backup = self.backup n.object_patches_backup = self.object_patches_backup n.parent = self @@ -354,6 +370,8 @@ class ModelPatcher: n.is_clip = self.is_clip n.hook_mode = self.hook_mode + n.cached_patcher_init = self.cached_patcher_init + for callback in self.get_all_callbacks(CallbacksMP.ON_CLONE): callback(self, n) return n @@ -398,13 +416,16 @@ class ModelPatcher: def memory_required(self, input_shape): return self.model.memory_required(input_shape=input_shape) + def disable_model_cfg1_optimization(self): + self.model_options["disable_cfg1_optimization"] = True + def set_model_sampler_cfg_function(self, sampler_cfg_function, disable_cfg1_optimization=False): if len(inspect.signature(sampler_cfg_function).parameters) == 3: self.model_options["sampler_cfg_function"] = lambda args: sampler_cfg_function(args["cond"], args["uncond"], args["cond_scale"]) #Old way else: self.model_options["sampler_cfg_function"] = sampler_cfg_function if disable_cfg1_optimization: - self.model_options["disable_cfg1_optimization"] = True + self.disable_model_cfg1_optimization() def set_model_sampler_post_cfg_function(self, post_cfg_function, disable_cfg1_optimization=False): self.model_options = set_model_options_post_cfg_function(self.model_options, post_cfg_function, disable_cfg1_optimization) @@ -671,18 +692,19 @@ class ModelPatcher: for key in list(self.pinned): self.unpin_weight(key) - def _load_list(self, prio_comfy_cast_weights=False): + def _load_list(self, prio_comfy_cast_weights=False, default_device=None): loading = [] for n, m in self.model.named_modules(): - params = [] - skip = False - for name, param in m.named_parameters(recurse=False): - params.append(name) + default = False + params = { name: param for name, param in m.named_parameters(recurse=False) } for name, param in m.named_parameters(recurse=True): if name not in params: - skip = True # skip random weights in non leaf modules + default = True # default random weights in non leaf modules break - if not skip and (hasattr(m, "comfy_cast_weights") or len(params) > 0): + if default and default_device is not None: + for param in params.values(): + param.data = param.data.to(device=default_device) + if not default and (hasattr(m, "comfy_cast_weights") or len(params) > 0): module_mem = comfy.model_management.module_size(m) module_offload_mem = module_mem if hasattr(m, "comfy_cast_weights"): @@ -791,7 +813,7 @@ class ModelPatcher: continue for param in params: - key = "{}.{}".format(n, param) + key = key_param_name_to_key(n, param) self.unpin_weight(key) self.patch_weight_to_device(key, device_to=device_to) if comfy.model_management.is_device_cuda(device_to): @@ -807,7 +829,7 @@ class ModelPatcher: n = x[1] params = x[3] for param in params: - self.pin_weight_to_device("{}.{}".format(n, param)) + self.pin_weight_to_device(key_param_name_to_key(n, param)) usable_stat = "{:.2f} MB usable,".format(lowvram_model_memory / (1024 * 1024)) if lowvram_model_memory < 1e32 else "" if lowvram_counter > 0: @@ -913,7 +935,7 @@ class ModelPatcher: if hasattr(m, "comfy_patched_weights") and m.comfy_patched_weights == True: move_weight = True for param in params: - key = "{}.{}".format(n, param) + key = key_param_name_to_key(n, param) bk = self.backup.get(key, None) if bk is not None: if not lowvram_possible: @@ -964,7 +986,7 @@ class ModelPatcher: logging.debug("freed {}".format(n)) for param in params: - self.pin_weight_to_device("{}.{}".format(n, param)) + self.pin_weight_to_device(key_param_name_to_key(n, param)) self.model.model_lowvram = True @@ -1391,7 +1413,7 @@ class ModelPatcher: continue key = "diffusion_model." + k unet_state_dict[k] = LazyCastingParam(self, key, comfy.utils.get_attr(self.model, key)) - return self.model.state_dict_for_saving(unet_state_dict) + return self.model.state_dict_for_saving(unet_state_dict, clip_state_dict=clip_state_dict, vae_state_dict=vae_state_dict, clip_vision_state_dict=clip_vision_state_dict) def __del__(self): self.unpin_all_weights() @@ -1483,9 +1505,11 @@ class ModelPatcherDynamic(ModelPatcher): if vbar is not None: vbar.prioritize() - #We have way more tools for acceleration on comfy weight offloading, so always + #We force reserve VRAM for the non comfy-weight so we dont have to deal + #with pin and unpin syncrhonization which can be expensive for small weights + #with a high layer rate (e.g. autoregressive LLMs). #prioritize the non-comfy weights (note the order reverse). - loading = self._load_list(prio_comfy_cast_weights=True) + loading = self._load_list(prio_comfy_cast_weights=True, default_device=device_to) loading.sort(reverse=True) for x in loading: @@ -1497,14 +1521,16 @@ class ModelPatcherDynamic(ModelPatcher): def setup_param(self, m, n, param_key): nonlocal num_patches - key = "{}.{}".format(n, param_key) + key = key_param_name_to_key(n, param_key) weight_function = [] weight, _, _ = get_key_weight(self.model, key) if weight is None: - return 0 + return (False, 0) if key in self.patches: + if comfy.lora.calculate_shape(self.patches[key], weight, key) != weight.shape: + return (True, 0) setattr(m, param_key + "_lowvram_function", LowVramPatch(key, self.patches)) num_patches += 1 else: @@ -1515,10 +1541,16 @@ class ModelPatcherDynamic(ModelPatcher): setattr(m, param_key + "_function", weight_function) geometry = weight if not isinstance(weight, QuantizedTensor): - model_dtype = getattr(m, param_key + "_comfy_model_dtype", weight.dtype) + model_dtype = getattr(m, param_key + "_comfy_model_dtype", None) or weight.dtype weight._model_dtype = model_dtype geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype) - return comfy.memory_management.vram_aligned_size(geometry) + return (False, comfy.memory_management.vram_aligned_size(geometry)) + + def force_load_param(self, param_key, device_to): + key = key_param_name_to_key(n, param_key) + if key in self.backup: + comfy.utils.set_attr_param(self.model, key, self.backup[key].weight) + self.patch_weight_to_device(key, device_to=device_to) if hasattr(m, "comfy_cast_weights"): m.comfy_cast_weights = True @@ -1526,28 +1558,37 @@ class ModelPatcherDynamic(ModelPatcher): m.seed_key = n set_dirty(m, dirty) - v_weight_size = 0 - v_weight_size += setup_param(self, m, n, "weight") - v_weight_size += setup_param(self, m, n, "bias") + force_load, v_weight_size = setup_param(self, m, n, "weight") + force_load_bias, v_weight_bias = setup_param(self, m, n, "bias") + force_load = force_load or force_load_bias + v_weight_size += v_weight_bias - if vbar is not None and not hasattr(m, "_v"): - m._v = vbar.alloc(v_weight_size) - allocated_size += v_weight_size + if force_load: + logging.info(f"Module {n} has resizing Lora - force loading") + force_load_param(self, "weight", device_to) + force_load_param(self, "bias", device_to) + else: + if vbar is not None and not hasattr(m, "_v"): + m._v = vbar.alloc(v_weight_size) + allocated_size += v_weight_size else: for param in params: - key = "{}.{}".format(n, param) + key = key_param_name_to_key(n, param) weight, _, _ = get_key_weight(self.model, key) weight.seed_key = key set_dirty(weight, dirty) geometry = weight - model_dtype = getattr(m, param + "_comfy_model_dtype", weight.dtype) + model_dtype = getattr(m, param + "_comfy_model_dtype", None) or weight.dtype geometry = comfy.memory_management.TensorGeometry(shape=weight.shape, dtype=model_dtype) weight_size = geometry.numel() * geometry.element_size() if vbar is not None and not hasattr(weight, "_v"): weight._v = vbar.alloc(weight_size) weight._model_dtype = model_dtype allocated_size += weight_size + vbar.set_watermark_limit(allocated_size) + + move_weight_functions(m, device_to) logging.info(f"Model {self.model.__class__.__name__} prepared for dynamic VRAM loading. {allocated_size // (1024 ** 2)}MB Staged. {num_patches} patches attached.") @@ -1568,7 +1609,7 @@ class ModelPatcherDynamic(ModelPatcher): return 0 if vbar is None else vbar.free_memory(memory_to_free) def partially_unload_ram(self, ram_to_unload): - loading = self._load_list(prio_comfy_cast_weights=True) + loading = self._load_list(prio_comfy_cast_weights=True, default_device=self.offload_device) for x in loading: _, _, _, _, m, _ = x ram_to_unload -= comfy.pinned_memory.unpin_memory(m) @@ -1588,7 +1629,14 @@ class ModelPatcherDynamic(ModelPatcher): if unpatch_weights: self.partially_unload_ram(1e32) - self.partially_unload(None) + self.partially_unload(None, 1e32) + for m in self.model.modules(): + move_weight_functions(m, device_to) + + keys = list(self.backup.keys()) + for k in keys: + bk = self.backup[k] + comfy.utils.set_attr_param(self.model, k, bk.weight) def partially_load(self, device_to, extra_memory=0, force_patch_weights=False): assert not force_patch_weights #See above diff --git a/comfy/model_sampling.py b/comfy/model_sampling.py index 2a00ed819..13860e6a2 100644 --- a/comfy/model_sampling.py +++ b/comfy/model_sampling.py @@ -83,6 +83,16 @@ class IMG_TO_IMG(X0): def calculate_input(self, sigma, noise): return noise +class IMG_TO_IMG_FLOW(CONST): + def calculate_denoised(self, sigma, model_output, model_input): + return model_output + + def noise_scaling(self, sigma, noise, latent_image, max_denoise=False): + return latent_image + + def inverse_noise_scaling(self, sigma, latent): + return 1.0 - latent + class COSMOS_RFLOW: def calculate_input(self, sigma, noise): sigma = (sigma / (sigma + 1)) diff --git a/comfy/ops.py b/comfy/ops.py index c3a1825ce..98fec1e1d 100644 --- a/comfy/ops.py +++ b/comfy/ops.py @@ -19,9 +19,8 @@ import torch import logging import comfy.model_management -from comfy.cli_args import args, PerformanceFeature, enables_dynamic_vram +from comfy.cli_args import args, PerformanceFeature import comfy.float -import comfy.rmsnorm import json import comfy.memory_management import comfy.pinned_memory @@ -54,6 +53,8 @@ try: SDPA_BACKEND_PRIORITY.insert(0, SDPBackend.CUDNN_ATTENTION) def scaled_dot_product_attention(q, k, v, *args, **kwargs): + if q.nelement() < 1024 * 128: # arbitrary number, for small inputs cudnn attention seems slower + return torch.nn.functional.scaled_dot_product_attention(q, k, v, *args, **kwargs) with sdpa_kernel(SDPA_BACKEND_PRIORITY, set_priority=True): return torch.nn.functional.scaled_dot_product_attention(q, k, v, *args, **kwargs) else: @@ -78,17 +79,21 @@ def cast_to_input(weight, input, non_blocking=False, copy=True): return comfy.model_management.cast_to(weight, input.dtype, input.device, non_blocking=non_blocking, copy=copy) -def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype): +def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant): offload_stream = None xfer_dest = None - cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ]) signature = comfy_aimdo.model_vbar.vbar_fault(s._v) - if signature is not None: - xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) resident = comfy_aimdo.model_vbar.vbar_signature_compare(signature, s._v_signature) + if signature is not None: + if resident: + weight = s._v_weight + bias = s._v_bias + else: + xfer_dest = comfy_aimdo.torch.aimdo_to_tensor(s._v, device) if not resident: + cast_geometry = comfy.memory_management.tensors_to_geometries([ s.weight, s.bias ]) cast_dest = None xfer_source = [ s.weight, s.bias ] @@ -96,16 +101,16 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu pin = comfy.pinned_memory.get_pin(s) if pin is not None: xfer_source = [ pin ] - else: - for data, geometry in zip([ s.weight, s.bias ], cast_geometry): - if data is None: - continue - if data.dtype != geometry.dtype: - cast_dest = xfer_dest - if cast_dest is None: - cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device) - xfer_dest = None - break + + for data, geometry in zip([ s.weight, s.bias ], cast_geometry): + if data is None: + continue + if data.dtype != geometry.dtype: + cast_dest = xfer_dest + if cast_dest is None: + cast_dest = torch.empty((comfy.memory_management.vram_aligned_size(cast_geometry),), dtype=torch.uint8, device=device) + xfer_dest = None + break dest_size = comfy.memory_management.vram_aligned_size(xfer_source) offload_stream = comfy.model_management.get_offload_stream(device) @@ -132,15 +137,19 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu comfy.model_management.sync_stream(device, offload_stream) if cast_dest is not None: - for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like(xfer_source, xfer_dest), + for pre_cast, post_cast in zip(comfy.memory_management.interpret_gathered_like([s.weight, s.bias ], xfer_dest), comfy.memory_management.interpret_gathered_like(cast_geometry, cast_dest)): if post_cast is not None: post_cast.copy_(pre_cast) xfer_dest = cast_dest - params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest) - weight = params[0] - bias = params[1] + params = comfy.memory_management.interpret_gathered_like(cast_geometry, xfer_dest) + weight = params[0] + bias = params[1] + if signature is not None: + s._v_weight = weight + s._v_bias = bias + s._v_signature=signature def post_cast(s, param_key, x, dtype, resident, update_weight): lowvram_fn = getattr(s, param_key + "_lowvram_function", None) @@ -161,14 +170,14 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu #FIXME: this is not accurate, we need to be sensitive to the compute dtype x = lowvram_fn(x) if (isinstance(orig, QuantizedTensor) and - (orig.dtype == dtype and len(fns) == 0 or update_weight)): + (want_requant and len(fns) == 0 or update_weight)): seed = comfy.utils.string_to_seed(s.seed_key) y = QuantizedTensor.from_float(x, s.layout_type, scale="recalculate", stochastic_rounding=seed) - if orig.dtype == dtype and len(fns) == 0: + if want_requant and len(fns) == 0: #The layer actually wants our freshly saved QT x = y - else: - y = x + elif update_weight: + y = comfy.float.stochastic_rounding(x, orig.dtype, seed = comfy.utils.string_to_seed(s.seed_key)) if update_weight: orig.copy_(y) for f in fns: @@ -180,13 +189,12 @@ def cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compu weight = post_cast(s, "weight", weight, dtype, resident, update_weight) if s.bias is not None: bias = post_cast(s, "bias", bias, bias_dtype, resident, update_weight) - s._v_signature=signature #FIXME: weird offload return protocol return weight, bias, (offload_stream, device if signature is not None else None, None) -def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False, compute_dtype=None): +def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, offloadable=False, compute_dtype=None, want_requant=False): # NOTE: offloadable=False is a a legacy and if you are a custom node author reading this please pass # offloadable=True and call uncast_bias_weight() after your last usage of the weight/bias. This # will add async-offload support to your cast and improve performance. @@ -204,7 +212,7 @@ def cast_bias_weight(s, input=None, dtype=None, device=None, bias_dtype=None, of non_blocking = comfy.model_management.device_supports_non_blocking(device) if hasattr(s, "_v"): - return cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype) + return cast_bias_weight_with_vbar(s, dtype, device, bias_dtype, non_blocking, compute_dtype, want_requant) if offloadable and (device != s.weight.device or (s.bias is not None and device != s.bias.device)): @@ -288,7 +296,7 @@ class disable_weight_init: class Linear(torch.nn.Linear, CastWeightBiasOp): def __init__(self, in_features, out_features, bias=True, device=None, dtype=None): - if not comfy.model_management.WINDOWS or not enables_dynamic_vram(): + if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled: super().__init__(in_features, out_features, bias, device, dtype) return @@ -309,7 +317,7 @@ class disable_weight_init: def _load_from_state_dict(self, state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs): - if not comfy.model_management.WINDOWS or not enables_dynamic_vram(): + if not comfy.model_management.WINDOWS or not comfy.memory_management.aimdo_enabled: return super()._load_from_state_dict(state_dict, prefix, local_metadata, strict, missing_keys, unexpected_keys, error_msgs) assign_to_params_buffers = local_metadata.get("assign_to_params_buffers", False) @@ -454,7 +462,7 @@ class disable_weight_init: else: return super().forward(*args, **kwargs) - class RMSNorm(comfy.rmsnorm.RMSNorm, CastWeightBiasOp): + class RMSNorm(torch.nn.RMSNorm, CastWeightBiasOp): def reset_parameters(self): self.bias = None return None @@ -466,8 +474,7 @@ class disable_weight_init: weight = None bias = None offload_stream = None - x = comfy.rmsnorm.rms_norm(input, weight, self.eps) # TODO: switch to commented out line when old torch is deprecated - # x = torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps) + x = torch.nn.functional.rms_norm(input, self.normalized_shape, weight, self.eps) uncast_bias_weight(self, weight, bias, offload_stream) return x @@ -820,6 +827,10 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec else: sd = {} + if not hasattr(self, 'weight'): + logging.warning("Warning: state dict on uninitialized op {}".format(prefix)) + return sd + if self.bias is not None: sd["{}bias".format(prefix)] = self.bias @@ -843,8 +854,8 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec def _forward(self, input, weight, bias): return torch.nn.functional.linear(input, weight, bias) - def forward_comfy_cast_weights(self, input, compute_dtype=None): - weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True, compute_dtype=compute_dtype) + def forward_comfy_cast_weights(self, input, compute_dtype=None, want_requant=False): + weight, bias, offload_stream = cast_bias_weight(self, input, offloadable=True, compute_dtype=compute_dtype, want_requant=want_requant) x = self._forward(input, weight, bias) uncast_bias_weight(self, weight, bias, offload_stream) return x @@ -874,8 +885,7 @@ def mixed_precision_ops(quant_config={}, compute_dtype=torch.bfloat16, full_prec scale = comfy.model_management.cast_to_device(scale, input.device, None) input = QuantizedTensor.from_float(input_reshaped, self.layout_type, scale=scale) - - output = self.forward_comfy_cast_weights(input, compute_dtype) + output = self.forward_comfy_cast_weights(input, compute_dtype, want_requant=isinstance(input, QuantizedTensor)) # Reshape output back to 3D if input was 3D if reshaped_3d: diff --git a/comfy/pinned_memory.py b/comfy/pinned_memory.py index 0650e4d1a..8acc327a7 100644 --- a/comfy/pinned_memory.py +++ b/comfy/pinned_memory.py @@ -11,8 +11,7 @@ def pin_memory(module): if module.pin_failed or args.disable_pinned_memory or get_pin(module) is not None: return #FIXME: This is a RAM cache trigger event - params = comfy.memory_management.tensors_to_geometries([ module.weight, module.bias ]) - size = comfy.memory_management.vram_aligned_size(params) + size = comfy.memory_management.vram_aligned_size([ module.weight, module.bias ]) pin = torch.empty((size,), dtype=torch.uint8) if comfy.model_management.pin_memory(pin): module._pin = pin diff --git a/comfy/rmsnorm.py b/comfy/rmsnorm.py index 555542a46..ab7cf14fa 100644 --- a/comfy/rmsnorm.py +++ b/comfy/rmsnorm.py @@ -1,57 +1,10 @@ import torch import comfy.model_management -import numbers -import logging - -RMSNorm = None - -try: - rms_norm_torch = torch.nn.functional.rms_norm - RMSNorm = torch.nn.RMSNorm -except: - rms_norm_torch = None - logging.warning("Please update pytorch to use native RMSNorm") +RMSNorm = torch.nn.RMSNorm def rms_norm(x, weight=None, eps=1e-6): - if rms_norm_torch is not None and not (torch.jit.is_tracing() or torch.jit.is_scripting()): - if weight is None: - return rms_norm_torch(x, (x.shape[-1],), eps=eps) - else: - return rms_norm_torch(x, weight.shape, weight=comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device), eps=eps) + if weight is None: + return torch.nn.functional.rms_norm(x, (x.shape[-1],), eps=eps) else: - r = x * torch.rsqrt(torch.mean(x**2, dim=-1, keepdim=True) + eps) - if weight is None: - return r - else: - return r * comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device) - - -if RMSNorm is None: - class RMSNorm(torch.nn.Module): - def __init__( - self, - normalized_shape, - eps=1e-6, - elementwise_affine=True, - device=None, - dtype=None, - ): - factory_kwargs = {"device": device, "dtype": dtype} - super().__init__() - if isinstance(normalized_shape, numbers.Integral): - # mypy error: incompatible types in assignment - normalized_shape = (normalized_shape,) # type: ignore[assignment] - self.normalized_shape = tuple(normalized_shape) # type: ignore[arg-type] - self.eps = eps - self.elementwise_affine = elementwise_affine - if self.elementwise_affine: - self.weight = torch.nn.Parameter( - torch.empty(self.normalized_shape, **factory_kwargs) - ) - else: - self.register_parameter("weight", None) - self.bias = None - - def forward(self, x): - return rms_norm(x, self.weight, self.eps) + return torch.nn.functional.rms_norm(x, weight.shape, weight=comfy.model_management.cast_to(weight, dtype=x.dtype, device=x.device), eps=eps) diff --git a/comfy/sampler_helpers.py b/comfy/sampler_helpers.py index 9134e6d71..1f75f2ba7 100644 --- a/comfy/sampler_helpers.py +++ b/comfy/sampler_helpers.py @@ -122,20 +122,26 @@ def estimate_memory(model, noise_shape, conds): minimum_memory_required = model.model.memory_required([noise_shape[0]] + list(noise_shape[1:]), cond_shapes=cond_shapes_min) return memory_required, minimum_memory_required -def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False): +def prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False, force_offload=False): executor = comfy.patcher_extension.WrapperExecutor.new_executor( _prepare_sampling, comfy.patcher_extension.get_all_wrappers(comfy.patcher_extension.WrappersMP.PREPARE_SAMPLING, model_options, is_model_options=True) ) - return executor.execute(model, noise_shape, conds, model_options=model_options, force_full_load=force_full_load) + return executor.execute(model, noise_shape, conds, model_options=model_options, force_full_load=force_full_load, force_offload=force_offload) -def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False): +def _prepare_sampling(model: ModelPatcher, noise_shape, conds, model_options=None, force_full_load=False, force_offload=False): real_model: BaseModel = None models, inference_memory = get_additional_models(conds, model.model_dtype()) models += get_additional_models_from_model_options(model_options) models += model.get_nested_additional_models() # TODO: does this require inference_memory update? - memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds) - comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required + inference_memory, minimum_memory_required=minimum_memory_required + inference_memory, force_full_load=force_full_load) + if force_offload: # In training + offload enabled, we want to force prepare sampling to trigger partial load + memory_required = 1e20 + minimum_memory_required = None + else: + memory_required, minimum_memory_required = estimate_memory(model, noise_shape, conds) + memory_required += inference_memory + minimum_memory_required += inference_memory + comfy.model_management.load_models_gpu([model] + models, memory_required=memory_required, minimum_memory_required=minimum_memory_required, force_full_load=force_full_load) real_model = model.model return real_model, conds, models diff --git a/comfy/sd.py b/comfy/sd.py index fd0ac85e7..de119eb8e 100644 --- a/comfy/sd.py +++ b/comfy/sd.py @@ -59,6 +59,7 @@ import comfy.text_encoders.kandinsky5 import comfy.text_encoders.jina_clip_2 import comfy.text_encoders.newbie import comfy.text_encoders.anima +import comfy.text_encoders.ace15 import comfy.model_patcher import comfy.lora @@ -422,6 +423,17 @@ class CLIP: def get_key_patches(self): return self.patcher.get_key_patches() + def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None): + self.cond_stage_model.reset_clip_options() + + self.load_model() + self.cond_stage_model.set_clip_options({"layer": None}) + self.cond_stage_model.set_clip_options({"execution_device": self.patcher.load_device}) + return self.cond_stage_model.generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed) + + def decode(self, token_ids, skip_special_tokens=True): + return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens) + class VAE: def __init__(self, sd=None, device=None, config=None, dtype=None, metadata=None): if 'decoder.up_blocks.0.resnets.0.norm1.weight' in sd.keys(): #diffusers format @@ -452,6 +464,8 @@ class VAE: self.extra_1d_channel = None self.crop_input = True + self.audio_sample_rate = 44100 + if config is None: if "decoder.mid.block_1.mix_factor" in sd: encoder_config = {'double_z': True, 'z_channels': 4, 'resolution': 256, 'in_channels': 3, 'out_ch': 3, 'ch': 128, 'ch_mult': [1, 2, 4, 4], 'num_res_blocks': 2, 'attn_resolutions': [], 'dropout': 0.0} @@ -549,14 +563,27 @@ class VAE: encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig}, decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig}) elif "decoder.layers.1.layers.0.beta" in sd: - self.first_stage_model = AudioOobleckVAE() + config = {} + param_key = None + self.upscale_ratio = 2048 + self.downscale_ratio = 2048 + if "decoder.layers.2.layers.1.weight_v" in sd: + param_key = "decoder.layers.2.layers.1.weight_v" + if "decoder.layers.2.layers.1.parametrizations.weight.original1" in sd: + param_key = "decoder.layers.2.layers.1.parametrizations.weight.original1" + if param_key is not None: + if sd[param_key].shape[-1] == 12: + config["strides"] = [2, 4, 4, 6, 10] + self.audio_sample_rate = 48000 + self.upscale_ratio = 1920 + self.downscale_ratio = 1920 + + self.first_stage_model = AudioOobleckVAE(**config) self.memory_used_encode = lambda shape, dtype: (1000 * shape[2]) * model_management.dtype_size(dtype) self.memory_used_decode = lambda shape, dtype: (1000 * shape[2] * 2048) * model_management.dtype_size(dtype) self.latent_channels = 64 self.output_channels = 2 self.pad_channel_value = "replicate" - self.upscale_ratio = 2048 - self.downscale_ratio = 2048 self.latent_dim = 1 self.process_output = lambda audio: audio self.process_input = lambda audio: audio @@ -667,8 +694,9 @@ class VAE: self.latent_dim = 3 self.latent_channels = 16 self.output_channels = sd["encoder.conv1.weight"].shape[1] + self.conv_out_channels = sd["decoder.head.2.weight"].shape[0] self.pad_channel_value = 1.0 - ddconfig = {"dim": dim, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "image_channels": self.output_channels, "dropout": 0.0} + ddconfig = {"dim": dim, "z_dim": self.latent_channels, "dim_mult": [1, 2, 4, 4], "num_res_blocks": 2, "attn_scales": [], "temperal_downsample": [False, True, True], "image_channels": self.output_channels, "conv_out_channels": self.conv_out_channels, "dropout": 0.0} self.first_stage_model = comfy.ldm.wan.vae.WanVAE(**ddconfig) self.working_dtypes = [torch.bfloat16, torch.float16, torch.float32] self.memory_used_encode = lambda shape, dtype: (1500 if shape[2]<=4 else 6000) * shape[3] * shape[4] * model_management.dtype_size(dtype) @@ -777,8 +805,6 @@ class VAE: self.first_stage_model = AutoencoderKL(**(config['params'])) self.first_stage_model = self.first_stage_model.eval() - model_management.archive_model_dtypes(self.first_stage_model) - if device is None: device = model_management.vae_device() self.device = device @@ -787,6 +813,7 @@ class VAE: dtype = model_management.vae_dtype(self.device, self.working_dtypes) self.vae_dtype = dtype self.first_stage_model.to(self.vae_dtype) + model_management.archive_model_dtypes(self.first_stage_model) self.output_device = model_management.intermediate_device() mp = comfy.model_patcher.CoreModelPatcher @@ -856,7 +883,7 @@ class VAE: / 3.0) return output - def decode_tiled_1d(self, samples, tile_x=128, overlap=32): + def decode_tiled_1d(self, samples, tile_x=256, overlap=32): if samples.ndim == 3: decode_fn = lambda a: self.first_stage_model.decode(a.to(self.vae_dtype).to(self.device)).float() else: @@ -960,7 +987,7 @@ class VAE: if overlap is not None: args["overlap"] = overlap - if dims == 1: + if dims == 1 or self.extra_1d_channel is not None: args.pop("tile_y") output = self.decode_tiled_1d(samples, **args) elif dims == 2: @@ -1167,6 +1194,7 @@ class TEModel(Enum): JINA_CLIP_2 = 19 QWEN3_8B = 20 QWEN3_06B = 21 + GEMMA_3_4B_VISION = 22 def detect_te_model(sd): @@ -1195,7 +1223,10 @@ def detect_te_model(sd): if 'model.layers.47.self_attn.q_norm.weight' in sd: return TEModel.GEMMA_3_12B if 'model.layers.0.self_attn.q_norm.weight' in sd: - return TEModel.GEMMA_3_4B + if 'vision_model.embeddings.patch_embedding.weight' in sd: + return TEModel.GEMMA_3_4B_VISION + else: + return TEModel.GEMMA_3_4B return TEModel.GEMMA_2_2B if 'model.layers.0.self_attn.k_proj.bias' in sd: weight = sd['model.layers.0.self_attn.k_proj.bias'] @@ -1255,6 +1286,8 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip else: if "text_projection" in clip_data[i]: clip_data[i]["text_projection.weight"] = clip_data[i]["text_projection"].transpose(0, 1) #old models saved with the CLIPSave node + if "lm_head.weight" in clip_data[i]: + clip_data[i]["model.lm_head.weight"] = clip_data[i].pop("lm_head.weight") # prefix missing in some models tokenizer_data = {} clip_target = EmptyClass() @@ -1320,6 +1353,14 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b") clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None) + elif te_model == TEModel.GEMMA_3_4B_VISION: + clip_target.clip = comfy.text_encoders.lumina2.te(**llama_detect(clip_data), model_type="gemma3_4b_vision") + clip_target.tokenizer = comfy.text_encoders.lumina2.NTokenizer + tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None) + elif te_model == TEModel.GEMMA_3_12B: + clip_target.clip = comfy.text_encoders.lt.gemma3_te(**llama_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.lt.Gemma3_12BTokenizer + tokenizer_data["spiece_model"] = clip_data[0].get("spiece_model", None) elif te_model == TEModel.LLAMA3_8: clip_target.clip = comfy.text_encoders.hidream.hidream_clip(**llama_detect(clip_data), clip_l=False, clip_g=False, t5=False, llama=True, dtype_t5=None) @@ -1427,6 +1468,14 @@ def load_text_encoder_state_dicts(state_dicts=[], embedding_directory=None, clip clip_data_jina = clip_data[0] tokenizer_data["gemma_spiece_model"] = clip_data_gemma.get("spiece_model", None) tokenizer_data["jina_spiece_model"] = clip_data_jina.get("spiece_model", None) + elif clip_type == CLIPType.ACE: + te_models = [detect_te_model(clip_data[0]), detect_te_model(clip_data[1])] + if TEModel.QWEN3_4B in te_models: + model_type = "qwen3_4b" + else: + model_type = "qwen3_2b" + clip_target.clip = comfy.text_encoders.ace15.te(lm_model=model_type, **llama_detect(clip_data)) + clip_target.tokenizer = comfy.text_encoders.ace15.ACE15Tokenizer else: clip_target.clip = sdxl_clip.SDXLClipModel clip_target.tokenizer = sdxl_clip.SDXLTokenizer @@ -1482,14 +1531,24 @@ def load_checkpoint(config_path=None, ckpt_path=None, output_vae=True, output_cl return (model, clip, vae) -def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}): +def load_checkpoint_guess_config(ckpt_path, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, disable_dynamic=False): sd, metadata = comfy.utils.load_torch_file(ckpt_path, return_metadata=True) - out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata) + out = load_state_dict_guess_config(sd, output_vae, output_clip, output_clipvision, embedding_directory, output_model, model_options, te_model_options=te_model_options, metadata=metadata, disable_dynamic=disable_dynamic) if out is None: raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(ckpt_path, model_detection_error_hint(ckpt_path, sd))) + if output_model: + out[0].cached_patcher_init = (load_checkpoint_guess_config_model_only, (ckpt_path, embedding_directory, model_options, te_model_options)) return out -def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None): +def load_checkpoint_guess_config_model_only(ckpt_path, embedding_directory=None, model_options={}, te_model_options={}, disable_dynamic=False): + model, *_ = load_checkpoint_guess_config(ckpt_path, False, False, False, + embedding_directory=embedding_directory, + model_options=model_options, + te_model_options=te_model_options, + disable_dynamic=disable_dynamic) + return model + +def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_clipvision=False, embedding_directory=None, output_model=True, model_options={}, te_model_options={}, metadata=None, disable_dynamic=False): clip = None clipvision = None vae = None @@ -1538,7 +1597,8 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c if output_model: inital_load_device = model_management.unet_inital_load_device(parameters, unet_dtype) model = model_config.get_model(sd, diffusion_model_prefix, device=inital_load_device) - model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device()) + ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher + model_patcher = ModelPatcher(model, load_device=load_device, offload_device=model_management.unet_offload_device()) model.load_model_weights(sd, diffusion_model_prefix, assign=model_patcher.is_dynamic()) if output_vae: @@ -1589,7 +1649,7 @@ def load_state_dict_guess_config(sd, output_vae=True, output_clip=True, output_c return (model_patcher, clip, vae, clipvision) -def load_diffusion_model_state_dict(sd, model_options={}, metadata=None): +def load_diffusion_model_state_dict(sd, model_options={}, metadata=None, disable_dynamic=False): """ Loads a UNet diffusion model from a state dictionary, supporting both diffusers and regular formats. @@ -1673,7 +1733,8 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None): model_config.optimizations["fp8"] = True model = model_config.get_model(new_sd, "") - model_patcher = comfy.model_patcher.CoreModelPatcher(model, load_device=load_device, offload_device=offload_device) + ModelPatcher = comfy.model_patcher.ModelPatcher if disable_dynamic else comfy.model_patcher.CoreModelPatcher + model_patcher = ModelPatcher(model, load_device=load_device, offload_device=offload_device) if not model_management.is_device_cpu(offload_device): model.to(offload_device) model.load_model_weights(new_sd, "", assign=model_patcher.is_dynamic()) @@ -1682,12 +1743,13 @@ def load_diffusion_model_state_dict(sd, model_options={}, metadata=None): logging.info("left over keys in diffusion model: {}".format(left_over)) return model_patcher -def load_diffusion_model(unet_path, model_options={}): +def load_diffusion_model(unet_path, model_options={}, disable_dynamic=False): sd, metadata = comfy.utils.load_torch_file(unet_path, return_metadata=True) - model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata) + model = load_diffusion_model_state_dict(sd, model_options=model_options, metadata=metadata, disable_dynamic=disable_dynamic) if model is None: logging.error("ERROR UNSUPPORTED DIFFUSION MODEL {}".format(unet_path)) raise RuntimeError("ERROR: Could not detect model type of: {}\n{}".format(unet_path, model_detection_error_hint(unet_path, sd))) + model.cached_patcher_init = (load_diffusion_model, (unet_path, model_options)) return model def load_unet(unet_path, dtype=None): diff --git a/comfy/sd1_clip.py b/comfy/sd1_clip.py index 9ecfc9c55..d89550840 100644 --- a/comfy/sd1_clip.py +++ b/comfy/sd1_clip.py @@ -155,6 +155,8 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): self.execution_device = options.get("execution_device", self.execution_device) if isinstance(self.layer, list) or self.layer == "all": pass + elif isinstance(layer_idx, list): + self.layer = layer_idx elif layer_idx is None or abs(layer_idx) > self.num_layers: self.layer = "last" else: @@ -169,8 +171,9 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): def process_tokens(self, tokens, device): end_token = self.special_tokens.get("end", None) + pad_token = self.special_tokens.get("pad", -1) if end_token is None: - cmp_token = self.special_tokens.get("pad", -1) + cmp_token = pad_token else: cmp_token = end_token @@ -184,15 +187,21 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): other_embeds = [] eos = False index = 0 + left_pad = False for y in x: if isinstance(y, numbers.Integral): - if eos: + token = int(y) + if index == 0 and token == pad_token: + left_pad = True + + if eos or (left_pad and token == pad_token): attention_mask.append(0) else: attention_mask.append(1) - token = int(y) + left_pad = False + tokens_temp += [token] - if not eos and token == cmp_token: + if not eos and token == cmp_token and not left_pad: if end_token is None: attention_mask[-1] = 0 eos = True @@ -299,6 +308,15 @@ class SDClipModel(torch.nn.Module, ClipTokenWeightEncoder): def load_sd(self, sd): return self.transformer.load_state_dict(sd, strict=False, assign=getattr(self, "can_assign_sd", False)) + def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed): + if isinstance(tokens, dict): + tokens_only = next(iter(tokens.values())) # todo: get this better? + else: + tokens_only = tokens + tokens_only = [[t[0] for t in b] for b in tokens_only] + embeds = self.process_tokens(tokens_only, device=self.execution_device)[0] + return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed) + def parse_parentheses(string): result = [] current_item = "" @@ -555,6 +573,8 @@ class SDTokenizer: min_length = tokenizer_options.get("{}_min_length".format(self.embedding_key), self.min_length) min_padding = tokenizer_options.get("{}_min_padding".format(self.embedding_key), self.min_padding) + min_length = kwargs.get("min_length", min_length) + text = escape_important(text) if kwargs.get("disable_weights", self.disable_weights): parsed_weights = [(text, 1.0)] @@ -654,6 +674,9 @@ class SDTokenizer: def state_dict(self): return {} + def decode(self, token_ids, skip_special_tokens=True): + return self.tokenizer.decode(token_ids, skip_special_tokens=skip_special_tokens) + class SD1Tokenizer: def __init__(self, embedding_directory=None, tokenizer_data={}, clip_name="l", tokenizer=SDTokenizer, name=None): if name is not None: @@ -677,6 +700,9 @@ class SD1Tokenizer: def state_dict(self): return getattr(self, self.clip).state_dict() + def decode(self, token_ids, skip_special_tokens=True): + return getattr(self, self.clip).decode(token_ids, skip_special_tokens=skip_special_tokens) + class SD1CheckpointClipModel(SDClipModel): def __init__(self, device="cpu", dtype=None, model_options={}): super().__init__(device=device, return_projected_pooled=False, dtype=dtype, model_options=model_options) @@ -713,3 +739,6 @@ class SD1ClipModel(torch.nn.Module): def load_sd(self, sd): return getattr(self, self.clip).load_sd(sd) + + def generate(self, tokens, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.95, min_p=0.0, repetition_penalty=1.0, seed=None): + return getattr(self, self.clip).generate(tokens, do_sample=do_sample, max_length=max_length, temperature=temperature, top_k=top_k, top_p=top_p, min_p=min_p, repetition_penalty=repetition_penalty, seed=seed) diff --git a/comfy/supported_models.py b/comfy/supported_models.py index d25271d6e..1bb7b7011 100644 --- a/comfy/supported_models.py +++ b/comfy/supported_models.py @@ -24,6 +24,7 @@ import comfy.text_encoders.hunyuan_image import comfy.text_encoders.kandinsky5 import comfy.text_encoders.z_image import comfy.text_encoders.anima +import comfy.text_encoders.ace15 from . import supported_models_base from . import latent_formats @@ -524,7 +525,8 @@ class LotusD(SD20): } unet_extra_config = { - "num_classes": 'sequential' + "num_classes": 'sequential', + "num_head_channels": 64, } def get_model(self, state_dict, prefix="", device=None): @@ -709,6 +711,15 @@ class Flux(supported_models_base.BASE): supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + def process_unet_state_dict(self, state_dict): + out_sd = {} + for k in list(state_dict.keys()): + key_out = k + if key_out.endswith("_norm.scale"): + key_out = "{}.weight".format(key_out[:-len(".scale")]) + out_sd[key_out] = state_dict[k] + return out_sd + vae_key_prefix = ["vae."] text_encoder_key_prefix = ["text_encoders."] @@ -897,11 +908,13 @@ class HunyuanVideo(supported_models_base.BASE): key_out = key_out.replace("txt_in.c_embedder.linear_1.", "txt_in.c_embedder.in_layer.").replace("txt_in.c_embedder.linear_2.", "txt_in.c_embedder.out_layer.") key_out = key_out.replace("_mod.linear.", "_mod.lin.").replace("_attn_qkv.", "_attn.qkv.") key_out = key_out.replace("mlp.fc1.", "mlp.0.").replace("mlp.fc2.", "mlp.2.") - key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.scale").replace("_attn_k_norm.weight", "_attn.norm.key_norm.scale") - key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.scale").replace(".k_norm.weight", ".norm.key_norm.scale") + key_out = key_out.replace("_attn_q_norm.weight", "_attn.norm.query_norm.weight").replace("_attn_k_norm.weight", "_attn.norm.key_norm.weight") + key_out = key_out.replace(".q_norm.weight", ".norm.query_norm.weight").replace(".k_norm.weight", ".norm.key_norm.weight") key_out = key_out.replace("_attn_proj.", "_attn.proj.") key_out = key_out.replace(".modulation.linear.", ".modulation.lin.") key_out = key_out.replace("_in.mlp.2.", "_in.out_layer.").replace("_in.mlp.0.", "_in.in_layer.") + if key_out.endswith(".scale"): + key_out = "{}.weight".format(key_out[:-len(".scale")]) out_sd[key_out] = state_dict[k] return out_sd @@ -992,7 +1005,7 @@ class CosmosT2IPredict2(supported_models_base.BASE): memory_usage_factor = 1.0 - supported_inference_dtypes = [torch.bfloat16, torch.float32] + supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] def __init__(self, unet_config): super().__init__(unet_config) @@ -1022,11 +1035,7 @@ class Anima(supported_models_base.BASE): memory_usage_factor = 1.0 - supported_inference_dtypes = [torch.bfloat16, torch.float32] - - def __init__(self, unet_config): - super().__init__(unet_config) - self.memory_usage_factor = (unet_config.get("model_channels", 2048) / 2048) * 0.95 + supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] def get_model(self, state_dict, prefix="", device=None): out = model_base.Anima(self, device=device) @@ -1037,6 +1046,12 @@ class Anima(supported_models_base.BASE): detect = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3_06b.transformer.".format(pref)) return supported_models_base.ClipTarget(comfy.text_encoders.anima.AnimaTokenizer, comfy.text_encoders.anima.te(**detect)) + def set_inference_dtype(self, dtype, manual_cast_dtype, **kwargs): + self.memory_usage_factor = (self.unet_config.get("model_channels", 2048) / 2048) * 0.95 + if dtype is torch.float16: + self.memory_usage_factor *= 1.4 + return super().set_inference_dtype(dtype, manual_cast_dtype, **kwargs) + class CosmosI2VPredict2(CosmosT2IPredict2): unet_config = { "image_model": "cosmos_predict2", @@ -1242,6 +1257,16 @@ class WAN22_T2V(WAN21_T2V): out = model_base.WAN22(self, image_to_video=True, device=device) return out +class WAN21_FlowRVS(WAN21_T2V): + unet_config = { + "image_model": "wan2.1", + "model_type": "flow_rvs", + } + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.WAN21_FlowRVS(self, image_to_video=True, device=device) + return out + class Hunyuan3Dv2(supported_models_base.BASE): unet_config = { "image_model": "hunyuan3d2", @@ -1261,6 +1286,15 @@ class Hunyuan3Dv2(supported_models_base.BASE): latent_format = latent_formats.Hunyuan3Dv2 + def process_unet_state_dict(self, state_dict): + out_sd = {} + for k in list(state_dict.keys()): + key_out = k + if key_out.endswith(".scale"): + key_out = "{}.weight".format(key_out[:-len(".scale")]) + out_sd[key_out] = state_dict[k] + return out_sd + def process_unet_state_dict_for_saving(self, state_dict): replace_prefix = {"": "model."} return utils.state_dict_prefix_replace(state_dict, replace_prefix) @@ -1338,6 +1372,14 @@ class Chroma(supported_models_base.BASE): supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32] + def process_unet_state_dict(self, state_dict): + out_sd = {} + for k in list(state_dict.keys()): + key_out = k + if key_out.endswith(".scale"): + key_out = "{}.weight".format(key_out[:-len(".scale")]) + out_sd[key_out] = state_dict[k] + return out_sd def get_model(self, state_dict, prefix="", device=None): out = model_base.Chroma(self, device=device) @@ -1596,6 +1638,46 @@ class Kandinsky5Image(Kandinsky5): return supported_models_base.ClipTarget(comfy.text_encoders.kandinsky5.Kandinsky5TokenizerImage, comfy.text_encoders.kandinsky5.te(**hunyuan_detect)) -models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima] +class ACEStep15(supported_models_base.BASE): + unet_config = { + "audio_model": "ace1.5", + } + + unet_extra_config = { + } + + sampling_settings = { + "multiplier": 1.0, + "shift": 3.0, + } + + latent_format = comfy.latent_formats.ACEAudio15 + + memory_usage_factor = 4.7 + + supported_inference_dtypes = [torch.bfloat16, torch.float32] + + vae_key_prefix = ["vae."] + text_encoder_key_prefix = ["text_encoders."] + + def get_model(self, state_dict, prefix="", device=None): + out = model_base.ACEStep15(self, device=device) + return out + + def clip_target(self, state_dict={}): + pref = self.text_encoder_key_prefix[0] + detect_2b = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3_2b.transformer.".format(pref)) + detect_4b = comfy.text_encoders.hunyuan_video.llama_detect(state_dict, "{}qwen3_4b.transformer.".format(pref)) + if "dtype_llama" in detect_2b: + detect = detect_2b + detect["lm_model"] = "qwen3_2b" + elif "dtype_llama" in detect_4b: + detect = detect_4b + detect["lm_model"] = "qwen3_4b" + + return supported_models_base.ClipTarget(comfy.text_encoders.ace15.ACE15Tokenizer, comfy.text_encoders.ace15.te(**detect)) + + +models = [LotusD, Stable_Zero123, SD15_instructpix2pix, SD15, SD20, SD21UnclipL, SD21UnclipH, SDXL_instructpix2pix, SDXLRefiner, SDXL, SSD1B, KOALA_700M, KOALA_1B, Segmind_Vega, SD_X4Upscaler, Stable_Cascade_C, Stable_Cascade_B, SV3D_u, SV3D_p, SD3, StableAudio, AuraFlow, PixArtAlpha, PixArtSigma, HunyuanDiT, HunyuanDiT1, FluxInpaint, Flux, FluxSchnell, GenmoMochi, LTXV, LTXAV, HunyuanVideo15_SR_Distilled, HunyuanVideo15, HunyuanImage21Refiner, HunyuanImage21, HunyuanVideoSkyreelsI2V, HunyuanVideoI2V, HunyuanVideo, CosmosT2V, CosmosI2V, CosmosT2IPredict2, CosmosI2VPredict2, ZImage, Lumina2, WAN22_T2V, WAN21_T2V, WAN21_I2V, WAN21_FunControl2V, WAN21_Vace, WAN21_Camera, WAN22_Camera, WAN22_S2V, WAN21_HuMo, WAN22_Animate, WAN21_FlowRVS, Hunyuan3Dv2mini, Hunyuan3Dv2, Hunyuan3Dv2_1, HiDream, Chroma, ChromaRadiance, ACEStep, ACEStep15, Omnigen2, QwenImage, Flux2, Kandinsky5Image, Kandinsky5, Anima] models += [SVD_img2vid] diff --git a/comfy/text_encoders/ace15.py b/comfy/text_encoders/ace15.py new file mode 100644 index 000000000..f135d74c1 --- /dev/null +++ b/comfy/text_encoders/ace15.py @@ -0,0 +1,348 @@ +from .anima import Qwen3Tokenizer +import comfy.text_encoders.llama +from comfy import sd1_clip +import torch +import math +import yaml +import comfy.utils + + +def sample_manual_loop_no_classes( + model, + ids=None, + execution_dtype=None, + cfg_scale: float = 2.0, + temperature: float = 0.85, + top_p: float = 0.9, + top_k: int = None, + min_p: float = 0.000, + seed: int = 1, + min_tokens: int = 1, + max_new_tokens: int = 2048, + audio_start_id: int = 151669, # The cutoff ID for audio codes + audio_end_id: int = 215669, + eos_token_id: int = 151645, +): + if ids is None: + return [] + device = model.execution_device + + if execution_dtype is None: + if comfy.model_management.should_use_bf16(device): + execution_dtype = torch.bfloat16 + else: + execution_dtype = torch.float32 + + embeds, attention_mask, num_tokens, embeds_info = model.process_tokens(ids, device) + embeds_batch = embeds.shape[0] + + output_audio_codes = [] + past_key_values = [] + generator = torch.Generator(device=device) + generator.manual_seed(seed) + model_config = model.transformer.model.config + past_kv_shape = [embeds_batch, model_config.num_key_value_heads, embeds.shape[1] + min_tokens, model_config.head_dim] + + for x in range(model_config.num_hidden_layers): + past_key_values.append((torch.empty(past_kv_shape, device=device, dtype=execution_dtype), torch.empty(past_kv_shape, device=device, dtype=execution_dtype), 0)) + + progress_bar = comfy.utils.ProgressBar(max_new_tokens) + + for step in comfy.utils.model_trange(max_new_tokens, desc="LM sampling"): + outputs = model.transformer(None, attention_mask, embeds=embeds.to(execution_dtype), num_tokens=num_tokens, intermediate_output=None, dtype=execution_dtype, embeds_info=embeds_info, past_key_values=past_key_values) + next_token_logits = model.transformer.logits(outputs[0])[:, -1] + past_key_values = outputs[2] + + if cfg_scale != 1.0: + cond_logits = next_token_logits[0:1] + uncond_logits = next_token_logits[1:2] + cfg_logits = uncond_logits + cfg_scale * (cond_logits - uncond_logits) + else: + cfg_logits = next_token_logits[0:1] + + use_eos_score = eos_token_id is not None and eos_token_id < audio_start_id and min_tokens < step + if use_eos_score: + eos_score = cfg_logits[:, eos_token_id].clone() + + remove_logit_value = torch.finfo(cfg_logits.dtype).min + # Only generate audio tokens + cfg_logits[:, :audio_start_id] = remove_logit_value + cfg_logits[:, audio_end_id:] = remove_logit_value + + if use_eos_score: + cfg_logits[:, eos_token_id] = eos_score + + if top_k is not None and top_k > 0: + top_k_vals, _ = torch.topk(cfg_logits, top_k) + min_val = top_k_vals[..., -1, None] + cfg_logits[cfg_logits < min_val] = remove_logit_value + + if min_p is not None and min_p > 0: + probs = torch.softmax(cfg_logits, dim=-1) + p_max = probs.max(dim=-1, keepdim=True).values + indices_to_remove = probs < (min_p * p_max) + cfg_logits[indices_to_remove] = remove_logit_value + + if top_p is not None and top_p < 1.0: + sorted_logits, sorted_indices = torch.sort(cfg_logits, descending=True) + cumulative_probs = torch.cumsum(torch.softmax(sorted_logits, dim=-1), dim=-1) + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 1:] = sorted_indices_to_remove[..., :-1].clone() + sorted_indices_to_remove[..., 0] = 0 + indices_to_remove = sorted_indices_to_remove.scatter(1, sorted_indices, sorted_indices_to_remove) + cfg_logits[indices_to_remove] = remove_logit_value + + if temperature > 0: + cfg_logits = cfg_logits / temperature + next_token = torch.multinomial(torch.softmax(cfg_logits, dim=-1), num_samples=1, generator=generator).squeeze(1) + else: + next_token = torch.argmax(cfg_logits, dim=-1) + + token = next_token.item() + + if token == eos_token_id: + break + + embed, _, _, _ = model.process_tokens([[token]], device) + embeds = embed.repeat(embeds_batch, 1, 1) + attention_mask = torch.cat([attention_mask, torch.ones((embeds_batch, 1), device=device, dtype=attention_mask.dtype)], dim=1) + + output_audio_codes.append(token - audio_start_id) + progress_bar.update_absolute(step) + + return output_audio_codes + + +def generate_audio_codes(model, positive, negative, min_tokens=1, max_tokens=1024, seed=0, cfg_scale=2.0, temperature=0.85, top_p=0.9, top_k=0, min_p=0.000): + positive = [[token for token, _ in inner_list] for inner_list in positive] + positive = positive[0] + + if cfg_scale != 1.0: + negative = [[token for token, _ in inner_list] for inner_list in negative] + negative = negative[0] + + neg_pad = 0 + if len(negative) < len(positive): + neg_pad = (len(positive) - len(negative)) + negative = [model.special_tokens["pad"]] * neg_pad + negative + + pos_pad = 0 + if len(negative) > len(positive): + pos_pad = (len(negative) - len(positive)) + positive = [model.special_tokens["pad"]] * pos_pad + positive + + ids = [positive, negative] + else: + ids = [positive] + + return sample_manual_loop_no_classes(model, ids, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p, seed=seed, min_tokens=min_tokens, max_new_tokens=max_tokens) + + +class ACE15Tokenizer(sd1_clip.SD1Tokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="qwen3_06b", tokenizer=Qwen3Tokenizer) + + def _metas_to_cot(self, *, return_yaml: bool = False, **kwargs) -> str: + user_metas = { + k: kwargs.pop(k) + for k in ("bpm", "duration", "keyscale", "timesignature") + if k in kwargs + } + timesignature = user_metas.get("timesignature") + if isinstance(timesignature, str) and timesignature.endswith("/4"): + user_metas["timesignature"] = timesignature[:-2] + user_metas = { + k: v if not isinstance(v, str) or not v.isdigit() else int(v) + for k, v in user_metas.items() + if v not in {"unspecified", None} + } + if len(user_metas): + meta_yaml = yaml.dump(user_metas, allow_unicode=True, sort_keys=True).strip() + else: + meta_yaml = "" + return f"\n{meta_yaml}\n" if not return_yaml else meta_yaml + + def _metas_to_cap(self, **kwargs) -> str: + use_keys = ("bpm", "timesignature", "keyscale", "duration") + user_metas = { k: kwargs.pop(k, "N/A") for k in use_keys } + timesignature = user_metas.get("timesignature") + if isinstance(timesignature, str) and timesignature.endswith("/4"): + user_metas["timesignature"] = timesignature[:-2] + duration = user_metas["duration"] + if duration == "N/A": + user_metas["duration"] = "30 seconds" + elif isinstance(duration, (str, int, float)): + user_metas["duration"] = f"{math.ceil(float(duration))} seconds" + else: + raise TypeError("Unexpected type for duration key, must be str, int or float") + return "\n".join(f"- {k}: {user_metas[k]}" for k in use_keys) + + def tokenize_with_weights(self, text, return_word_ids=False, **kwargs): + text = text.strip() + text_negative = kwargs.get("caption_negative", text).strip() + lyrics = kwargs.get("lyrics", "") + lyrics_negative = kwargs.get("lyrics_negative", lyrics) + duration = kwargs.get("duration", 120) + if isinstance(duration, str): + duration = float(duration.split(None, 1)[0]) + language = kwargs.get("language") + seed = kwargs.get("seed", 0) + + generate_audio_codes = kwargs.get("generate_audio_codes", True) + cfg_scale = kwargs.get("cfg_scale", 2.0) + temperature = kwargs.get("temperature", 0.85) + top_p = kwargs.get("top_p", 0.9) + top_k = kwargs.get("top_k", 0.0) + min_p = kwargs.get("min_p", 0.000) + + duration = math.ceil(duration) + kwargs["duration"] = duration + tokens_duration = duration * 5 + min_tokens = int(kwargs.get("min_tokens", tokens_duration)) + max_tokens = int(kwargs.get("max_tokens", tokens_duration)) + + metas_negative = { + k.rsplit("_", 1)[0]: kwargs.pop(k) + for k in ("bpm_negative", "duration_negative", "keyscale_negative", "timesignature_negative", "language_negative", "caption_negative") + if k in kwargs + } + if not kwargs.get("use_negative_caption"): + _ = metas_negative.pop("caption", None) + + cot_text = self._metas_to_cot(caption=text, **kwargs) + cot_text_negative = "\n\n" if not metas_negative else self._metas_to_cot(**metas_negative) + meta_cap = self._metas_to_cap(**kwargs) + + lm_template = "<|im_start|>system\n# Instruction\nGenerate audio semantic tokens based on the given conditions:\n\n<|im_end|>\n<|im_start|>user\n# Caption\n{}\n\n# Lyric\n{}\n<|im_end|>\n<|im_start|>assistant\n{}\n\n<|im_end|>\n" + lyrics_template = "# Languages\n{}\n\n# Lyric\n{}<|endoftext|><|endoftext|>" + qwen3_06b_template = "# Instruction\nGenerate audio semantic tokens based on the given conditions:\n\n# Caption\n{}\n\n# Metas\n{}\n<|endoftext|>\n<|endoftext|>" + + llm_prompts = { + "lm_prompt": lm_template.format(text, lyrics.strip(), cot_text), + "lm_prompt_negative": lm_template.format(text_negative, lyrics_negative.strip(), cot_text_negative), + "lyrics": lyrics_template.format(language if language is not None else "", lyrics), + "qwen3_06b": qwen3_06b_template.format(text, meta_cap), + } + + out = { + prompt_key: self.qwen3_06b.tokenize_with_weights( + prompt, + prompt_key == "qwen3_06b" and return_word_ids, + disable_weights = True, + **kwargs, + ) + for prompt_key, prompt in llm_prompts.items() + } + out["lm_metadata"] = {"min_tokens": min_tokens, + "max_tokens": max_tokens, + "seed": seed, + "generate_audio_codes": generate_audio_codes, + "cfg_scale": cfg_scale, + "temperature": temperature, + "top_p": top_p, + "top_k": top_k, + "min_p": min_p, + } + return out + + +class Qwen3_06BModel(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}): + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen3_06B_ACE15, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + +class Qwen3_2B_ACE15(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}): + llama_quantization_metadata = model_options.get("llama_quantization_metadata", None) + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen3_2B_ACE15_lm, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + +class Qwen3_4B_ACE15(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}): + llama_quantization_metadata = model_options.get("llama_quantization_metadata", None) + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"pad": 151643}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Qwen3_4B_ACE15_lm, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + +class ACE15TEModel(torch.nn.Module): + def __init__(self, device="cpu", dtype=None, dtype_llama=None, lm_model=None, model_options={}): + super().__init__() + if dtype_llama is None: + dtype_llama = dtype + + model = None + self.constant = 0.4375 + if lm_model == "qwen3_4b": + model = Qwen3_4B_ACE15 + self.constant = 0.5625 + elif lm_model == "qwen3_2b": + model = Qwen3_2B_ACE15 + + self.lm_model = lm_model + self.qwen3_06b = Qwen3_06BModel(device=device, dtype=dtype, model_options=model_options) + if model is not None: + setattr(self, self.lm_model, model(device=device, dtype=dtype_llama, model_options=model_options)) + + self.dtypes = set([dtype, dtype_llama]) + + def encode_token_weights(self, token_weight_pairs): + token_weight_pairs_base = token_weight_pairs["qwen3_06b"] + token_weight_pairs_lyrics = token_weight_pairs["lyrics"] + + self.qwen3_06b.set_clip_options({"layer": None}) + base_out, _, extra = self.qwen3_06b.encode_token_weights(token_weight_pairs_base) + self.qwen3_06b.set_clip_options({"layer": [0]}) + lyrics_embeds, _, extra_l = self.qwen3_06b.encode_token_weights(token_weight_pairs_lyrics) + + out = {"conditioning_lyrics": lyrics_embeds[:, 0]} + + lm_metadata = token_weight_pairs["lm_metadata"] + if lm_metadata["generate_audio_codes"]: + audio_codes = generate_audio_codes(getattr(self, self.lm_model, self.qwen3_06b), token_weight_pairs["lm_prompt"], token_weight_pairs["lm_prompt_negative"], min_tokens=lm_metadata["min_tokens"], max_tokens=lm_metadata["min_tokens"], seed=lm_metadata["seed"], cfg_scale=lm_metadata["cfg_scale"], temperature=lm_metadata["temperature"], top_p=lm_metadata["top_p"], top_k=lm_metadata["top_k"], min_p=lm_metadata["min_p"]) + out["audio_codes"] = [audio_codes] + + return base_out, None, out + + def set_clip_options(self, options): + self.qwen3_06b.set_clip_options(options) + lm_model = getattr(self, self.lm_model, None) + if lm_model is not None: + lm_model.set_clip_options(options) + + def reset_clip_options(self): + self.qwen3_06b.reset_clip_options() + lm_model = getattr(self, self.lm_model, None) + if lm_model is not None: + lm_model.reset_clip_options() + + def load_sd(self, sd): + if "model.layers.0.post_attention_layernorm.weight" in sd: + shape = sd["model.layers.0.post_attention_layernorm.weight"].shape + if shape[0] == 1024: + return self.qwen3_06b.load_sd(sd) + else: + return getattr(self, self.lm_model).load_sd(sd) + + def memory_estimation_function(self, token_weight_pairs, device=None): + lm_metadata = token_weight_pairs["lm_metadata"] + constant = self.constant + if comfy.model_management.should_use_bf16(device): + constant *= 0.5 + + token_weight_pairs = token_weight_pairs.get("lm_prompt", []) + num_tokens = sum(map(lambda a: len(a), token_weight_pairs)) + num_tokens += lm_metadata['min_tokens'] + return num_tokens * constant * 1024 * 1024 + +def te(dtype_llama=None, llama_quantization_metadata=None, lm_model="qwen3_2b"): + class ACE15TEModel_(ACE15TEModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["llama_quantization_metadata"] = llama_quantization_metadata + super().__init__(device=device, dtype_llama=dtype_llama, lm_model=lm_model, dtype=dtype, model_options=model_options) + return ACE15TEModel_ diff --git a/comfy/text_encoders/anima.py b/comfy/text_encoders/anima.py index 41f95bcb6..2e31b2b04 100644 --- a/comfy/text_encoders/anima.py +++ b/comfy/text_encoders/anima.py @@ -8,7 +8,7 @@ import torch class Qwen3Tokenizer(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer") - super().__init__(tokenizer_path, pad_with_end=False, embedding_size=1024, embedding_key='qwen3_06b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data) + super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=1024, embedding_key='qwen3_06b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data) class T5XXLTokenizer(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): @@ -23,7 +23,7 @@ class AnimaTokenizer: def tokenize_with_weights(self, text:str, return_word_ids=False, **kwargs): out = {} qwen_ids = self.qwen3_06b.tokenize_with_weights(text, return_word_ids, **kwargs) - out["qwen3_06b"] = [[(token, 1.0) for token, _ in inner_list] for inner_list in qwen_ids] # Set weights to 1.0 + out["qwen3_06b"] = [[(k[0], 1.0, k[2]) if return_word_ids else (k[0], 1.0) for k in inner_list] for inner_list in qwen_ids] # Set weights to 1.0 out["t5xxl"] = self.t5xxl.tokenize_with_weights(text, return_word_ids, **kwargs) return out @@ -33,6 +33,8 @@ class AnimaTokenizer: def state_dict(self): return {} + def decode(self, token_ids, **kwargs): + return self.qwen3_06b.decode(token_ids, **kwargs) class Qwen3_06BModel(sd1_clip.SDClipModel): def __init__(self, device="cpu", layer="last", layer_idx=None, dtype=None, attention_mask=True, model_options={}): diff --git a/comfy/text_encoders/flux.py b/comfy/text_encoders/flux.py index f67a5f805..1ae398789 100644 --- a/comfy/text_encoders/flux.py +++ b/comfy/text_encoders/flux.py @@ -118,7 +118,7 @@ class MistralTokenizerClass: class Mistral3Tokenizer(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): self.tekken_data = tokenizer_data.get("tekken_model", None) - super().__init__("", pad_with_end=False, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, start_token=1, max_length=99999999, min_length=1, pad_left=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data) + super().__init__("", pad_with_end=False, embedding_directory=embedding_directory, embedding_size=5120, embedding_key='mistral3_24b', tokenizer_class=MistralTokenizerClass, has_end_token=False, pad_to_max_length=False, pad_token=11, start_token=1, max_length=99999999, min_length=1, pad_left=True, tokenizer_args=load_mistral_tokenizer(self.tekken_data), tokenizer_data=tokenizer_data) def state_dict(self): return {"tekken_model": self.tekken_data} @@ -176,12 +176,12 @@ def flux2_te(dtype_llama=None, llama_quantization_metadata=None, pruned=False): class Qwen3Tokenizer(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer") - super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2560, embedding_key='qwen3_4b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=151643, tokenizer_data=tokenizer_data) + super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=2560, embedding_key='qwen3_4b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=151643, tokenizer_data=tokenizer_data) class Qwen3Tokenizer8B(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer") - super().__init__(tokenizer_path, pad_with_end=False, embedding_size=4096, embedding_key='qwen3_8b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=151643, tokenizer_data=tokenizer_data) + super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=4096, embedding_key='qwen3_8b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=512, pad_token=151643, tokenizer_data=tokenizer_data) class KleinTokenizer(sd1_clip.SD1Tokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}, name="qwen3_4b"): diff --git a/comfy/text_encoders/llama.py b/comfy/text_encoders/llama.py index 68ac1e804..ccc200b7a 100644 --- a/comfy/text_encoders/llama.py +++ b/comfy/text_encoders/llama.py @@ -3,9 +3,12 @@ import torch.nn as nn from dataclasses import dataclass from typing import Optional, Any, Tuple import math +from tqdm import tqdm +import comfy.utils from comfy.ldm.modules.attention import optimized_attention_for_device import comfy.model_management +import comfy.ops import comfy.ldm.common_dit import comfy.clip_model @@ -102,6 +105,79 @@ class Qwen3_06BConfig: rope_scale = None final_norm: bool = True lm_head: bool = False + stop_tokens = [151643, 151645] + +@dataclass +class Qwen3_06B_ACE15_Config: + vocab_size: int = 151669 + hidden_size: int = 1024 + intermediate_size: int = 3072 + num_hidden_layers: int = 28 + num_attention_heads: int = 16 + num_key_value_heads: int = 8 + max_position_embeddings: int = 32768 + rms_norm_eps: float = 1e-6 + rope_theta: float = 1000000.0 + transformer_type: str = "llama" + head_dim = 128 + rms_norm_add = False + mlp_activation = "silu" + qkv_bias = False + rope_dims = None + q_norm = "gemma3" + k_norm = "gemma3" + rope_scale = None + final_norm: bool = True + lm_head: bool = False + stop_tokens = [151643, 151645] + +@dataclass +class Qwen3_2B_ACE15_lm_Config: + vocab_size: int = 217204 + hidden_size: int = 2048 + intermediate_size: int = 6144 + num_hidden_layers: int = 28 + num_attention_heads: int = 16 + num_key_value_heads: int = 8 + max_position_embeddings: int = 40960 + rms_norm_eps: float = 1e-6 + rope_theta: float = 1000000.0 + transformer_type: str = "llama" + head_dim = 128 + rms_norm_add = False + mlp_activation = "silu" + qkv_bias = False + rope_dims = None + q_norm = "gemma3" + k_norm = "gemma3" + rope_scale = None + final_norm: bool = True + lm_head: bool = False + stop_tokens = [151643, 151645] + +@dataclass +class Qwen3_4B_ACE15_lm_Config: + vocab_size: int = 217204 + hidden_size: int = 2560 + intermediate_size: int = 9728 + num_hidden_layers: int = 36 + num_attention_heads: int = 32 + num_key_value_heads: int = 8 + max_position_embeddings: int = 40960 + rms_norm_eps: float = 1e-6 + rope_theta: float = 1000000.0 + transformer_type: str = "llama" + head_dim = 128 + rms_norm_add = False + mlp_activation = "silu" + qkv_bias = False + rope_dims = None + q_norm = "gemma3" + k_norm = "gemma3" + rope_scale = None + final_norm: bool = True + lm_head: bool = False + stop_tokens = [151643, 151645] @dataclass class Qwen3_4BConfig: @@ -125,6 +201,7 @@ class Qwen3_4BConfig: rope_scale = None final_norm: bool = True lm_head: bool = False + stop_tokens = [151643, 151645] @dataclass class Qwen3_8BConfig: @@ -148,6 +225,7 @@ class Qwen3_8BConfig: rope_scale = None final_norm: bool = True lm_head: bool = False + stop_tokens = [151643, 151645] @dataclass class Ovis25_2BConfig: @@ -218,6 +296,7 @@ class Gemma2_2B_Config: rope_scale = None final_norm: bool = True lm_head: bool = False + stop_tokens = [1] @dataclass class Gemma3_4B_Config: @@ -242,6 +321,14 @@ class Gemma3_4B_Config: rope_scale = [8.0, 1.0] final_norm: bool = True lm_head: bool = False + stop_tokens = [1, 106] + +GEMMA3_VISION_CONFIG = {"num_channels": 3, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 14} + +@dataclass +class Gemma3_4B_Vision_Config(Gemma3_4B_Config): + vision_config = GEMMA3_VISION_CONFIG + mm_tokens_per_image = 256 @dataclass class Gemma3_12B_Config: @@ -266,8 +353,9 @@ class Gemma3_12B_Config: rope_scale = [8.0, 1.0] final_norm: bool = True lm_head: bool = False - vision_config = {"num_channels": 3, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "image_size": 896, "intermediate_size": 4304, "model_type": "siglip_vision_model", "num_attention_heads": 16, "num_hidden_layers": 27, "patch_size": 14} + vision_config = GEMMA3_VISION_CONFIG mm_tokens_per_image = 256 + stop_tokens = [1, 106] class RMSNorm(nn.Module): def __init__(self, dim: int, eps: float = 1e-5, add=False, device=None, dtype=None): @@ -285,13 +373,6 @@ class RMSNorm(nn.Module): -def rotate_half(x): - """Rotates half the hidden dims of the input.""" - x1 = x[..., : x.shape[-1] // 2] - x2 = x[..., x.shape[-1] // 2 :] - return torch.cat((-x2, x1), dim=-1) - - def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_dims=None, device=None): if not isinstance(theta, list): theta = [theta] @@ -320,20 +401,30 @@ def precompute_freqs_cis(head_dim, position_ids, theta, rope_scale=None, rope_di else: cos = cos.unsqueeze(1) sin = sin.unsqueeze(1) - out.append((cos, sin)) + sin_split = sin.shape[-1] // 2 + out.append((cos, sin[..., : sin_split], -sin[..., sin_split :])) if len(out) == 1: return out[0] return out - def apply_rope(xq, xk, freqs_cis): org_dtype = xq.dtype cos = freqs_cis[0] sin = freqs_cis[1] - q_embed = (xq * cos) + (rotate_half(xq) * sin) - k_embed = (xk * cos) + (rotate_half(xk) * sin) + nsin = freqs_cis[2] + + q_embed = (xq * cos) + q_split = q_embed.shape[-1] // 2 + q_embed[..., : q_split].addcmul_(xq[..., q_split :], nsin) + q_embed[..., q_split :].addcmul_(xq[..., : q_split], sin) + + k_embed = (xk * cos) + k_split = k_embed.shape[-1] // 2 + k_embed[..., : k_split].addcmul_(xk[..., k_split :], nsin) + k_embed[..., k_split :].addcmul_(xk[..., : k_split], sin) + return q_embed.to(org_dtype), k_embed.to(org_dtype) @@ -368,8 +459,10 @@ class Attention(nn.Module): freqs_cis: Optional[torch.Tensor] = None, optimized_attention=None, past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, + sliding_window: Optional[int] = None, ): batch_size, seq_length, _ = hidden_states.shape + xq = self.q_proj(hidden_states) xk = self.k_proj(hidden_states) xv = self.v_proj(hidden_states) @@ -404,6 +497,11 @@ class Attention(nn.Module): else: present_key_value = (xk, xv, index + num_tokens) + if sliding_window is not None and xk.shape[2] > sliding_window: + xk = xk[:, :, -sliding_window:] + xv = xv[:, :, -sliding_window:] + attention_mask = attention_mask[..., -sliding_window:] if attention_mask is not None else None + xk = xk.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1) xv = xv.repeat_interleave(self.num_heads // self.num_kv_heads, dim=1) @@ -486,10 +584,12 @@ class TransformerBlockGemma2(nn.Module): optimized_attention=None, past_key_value: Optional[Tuple[torch.Tensor, torch.Tensor]] = None, ): + sliding_window = None if self.transformer_type == 'gemma3': if self.sliding_attention: + sliding_window = self.sliding_attention if x.shape[1] > self.sliding_attention: - sliding_mask = torch.full((x.shape[1], x.shape[1]), float("-inf"), device=x.device, dtype=x.dtype) + sliding_mask = torch.full((x.shape[1], x.shape[1]), torch.finfo(x.dtype).min, device=x.device, dtype=x.dtype) sliding_mask.tril_(diagonal=-self.sliding_attention) if attention_mask is not None: attention_mask = attention_mask + sliding_mask @@ -508,6 +608,7 @@ class TransformerBlockGemma2(nn.Module): freqs_cis=freqs_cis, optimized_attention=optimized_attention, past_key_value=past_key_value, + sliding_window=sliding_window, ) x = self.post_attention_layernorm(x) @@ -581,10 +682,10 @@ class Llama2_(nn.Module): mask = None if attention_mask is not None: mask = 1.0 - attention_mask.to(x.dtype).reshape((attention_mask.shape[0], 1, -1, attention_mask.shape[-1])).expand(attention_mask.shape[0], 1, seq_len, attention_mask.shape[-1]) - mask = mask.masked_fill(mask.to(torch.bool), float("-inf")) + mask = mask.masked_fill(mask.to(torch.bool), torch.finfo(x.dtype).min / 4) if seq_len > 1: - causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(float("-inf")).triu_(1) + causal_mask = torch.empty(past_len + seq_len, past_len + seq_len, dtype=x.dtype, device=x.device).fill_(torch.finfo(x.dtype).min / 4).triu_(1) if mask is not None: mask += causal_mask else: @@ -692,6 +793,122 @@ class BaseLlama: def forward(self, input_ids, *args, **kwargs): return self.model(input_ids, *args, **kwargs) +class BaseGenerate: + def logits(self, x): + input = x[:, -1:] + if hasattr(self.model, "lm_head"): + module = self.model.lm_head + else: + module = self.model.embed_tokens + + offload_stream = None + if module.comfy_cast_weights: + weight, _, offload_stream = comfy.ops.cast_bias_weight(module, input, offloadable=True) + else: + weight = self.model.embed_tokens.weight.to(x) + + x = torch.nn.functional.linear(input, weight, None) + + comfy.ops.uncast_bias_weight(module, weight, None, offload_stream) + return x + + def generate(self, embeds=None, do_sample=True, max_length=256, temperature=1.0, top_k=50, top_p=0.9, min_p=0.0, repetition_penalty=1.0, seed=42, stop_tokens=None, initial_tokens=[], execution_dtype=None, min_tokens=0): + device = embeds.device + model_config = self.model.config + + if stop_tokens is None: + stop_tokens = self.model.config.stop_tokens + + if execution_dtype is None: + if comfy.model_management.should_use_bf16(device): + execution_dtype = torch.bfloat16 + else: + execution_dtype = torch.float32 + embeds = embeds.to(execution_dtype) + + if embeds.ndim == 2: + embeds = embeds.unsqueeze(0) + + past_key_values = [] #kv_cache init + max_cache_len = embeds.shape[1] + max_length + for x in range(model_config.num_hidden_layers): + past_key_values.append((torch.empty([embeds.shape[0], model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), + torch.empty([embeds.shape[0], model_config.num_key_value_heads, max_cache_len, model_config.head_dim], device=device, dtype=execution_dtype), 0)) + + generator = torch.Generator(device=device).manual_seed(seed) if do_sample else None + + generated_token_ids = [] + pbar = comfy.utils.ProgressBar(max_length) + + # Generation loop + for step in tqdm(range(max_length), desc="Generating tokens"): + x, _, past_key_values = self.model.forward(None, embeds=embeds, attention_mask=None, past_key_values=past_key_values) + logits = self.logits(x)[:, -1] + next_token = self.sample_token(logits, temperature, top_k, top_p, min_p, repetition_penalty, initial_tokens + generated_token_ids, generator, do_sample=do_sample) + token_id = next_token[0].item() + generated_token_ids.append(token_id) + + embeds = self.model.embed_tokens(next_token).to(execution_dtype) + pbar.update(1) + + if token_id in stop_tokens: + break + + return generated_token_ids + + def sample_token(self, logits, temperature, top_k, top_p, min_p, repetition_penalty, token_history, generator, do_sample=True): + + if not do_sample or temperature == 0.0: + return torch.argmax(logits, dim=-1, keepdim=True) + + # Sampling mode + if repetition_penalty != 1.0: + for i in range(logits.shape[0]): + for token_id in set(token_history): + logits[i, token_id] *= repetition_penalty if logits[i, token_id] < 0 else 1/repetition_penalty + + if temperature != 1.0: + logits = logits / temperature + + if top_k > 0: + indices_to_remove = logits < torch.topk(logits, top_k)[0][..., -1, None] + logits[indices_to_remove] = torch.finfo(logits.dtype).min + + if min_p > 0.0: + probs_before_filter = torch.nn.functional.softmax(logits, dim=-1) + top_probs, _ = probs_before_filter.max(dim=-1, keepdim=True) + min_threshold = min_p * top_probs + indices_to_remove = probs_before_filter < min_threshold + logits[indices_to_remove] = torch.finfo(logits.dtype).min + + if top_p < 1.0: + sorted_logits, sorted_indices = torch.sort(logits, descending=True) + cumulative_probs = torch.cumsum(torch.nn.functional.softmax(sorted_logits, dim=-1), dim=-1) + sorted_indices_to_remove = cumulative_probs > top_p + sorted_indices_to_remove[..., 0] = False + indices_to_remove = torch.zeros_like(logits, dtype=torch.bool) + indices_to_remove.scatter_(1, sorted_indices, sorted_indices_to_remove) + logits[indices_to_remove] = torch.finfo(logits.dtype).min + + probs = torch.nn.functional.softmax(logits, dim=-1) + + return torch.multinomial(probs, num_samples=1, generator=generator) + +class BaseQwen3: + def logits(self, x): + input = x[:, -1:] + module = self.model.embed_tokens + + offload_stream = None + if module.comfy_cast_weights: + weight, _, offload_stream = comfy.ops.cast_bias_weight(module, input, offloadable=True) + else: + weight = self.model.embed_tokens.weight.to(x) + + x = torch.nn.functional.linear(input, weight, None) + + comfy.ops.uncast_bias_weight(module, weight, None, offload_stream) + return x class Llama2(BaseLlama, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): @@ -720,7 +937,7 @@ class Qwen25_3B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype -class Qwen3_06B(BaseLlama, torch.nn.Module): +class Qwen3_06B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Qwen3_06BConfig(**config_dict) @@ -729,7 +946,25 @@ class Qwen3_06B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype -class Qwen3_4B(BaseLlama, torch.nn.Module): +class Qwen3_06B_ACE15(BaseLlama, BaseQwen3, torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = Qwen3_06B_ACE15_Config(**config_dict) + self.num_layers = config.num_hidden_layers + + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + +class Qwen3_2B_ACE15_lm(BaseLlama, BaseQwen3, torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = Qwen3_2B_ACE15_lm_Config(**config_dict) + self.num_layers = config.num_hidden_layers + + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + +class Qwen3_4B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Qwen3_4BConfig(**config_dict) @@ -738,7 +973,16 @@ class Qwen3_4B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype -class Qwen3_8B(BaseLlama, torch.nn.Module): +class Qwen3_4B_ACE15_lm(BaseLlama, BaseQwen3, torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = Qwen3_4B_ACE15_lm_Config(**config_dict) + self.num_layers = config.num_hidden_layers + + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + +class Qwen3_8B(BaseLlama, BaseQwen3, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Qwen3_8BConfig(**config_dict) @@ -756,7 +1000,7 @@ class Ovis25_2B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype -class Qwen25_7BVLI(BaseLlama, torch.nn.Module): +class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Qwen25_7BVLI_Config(**config_dict) @@ -766,6 +1010,9 @@ class Qwen25_7BVLI(BaseLlama, torch.nn.Module): self.visual = qwen_vl.Qwen2VLVisionTransformer(hidden_size=1280, output_hidden_size=config.hidden_size, device=device, dtype=dtype, ops=operations) self.dtype = dtype + # todo: should this be tied or not? + #self.lm_head = operations.Linear(config.hidden_size, config.vocab_size, bias=False, device=device, dtype=dtype) + def preprocess_embed(self, embed, device): if embed["type"] == "image": image, grid = qwen_vl.process_qwen2vl_images(embed["data"]) @@ -799,7 +1046,7 @@ class Qwen25_7BVLI(BaseLlama, torch.nn.Module): return super().forward(x, attention_mask=attention_mask, embeds=embeds, num_tokens=num_tokens, intermediate_output=intermediate_output, final_layer_norm_intermediate=final_layer_norm_intermediate, dtype=dtype, position_ids=position_ids) -class Gemma2_2B(BaseLlama, torch.nn.Module): +class Gemma2_2B(BaseLlama, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Gemma2_2B_Config(**config_dict) @@ -808,7 +1055,7 @@ class Gemma2_2B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype -class Gemma3_4B(BaseLlama, torch.nn.Module): +class Gemma3_4B(BaseLlama, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Gemma3_4B_Config(**config_dict) @@ -817,7 +1064,25 @@ class Gemma3_4B(BaseLlama, torch.nn.Module): self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) self.dtype = dtype -class Gemma3_12B(BaseLlama, torch.nn.Module): +class Gemma3_4B_Vision(BaseLlama, BaseGenerate, torch.nn.Module): + def __init__(self, config_dict, dtype, device, operations): + super().__init__() + config = Gemma3_4B_Vision_Config(**config_dict) + self.num_layers = config.num_hidden_layers + + self.model = Llama2_(config, device=device, dtype=dtype, ops=operations) + self.dtype = dtype + self.multi_modal_projector = Gemma3MultiModalProjector(config, dtype, device, operations) + self.vision_model = comfy.clip_model.CLIPVision(config.vision_config, dtype, device, operations) + self.image_size = config.vision_config["image_size"] + + def preprocess_embed(self, embed, device): + if embed["type"] == "image": + image = comfy.clip_model.clip_preprocess(embed["data"], size=self.image_size, mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5], crop=True) + return self.multi_modal_projector(self.vision_model(image.to(device, dtype=torch.float32))[0]), None + return None, None + +class Gemma3_12B(BaseLlama, BaseGenerate, torch.nn.Module): def __init__(self, config_dict, dtype, device, operations): super().__init__() config = Gemma3_12B_Config(**config_dict) diff --git a/comfy/text_encoders/lt.py b/comfy/text_encoders/lt.py index 26573fb12..e86ea9f4e 100644 --- a/comfy/text_encoders/lt.py +++ b/comfy/text_encoders/lt.py @@ -3,9 +3,10 @@ import os from transformers import T5TokenizerFast from .spiece_tokenizer import SPieceTokenizer import comfy.text_encoders.genmo -from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector import torch import comfy.utils +import math +import itertools class T5XXLTokenizer(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): @@ -22,46 +23,86 @@ def ltxv_te(*args, **kwargs): return comfy.text_encoders.genmo.mochi_te(*args, **kwargs) -class Gemma3_12BTokenizer(sd1_clip.SDTokenizer): - def __init__(self, embedding_directory=None, tokenizer_data={}): - tokenizer = tokenizer_data.get("spiece_model", None) - super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data) - +class Gemma3_Tokenizer(): def state_dict(self): return {"spiece_model": self.tokenizer.serialize_model()} + def tokenize_with_weights(self, text, return_word_ids=False, image=None, llama_template=None, skip_template=True, **kwargs): + self.llama_template = "system\nYou are a helpful assistant.\nuser\n{}\nmodel\n" + self.llama_template_images = "system\nYou are a helpful assistant.\nuser\n\n{}\n\nmodel\n" + + if image is None: + images = [] + else: + samples = image.movedim(-1, 1) + total = int(896 * 896) + + scale_by = math.sqrt(total / (samples.shape[3] * samples.shape[2])) + width = round(samples.shape[3] * scale_by) + height = round(samples.shape[2] * scale_by) + + s = comfy.utils.common_upscale(samples, width, height, "area", "disabled").movedim(1, -1) + images = [s[:, :, :, :3]] + + if text.startswith(''): + skip_template = True + + if skip_template: + llama_text = text + else: + if llama_template is None: + if len(images) > 0: + llama_text = self.llama_template_images.format(text) + else: + llama_text = self.llama_template.format(text) + else: + llama_text = llama_template.format(text) + + text_tokens = super().tokenize_with_weights(llama_text, return_word_ids) + + if len(images) > 0: + embed_count = 0 + for r in text_tokens: + for i, token in enumerate(r): + if token[0] == 262144 and embed_count < len(images): + r[i] = ({"type": "image", "data": images[embed_count]},) + token[1:] + embed_count += 1 + return text_tokens + +class Gemma3_12BTokenizer(Gemma3_Tokenizer, sd1_clip.SDTokenizer): + def __init__(self, embedding_directory=None, tokenizer_data={}): + tokenizer = tokenizer_data.get("spiece_model", None) + special_tokens = {"": 262144, "": 106} + super().__init__(tokenizer, pad_with_end=False, embedding_size=3840, embedding_key='gemma3_12b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1024, pad_left=True, disable_weights=True, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, tokenizer_data=tokenizer_data) + + class LTXAVGemmaTokenizer(sd1_clip.SD1Tokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): super().__init__(embedding_directory=embedding_directory, tokenizer_data=tokenizer_data, name="gemma3_12b", tokenizer=Gemma3_12BTokenizer) + class Gemma3_12BModel(sd1_clip.SDClipModel): def __init__(self, device="cpu", layer="all", layer_idx=None, dtype=None, attention_mask=True, model_options={}): llama_quantization_metadata = model_options.get("llama_quantization_metadata", None) if llama_quantization_metadata is not None: model_options = model_options.copy() model_options["quantization_metadata"] = llama_quantization_metadata - + self.dtypes = set() + self.dtypes.add(dtype) super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_12B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) - def tokenize_with_weights(self, text, return_word_ids=False, llama_template="{}", image_embeds=None, **kwargs): - text = llama_template.format(text) - text_tokens = super().tokenize_with_weights(text, return_word_ids) - embed_count = 0 - for k in text_tokens: - tt = text_tokens[k] - for r in tt: - for i in range(len(r)): - if r[i][0] == 262144: - if image_embeds is not None and embed_count < image_embeds.shape[0]: - r[i] = ({"type": "embedding", "data": image_embeds[embed_count], "original_type": "image"},) + r[i][1:] - embed_count += 1 - return text_tokens + def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed): + tokens_only = [[t[0] for t in b] for b in tokens] + embeds, _, _, embeds_info = self.process_tokens(tokens_only, self.execution_device) + comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5) + return self.transformer.generate(embeds, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed, stop_tokens=[106]) # 106 is class LTXAVTEModel(torch.nn.Module): def __init__(self, dtype_llama=None, device="cpu", dtype=None, model_options={}): super().__init__() self.dtypes = set() self.dtypes.add(dtype) + self.compat_mode = False self.gemma3_12b = Gemma3_12BModel(device=device, dtype=dtype_llama, model_options=model_options, layer="all", layer_idx=None) self.dtypes.add(dtype_llama) @@ -69,6 +110,11 @@ class LTXAVTEModel(torch.nn.Module): operations = self.gemma3_12b.operations # TODO self.text_embedding_projection = operations.Linear(3840 * 49, 3840, bias=False, dtype=dtype, device=device) + def enable_compat_mode(self): # TODO: remove + from comfy.ldm.lightricks.embeddings_connector import Embeddings1DConnector + operations = self.gemma3_12b.operations + dtype = self.text_embedding_projection.weight.dtype + device = self.text_embedding_projection.weight.device self.audio_embeddings_connector = Embeddings1DConnector( split_rope=True, double_precision_rope=True, @@ -84,6 +130,7 @@ class LTXAVTEModel(torch.nn.Module): device=device, operations=operations, ) + self.compat_mode = True def set_clip_options(self, options): self.execution_device = options.get("execution_device", self.execution_device) @@ -97,6 +144,7 @@ class LTXAVTEModel(torch.nn.Module): token_weight_pairs = token_weight_pairs["gemma3_12b"] out, pooled, extra = self.gemma3_12b.encode_token_weights(token_weight_pairs) + out = out[:, :, -torch.sum(extra["attention_mask"]).item():] out_device = out.device if comfy.model_management.should_use_bf16(self.execution_device): out = out.to(device=self.execution_device, dtype=torch.bfloat16) @@ -105,30 +153,45 @@ class LTXAVTEModel(torch.nn.Module): out = out.reshape((out.shape[0], out.shape[1], -1)) out = self.text_embedding_projection(out) out = out.float() - out_vid = self.video_embeddings_connector(out)[0] - out_audio = self.audio_embeddings_connector(out)[0] - out = torch.concat((out_vid, out_audio), dim=-1) + + if self.compat_mode: + out_vid = self.video_embeddings_connector(out)[0] + out_audio = self.audio_embeddings_connector(out)[0] + out = torch.concat((out_vid, out_audio), dim=-1) return out.to(out_device), pooled + def generate(self, tokens, do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed): + return self.gemma3_12b.generate(tokens["gemma3_12b"], do_sample, max_length, temperature, top_k, top_p, min_p, repetition_penalty, seed) + def load_sd(self, sd): if "model.layers.47.self_attn.q_norm.weight" in sd: return self.gemma3_12b.load_sd(sd) else: - sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight", "model.diffusion_model.video_embeddings_connector.": "video_embeddings_connector.", "model.diffusion_model.audio_embeddings_connector.": "audio_embeddings_connector."}, filter_keys=True) + sdo = comfy.utils.state_dict_prefix_replace(sd, {"text_embedding_projection.aggregate_embed.weight": "text_embedding_projection.weight"}, filter_keys=True) if len(sdo) == 0: sdo = sd missing_all = [] unexpected_all = [] - for prefix, component in [("text_embedding_projection.", self.text_embedding_projection), ("video_embeddings_connector.", self.video_embeddings_connector), ("audio_embeddings_connector.", self.audio_embeddings_connector)]: + for prefix, component in [("text_embedding_projection.", self.text_embedding_projection)]: component_sd = {k.replace(prefix, ""): v for k, v in sdo.items() if k.startswith(prefix)} if component_sd: missing, unexpected = component.load_state_dict(component_sd, strict=False, assign=getattr(self, "can_assign_sd", False)) missing_all.extend([f"{prefix}{k}" for k in missing]) unexpected_all.extend([f"{prefix}{k}" for k in unexpected]) + if "model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.2.attn1.to_q.bias" not in sd: # TODO: remove + ww = sd.get("model.diffusion_model.audio_embeddings_connector.transformer_1d_blocks.0.attn1.to_q.bias", None) + if ww is not None: + if ww.shape[0] == 3840: + self.enable_compat_mode() + sdv = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.video_embeddings_connector.": ""}, filter_keys=True) + self.video_embeddings_connector.load_state_dict(sdv, strict=False, assign=getattr(self, "can_assign_sd", False)) + sda = comfy.utils.state_dict_prefix_replace(sd, {"model.diffusion_model.audio_embeddings_connector.": ""}, filter_keys=True) + self.audio_embeddings_connector.load_state_dict(sda, strict=False, assign=getattr(self, "can_assign_sd", False)) + return (missing_all, unexpected_all) def memory_estimation_function(self, token_weight_pairs, device=None): @@ -137,7 +200,10 @@ class LTXAVTEModel(torch.nn.Module): constant /= 2.0 token_weight_pairs = token_weight_pairs.get("gemma3_12b", []) - num_tokens = sum(map(lambda a: len(a), token_weight_pairs)) + m = min([sum(1 for _ in itertools.takewhile(lambda x: x[0] == 0, sub)) for sub in token_weight_pairs]) + + num_tokens = sum(map(lambda a: len(a), token_weight_pairs)) - m + num_tokens = max(num_tokens, 642) return num_tokens * constant * 1024 * 1024 def ltxav_te(dtype_llama=None, llama_quantization_metadata=None): @@ -150,3 +216,14 @@ def ltxav_te(dtype_llama=None, llama_quantization_metadata=None): dtype = dtype_llama super().__init__(dtype_llama=dtype_llama, device=device, dtype=dtype, model_options=model_options) return LTXAVTEModel_ + +def gemma3_te(dtype_llama=None, llama_quantization_metadata=None): + class Gemma3_12BModel_(Gemma3_12BModel): + def __init__(self, device="cpu", dtype=None, model_options={}): + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["llama_quantization_metadata"] = llama_quantization_metadata + if dtype_llama is not None: + dtype = dtype_llama + super().__init__(device=device, dtype=dtype, model_options=model_options) + return Gemma3_12BModel_ diff --git a/comfy/text_encoders/lumina2.py b/comfy/text_encoders/lumina2.py index b29a7cc87..01ebdfabe 100644 --- a/comfy/text_encoders/lumina2.py +++ b/comfy/text_encoders/lumina2.py @@ -1,23 +1,23 @@ from comfy import sd1_clip from .spiece_tokenizer import SPieceTokenizer import comfy.text_encoders.llama - +from comfy.text_encoders.lt import Gemma3_Tokenizer +import comfy.utils class Gemma2BTokenizer(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): tokenizer = tokenizer_data.get("spiece_model", None) - super().__init__(tokenizer, pad_with_end=False, embedding_size=2304, embedding_key='gemma2_2b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, tokenizer_data=tokenizer_data) + special_tokens = {"": 107} + super().__init__(tokenizer, pad_with_end=False, embedding_size=2304, embedding_key='gemma2_2b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, tokenizer_data=tokenizer_data) def state_dict(self): return {"spiece_model": self.tokenizer.serialize_model()} -class Gemma3_4BTokenizer(sd1_clip.SDTokenizer): +class Gemma3_4BTokenizer(Gemma3_Tokenizer, sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): tokenizer = tokenizer_data.get("spiece_model", None) - super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False}, disable_weights=True, tokenizer_data=tokenizer_data) - - def state_dict(self): - return {"spiece_model": self.tokenizer.serialize_model()} + special_tokens = {"": 262144, "": 106} + super().__init__(tokenizer, pad_with_end=False, embedding_size=2560, embedding_key='gemma3_4b', tokenizer_class=SPieceTokenizer, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, tokenizer_args={"add_bos": True, "add_eos": False, "special_tokens": special_tokens}, disable_weights=True, tokenizer_data=tokenizer_data) class LuminaTokenizer(sd1_clip.SD1Tokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): @@ -40,6 +40,20 @@ class Gemma3_4BModel(sd1_clip.SDClipModel): super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) +class Gemma3_4B_Vision_Model(sd1_clip.SDClipModel): + def __init__(self, device="cpu", layer="hidden", layer_idx=-2, dtype=None, attention_mask=True, model_options={}): + llama_quantization_metadata = model_options.get("llama_quantization_metadata", None) + if llama_quantization_metadata is not None: + model_options = model_options.copy() + model_options["quantization_metadata"] = llama_quantization_metadata + + super().__init__(device=device, layer=layer, layer_idx=layer_idx, textmodel_json_config={}, dtype=dtype, special_tokens={"start": 2, "pad": 0}, layer_norm_hidden_state=False, model_class=comfy.text_encoders.llama.Gemma3_4B_Vision, enable_attention_masks=attention_mask, return_attention_masks=attention_mask, model_options=model_options) + + def process_tokens(self, tokens, device): + embeds, _, _, embeds_info = super().process_tokens(tokens, device) + comfy.utils.normalize_image_embeddings(embeds, embeds_info, self.transformer.model.config.hidden_size ** 0.5) + return embeds + class LuminaModel(sd1_clip.SD1ClipModel): def __init__(self, device="cpu", dtype=None, model_options={}, name="gemma2_2b", clip_model=Gemma2_2BModel): super().__init__(device=device, dtype=dtype, name=name, clip_model=clip_model, model_options=model_options) @@ -50,6 +64,8 @@ def te(dtype_llama=None, llama_quantization_metadata=None, model_type="gemma2_2b model = Gemma2_2BModel elif model_type == "gemma3_4b": model = Gemma3_4BModel + elif model_type == "gemma3_4b_vision": + model = Gemma3_4B_Vision_Model class LuminaTEModel_(LuminaModel): def __init__(self, device="cpu", dtype=None, model_options={}): diff --git a/comfy/text_encoders/spiece_tokenizer.py b/comfy/text_encoders/spiece_tokenizer.py index caccb3ca2..099d8d2d9 100644 --- a/comfy/text_encoders/spiece_tokenizer.py +++ b/comfy/text_encoders/spiece_tokenizer.py @@ -6,9 +6,10 @@ class SPieceTokenizer: def from_pretrained(path, **kwargs): return SPieceTokenizer(path, **kwargs) - def __init__(self, tokenizer_path, add_bos=False, add_eos=True): + def __init__(self, tokenizer_path, add_bos=False, add_eos=True, special_tokens=None): self.add_bos = add_bos self.add_eos = add_eos + self.special_tokens = special_tokens import sentencepiece if torch.is_tensor(tokenizer_path): tokenizer_path = tokenizer_path.numpy().tobytes() @@ -27,8 +28,32 @@ class SPieceTokenizer: return out def __call__(self, string): + if self.special_tokens is not None: + import re + special_tokens_pattern = '|'.join(re.escape(token) for token in self.special_tokens.keys()) + if special_tokens_pattern and re.search(special_tokens_pattern, string): + parts = re.split(f'({special_tokens_pattern})', string) + result = [] + for part in parts: + if not part: + continue + if part in self.special_tokens: + result.append(self.special_tokens[part]) + else: + encoded = self.tokenizer.encode(part, add_bos=False, add_eos=False) + result.extend(encoded) + return {"input_ids": result} + out = self.tokenizer.encode(string) return {"input_ids": out} + def decode(self, token_ids, skip_special_tokens=False): + + if skip_special_tokens and self.special_tokens: + special_token_ids = set(self.special_tokens.values()) + token_ids = [tid for tid in token_ids if tid not in special_token_ids] + + return self.tokenizer.decode(token_ids) + def serialize_model(self): return torch.ByteTensor(list(self.tokenizer.serialized_model_proto())) diff --git a/comfy/text_encoders/z_image.py b/comfy/text_encoders/z_image.py index ad41bfb1e..33b7cf594 100644 --- a/comfy/text_encoders/z_image.py +++ b/comfy/text_encoders/z_image.py @@ -6,7 +6,7 @@ import os class Qwen3Tokenizer(sd1_clip.SDTokenizer): def __init__(self, embedding_directory=None, tokenizer_data={}): tokenizer_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "qwen25_tokenizer") - super().__init__(tokenizer_path, pad_with_end=False, embedding_size=2560, embedding_key='qwen3_4b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data) + super().__init__(tokenizer_path, pad_with_end=False, embedding_directory=embedding_directory, embedding_size=2560, embedding_key='qwen3_4b', tokenizer_class=Qwen2Tokenizer, has_start_token=False, has_end_token=False, pad_to_max_length=False, max_length=99999999, min_length=1, pad_token=151643, tokenizer_data=tokenizer_data) class ZImageTokenizer(sd1_clip.SD1Tokenizer): diff --git a/comfy/utils.py b/comfy/utils.py index 9e98eb176..0769cef44 100644 --- a/comfy/utils.py +++ b/comfy/utils.py @@ -20,42 +20,44 @@ import torch import math import struct -import comfy.checkpoint_pickle +import comfy.memory_management import safetensors.torch import numpy as np from PIL import Image import logging import itertools from torch.nn.functional import interpolate +from tqdm.auto import trange from einops import rearrange -from comfy.cli_args import args, enables_dynamic_vram +from comfy.cli_args import args import json import time import mmap +import warnings MMAP_TORCH_FILES = args.mmap_torch_files DISABLE_MMAP = args.disable_mmap -ALWAYS_SAFE_LOAD = False -if hasattr(torch.serialization, "add_safe_globals"): # TODO: this was added in pytorch 2.4, the unsafe path should be removed once earlier versions are deprecated + +if True: # ckpt/pt file whitelist for safe loading of old sd files class ModelCheckpoint: pass ModelCheckpoint.__module__ = "pytorch_lightning.callbacks.model_checkpoint" def scalar(*args, **kwargs): - from numpy.core.multiarray import scalar as sc - return sc(*args, **kwargs) + return None scalar.__module__ = "numpy.core.multiarray" from numpy import dtype from numpy.dtypes import Float64DType - from _codecs import encode + + def encode(*args, **kwargs): # no longer necessary on newer torch + return None + encode.__module__ = "_codecs" torch.serialization.add_safe_globals([ModelCheckpoint, scalar, dtype, Float64DType, encode]) - ALWAYS_SAFE_LOAD = True logging.info("Checkpoint files will always be loaded safely.") -else: - logging.warning("Warning, you are using an old pytorch version and some ckpt/pt files might be loaded unsafely. Upgrading to 2.4 or above is recommended as older versions of pytorch are no longer supported.") + # Current as of safetensors 0.7.0 _TYPES = { @@ -81,11 +83,12 @@ _TYPES = { def load_safetensors(ckpt): f = open(ckpt, "rb") mapping = mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) + mv = memoryview(mapping) header_size = struct.unpack(">= 1 return crc ^ 0xFFFFFFFF + +def deepcopy_list_dict(obj, memo=None): + if memo is None: + memo = {} + + obj_id = id(obj) + if obj_id in memo: + return memo[obj_id] + + if isinstance(obj, dict): + res = {deepcopy_list_dict(k, memo): deepcopy_list_dict(v, memo) for k, v in obj.items()} + elif isinstance(obj, list): + res = [deepcopy_list_dict(i, memo) for i in obj] + else: + res = obj + + memo[obj_id] = res + return res + +def normalize_image_embeddings(embeds, embeds_info, scale_factor): + """Normalize image embeddings to match text embedding scale""" + for info in embeds_info: + if info.get("type") == "image": + start_idx = info["index"] + end_idx = start_idx + info["size"] + embeds[:, start_idx:end_idx, :] /= scale_factor diff --git a/comfy/weight_adapter/base.py b/comfy/weight_adapter/base.py index bce89a0e2..d352e066b 100644 --- a/comfy/weight_adapter/base.py +++ b/comfy/weight_adapter/base.py @@ -49,6 +49,12 @@ class WeightAdapterBase: """ raise NotImplementedError + def calculate_shape( + self, + key + ): + return None + def calculate_weight( self, weight, diff --git a/comfy/weight_adapter/bypass.py b/comfy/weight_adapter/bypass.py index d4aaf98ca..b9d5ec7d9 100644 --- a/comfy/weight_adapter/bypass.py +++ b/comfy/weight_adapter/bypass.py @@ -21,6 +21,7 @@ from typing import Optional, Union import torch import torch.nn as nn +import comfy.model_management from .base import WeightAdapterBase, WeightAdapterTrainBase from comfy.patcher_extension import PatcherInjection @@ -181,18 +182,21 @@ class BypassForwardHook: ) return # Already injected - # Move adapter weights to module's device to avoid CPU-GPU transfer on every forward - device = None + # Move adapter weights to compute device (GPU) + # Use get_torch_device() instead of module.weight.device because + # with offloading, module weights may be on CPU while compute happens on GPU + device = comfy.model_management.get_torch_device() + + # Get dtype from module weight if available dtype = None if hasattr(self.module, "weight") and self.module.weight is not None: - device = self.module.weight.device dtype = self.module.weight.dtype - elif hasattr(self.module, "W_q"): # Quantized layers might use different attr - device = self.module.W_q.device - dtype = self.module.W_q.dtype - if device is not None: - self._move_adapter_weights_to_device(device, dtype) + # Only use dtype if it's a standard float type, not quantized + if dtype is not None and dtype not in (torch.float32, torch.float16, torch.bfloat16): + dtype = None + + self._move_adapter_weights_to_device(device, dtype) self.original_forward = self.module.forward self.module.forward = self._bypass_forward diff --git a/comfy/weight_adapter/lora.py b/comfy/weight_adapter/lora.py index bc4260a8f..8e1261a12 100644 --- a/comfy/weight_adapter/lora.py +++ b/comfy/weight_adapter/lora.py @@ -214,6 +214,13 @@ class LoRAAdapter(WeightAdapterBase): else: return None + def calculate_shape( + self, + key + ): + reshape = self.weights[5] + return tuple(reshape) if reshape is not None else None + def calculate_weight( self, weight, diff --git a/comfy_api/feature_flags.py b/comfy_api/feature_flags.py index de167f037..a90a5ca40 100644 --- a/comfy_api/feature_flags.py +++ b/comfy_api/feature_flags.py @@ -14,6 +14,7 @@ SERVER_FEATURE_FLAGS: dict[str, Any] = { "supports_preview_metadata": True, "max_upload_size": args.max_upload_size * 1024 * 1024, # Convert MB to bytes "extension": {"manager": {"supports_v4": True}}, + "node_replacements": True, } diff --git a/comfy_api/latest/__init__.py b/comfy_api/latest/__init__.py index b0fa14ff6..f2399422b 100644 --- a/comfy_api/latest/__init__.py +++ b/comfy_api/latest/__init__.py @@ -7,7 +7,7 @@ from comfy_api.internal.singleton import ProxiedSingleton from comfy_api.internal.async_to_sync import create_sync_class from ._input import ImageInput, AudioInput, MaskInput, LatentInput, VideoInput from ._input_impl import VideoFromFile, VideoFromComponents -from ._util import VideoCodec, VideoContainer, VideoComponents, MESH, VOXEL +from ._util import VideoCodec, VideoContainer, VideoComponents, MESH, VOXEL, File3D from . import _io_public as io from . import _ui_public as ui from comfy_execution.utils import get_executing_context @@ -21,6 +21,17 @@ class ComfyAPI_latest(ComfyAPIBase): VERSION = "latest" STABLE = False + def __init__(self): + super().__init__() + self.node_replacement = self.NodeReplacement() + self.execution = self.Execution() + + class NodeReplacement(ProxiedSingleton): + async def register(self, node_replace: io.NodeReplace) -> None: + """Register a node replacement mapping.""" + from server import PromptServer + PromptServer.instance.node_replace_manager.register(node_replace) + class Execution(ProxiedSingleton): async def set_progress( self, @@ -73,8 +84,6 @@ class ComfyAPI_latest(ComfyAPIBase): image=to_display, ) - execution: Execution - class ComfyExtension(ABC): async def on_load(self) -> None: """ @@ -105,6 +114,7 @@ class Types: VideoComponents = VideoComponents MESH = MESH VOXEL = VOXEL + File3D = File3D ComfyAPI = ComfyAPI_latest diff --git a/comfy_api/latest/_input/video_types.py b/comfy_api/latest/_input/video_types.py index e634a0311..451e9526e 100644 --- a/comfy_api/latest/_input/video_types.py +++ b/comfy_api/latest/_input/video_types.py @@ -34,6 +34,21 @@ class VideoInput(ABC): """ pass + @abstractmethod + def as_trimmed( + self, + start_time: float | None = None, + duration: float | None = None, + strict_duration: bool = False, + ) -> VideoInput | None: + """ + Create a new VideoInput which is trimmed to have the corresponding start_time and duration + + Returns: + A new VideoInput, or None if the result would have negative duration + """ + pass + def get_stream_source(self) -> Union[str, io.BytesIO]: """ Get a streamable source for the video. This allows processing without diff --git a/comfy_api/latest/_input_impl/video_types.py b/comfy_api/latest/_input_impl/video_types.py index 1405d0b81..a3d48c87f 100644 --- a/comfy_api/latest/_input_impl/video_types.py +++ b/comfy_api/latest/_input_impl/video_types.py @@ -6,6 +6,7 @@ from typing import Optional from .._input import AudioInput, VideoInput import av import io +import itertools import json import numpy as np import math @@ -29,7 +30,6 @@ def container_to_output_format(container_format: str | None) -> str | None: formats = container_format.split(",") return formats[0] - def get_open_write_kwargs( dest: str | io.BytesIO, container_format: str, to_format: str | None ) -> dict: @@ -57,12 +57,14 @@ class VideoFromFile(VideoInput): Class representing video input from a file. """ - def __init__(self, file: str | io.BytesIO): + def __init__(self, file: str | io.BytesIO, *, start_time: float=0, duration: float=0): """ Initialize the VideoFromFile object based off of either a path on disk or a BytesIO object containing the file contents. """ self.__file = file + self.__start_time = start_time + self.__duration = duration def get_stream_source(self) -> str | io.BytesIO: """ @@ -96,6 +98,16 @@ class VideoFromFile(VideoInput): Returns: Duration in seconds """ + raw_duration = self._get_raw_duration() + if self.__start_time < 0: + duration_from_start = min(raw_duration, -self.__start_time) + else: + duration_from_start = raw_duration - self.__start_time + if self.__duration: + return min(self.__duration, duration_from_start) + return duration_from_start + + def _get_raw_duration(self) -> float: if isinstance(self.__file, io.BytesIO): self.__file.seek(0) with av.open(self.__file, mode="r") as container: @@ -113,9 +125,13 @@ class VideoFromFile(VideoInput): if video_stream and video_stream.average_rate: frame_count = 0 container.seek(0) - for packet in container.demux(video_stream): - for _ in packet.decode(): - frame_count += 1 + frame_iterator = ( + container.decode(video_stream) + if video_stream.codec.capabilities & 0x100 + else container.demux(video_stream) + ) + for packet in frame_iterator: + frame_count += 1 if frame_count > 0: return float(frame_count / video_stream.average_rate) @@ -131,36 +147,54 @@ class VideoFromFile(VideoInput): with av.open(self.__file, mode="r") as container: video_stream = self._get_first_video_stream(container) - # 1. Prefer the frames field if available - if video_stream.frames and video_stream.frames > 0: + # 1. Prefer the frames field if available and usable + if ( + video_stream.frames + and video_stream.frames > 0 + and not self.__start_time + and not self.__duration + ): return int(video_stream.frames) # 2. Try to estimate from duration and average_rate using only metadata - if container.duration is not None and video_stream.average_rate: - duration_seconds = float(container.duration / av.time_base) - estimated_frames = int(round(duration_seconds * float(video_stream.average_rate))) - if estimated_frames > 0: - return estimated_frames - if ( getattr(video_stream, "duration", None) is not None and getattr(video_stream, "time_base", None) is not None and video_stream.average_rate ): - duration_seconds = float(video_stream.duration * video_stream.time_base) + raw_duration = float(video_stream.duration * video_stream.time_base) + if self.__start_time < 0: + duration_from_start = min(raw_duration, -self.__start_time) + else: + duration_from_start = raw_duration - self.__start_time + duration_seconds = min(self.__duration, duration_from_start) estimated_frames = int(round(duration_seconds * float(video_stream.average_rate))) if estimated_frames > 0: return estimated_frames # 3. Last resort: decode frames and count them (streaming) - frame_count = 0 - container.seek(0) - for packet in container.demux(video_stream): - for _ in packet.decode(): - frame_count += 1 - - if frame_count == 0: - raise ValueError(f"Could not determine frame count for file '{self.__file}'") + if self.__start_time < 0: + start_time = max(self._get_raw_duration() + self.__start_time, 0) + else: + start_time = self.__start_time + frame_count = 1 + start_pts = int(start_time / video_stream.time_base) + end_pts = int((start_time + self.__duration) / video_stream.time_base) + container.seek(start_pts, stream=video_stream) + frame_iterator = ( + container.decode(video_stream) + if video_stream.codec.capabilities & 0x100 + else container.demux(video_stream) + ) + for frame in frame_iterator: + if frame.pts >= start_pts: + break + else: + raise ValueError(f"Could not determine frame count for file '{self.__file}'\nNo frames exist for start_time {self.__start_time}") + for frame in frame_iterator: + if frame.pts >= end_pts: + break + frame_count += 1 return frame_count def get_frame_rate(self) -> Fraction: @@ -199,9 +233,21 @@ class VideoFromFile(VideoInput): return container.format.name def get_components_internal(self, container: InputContainer) -> VideoComponents: + video_stream = self._get_first_video_stream(container) + if self.__start_time < 0: + start_time = max(self._get_raw_duration() + self.__start_time, 0) + else: + start_time = self.__start_time # Get video frames frames = [] - for frame in container.decode(video=0): + start_pts = int(start_time / video_stream.time_base) + end_pts = int((start_time + self.__duration) / video_stream.time_base) + container.seek(start_pts, stream=video_stream) + for frame in container.decode(video_stream): + if frame.pts < start_pts: + continue + if self.__duration and frame.pts >= end_pts: + break img = frame.to_ndarray(format='rgb24') # shape: (H, W, 3) img = torch.from_numpy(img) / 255.0 # shape: (H, W, 3) frames.append(img) @@ -209,31 +255,44 @@ class VideoFromFile(VideoInput): images = torch.stack(frames) if len(frames) > 0 else torch.zeros(0, 3, 0, 0) # Get frame rate - video_stream = next(s for s in container.streams if s.type == 'video') - frame_rate = Fraction(video_stream.average_rate) if video_stream and video_stream.average_rate else Fraction(1) + frame_rate = Fraction(video_stream.average_rate) if video_stream.average_rate else Fraction(1) # Get audio if available audio = None - try: - container.seek(0) # Reset the container to the beginning - for stream in container.streams: - if stream.type != 'audio': - continue - assert isinstance(stream, av.AudioStream) - audio_frames = [] - for packet in container.demux(stream): - for frame in packet.decode(): - assert isinstance(frame, av.AudioFrame) - audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) - if len(audio_frames) > 0: - audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) - audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) - audio = AudioInput({ - "waveform": audio_tensor, - "sample_rate": int(stream.sample_rate) if stream.sample_rate else 1, - }) - except StopIteration: - pass # No audio stream + container.seek(start_pts, stream=video_stream) + # Use last stream for consistency + if len(container.streams.audio): + audio_stream = container.streams.audio[-1] + audio_frames = [] + resample = av.audio.resampler.AudioResampler(format='fltp').resample + frames = itertools.chain.from_iterable( + map(resample, container.decode(audio_stream)) + ) + + has_first_frame = False + for frame in frames: + offset_seconds = start_time - frame.pts * audio_stream.time_base + to_skip = int(offset_seconds * audio_stream.sample_rate) + if to_skip < frame.samples: + has_first_frame = True + break + if has_first_frame: + audio_frames.append(frame.to_ndarray()[..., to_skip:]) + + for frame in frames: + if frame.time > start_time + self.__duration: + break + audio_frames.append(frame.to_ndarray()) # shape: (channels, samples) + if len(audio_frames) > 0: + audio_data = np.concatenate(audio_frames, axis=1) # shape: (channels, total_samples) + if self.__duration: + audio_data = audio_data[..., :int(self.__duration * audio_stream.sample_rate)] + + audio_tensor = torch.from_numpy(audio_data).unsqueeze(0) # shape: (1, channels, total_samples) + audio = AudioInput({ + "waveform": audio_tensor, + "sample_rate": int(audio_stream.sample_rate) if audio_stream.sample_rate else 1, + }) metadata = container.metadata return VideoComponents(images=images, audio=audio, frame_rate=frame_rate, metadata=metadata) @@ -250,7 +309,7 @@ class VideoFromFile(VideoInput): path: str | io.BytesIO, format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, - metadata: Optional[dict] = None + metadata: Optional[dict] = None, ): if isinstance(self.__file, io.BytesIO): self.__file.seek(0) # Reset the BytesIO object to the beginning @@ -262,15 +321,14 @@ class VideoFromFile(VideoInput): reuse_streams = False if codec != VideoCodec.AUTO and codec != video_encoding and video_encoding is not None: reuse_streams = False + if self.__start_time or self.__duration: + reuse_streams = False if not reuse_streams: components = self.get_components_internal(container) video = VideoFromComponents(components) return video.save_to( - path, - format=format, - codec=codec, - metadata=metadata + path, format=format, codec=codec, metadata=metadata ) streams = container.streams @@ -304,10 +362,21 @@ class VideoFromFile(VideoInput): output_container.mux(packet) def _get_first_video_stream(self, container: InputContainer): - video_stream = next((s for s in container.streams if s.type == "video"), None) - if video_stream is None: - raise ValueError(f"No video stream found in file '{self.__file}'") - return video_stream + if len(container.streams.video): + return container.streams.video[0] + raise ValueError(f"No video stream found in file '{self.__file}'") + + def as_trimmed( + self, start_time: float = 0, duration: float = 0, strict_duration: bool = True + ) -> VideoInput | None: + trimmed = VideoFromFile( + self.get_stream_source(), + start_time=start_time + self.__start_time, + duration=duration, + ) + if trimmed.get_duration() < duration and strict_duration: + return None + return trimmed class VideoFromComponents(VideoInput): @@ -322,7 +391,7 @@ class VideoFromComponents(VideoInput): return VideoComponents( images=self.__components.images, audio=self.__components.audio, - frame_rate=self.__components.frame_rate + frame_rate=self.__components.frame_rate, ) def save_to( @@ -330,7 +399,7 @@ class VideoFromComponents(VideoInput): path: str, format: VideoContainer = VideoContainer.AUTO, codec: VideoCodec = VideoCodec.AUTO, - metadata: Optional[dict] = None + metadata: Optional[dict] = None, ): if format != VideoContainer.AUTO and format != VideoContainer.MP4: raise ValueError("Only MP4 format is supported for now") @@ -357,7 +426,10 @@ class VideoFromComponents(VideoInput): audio_stream: Optional[av.AudioStream] = None if self.__components.audio: audio_sample_rate = int(self.__components.audio['sample_rate']) - audio_stream = output.add_stream('aac', rate=audio_sample_rate) + waveform = self.__components.audio['waveform'] + waveform = waveform[0, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])] + layout = {1: 'mono', 2: 'stereo', 6: '5.1'}.get(waveform.shape[0], 'stereo') + audio_stream = output.add_stream('aac', rate=audio_sample_rate, layout=layout) # Encode video for i, frame in enumerate(self.__components.images): @@ -372,12 +444,21 @@ class VideoFromComponents(VideoInput): output.mux(packet) if audio_stream and self.__components.audio: - waveform = self.__components.audio['waveform'] - waveform = waveform[:, :, :math.ceil((audio_sample_rate / frame_rate) * self.__components.images.shape[0])] - frame = av.AudioFrame.from_ndarray(waveform.movedim(2, 1).reshape(1, -1).float().cpu().numpy(), format='flt', layout='mono' if waveform.shape[1] == 1 else 'stereo') + frame = av.AudioFrame.from_ndarray(waveform.float().cpu().contiguous().numpy(), format='fltp', layout=layout) frame.sample_rate = audio_sample_rate frame.pts = 0 output.mux(audio_stream.encode(frame)) # Flush encoder output.mux(audio_stream.encode(None)) + + def as_trimmed( + self, + start_time: float | None = None, + duration: float | None = None, + strict_duration: bool = True, + ) -> VideoInput | None: + if self.get_duration() < start_time + duration: + return None + #TODO Consider tracking duration and trimming at time of save? + return VideoFromFile(self.get_stream_source(), start_time=start_time, duration=duration) diff --git a/comfy_api/latest/_io.py b/comfy_api/latest/_io.py index eeea9781a..189d7d9bc 100644 --- a/comfy_api/latest/_io.py +++ b/comfy_api/latest/_io.py @@ -27,7 +27,7 @@ if TYPE_CHECKING: from comfy_api.internal import (_ComfyNodeInternal, _NodeOutputInternal, classproperty, copy_class, first_real_override, is_class, prune_dict, shallow_clone_class) from comfy_execution.graph_utils import ExecutionBlocker -from ._util import MESH, VOXEL, SVG as _SVG +from ._util import MESH, VOXEL, SVG as _SVG, File3D class FolderType(str, Enum): @@ -73,8 +73,15 @@ class RemoteOptions: class NumberDisplay(str, Enum): number = "number" slider = "slider" + gradient_slider = "gradientslider" +class ControlAfterGenerate(str, Enum): + fixed = "fixed" + increment = "increment" + decrement = "decrement" + randomize = "randomize" + class _ComfyType(ABC): Type = Any io_type: str = None @@ -263,7 +270,7 @@ class Int(ComfyTypeIO): class Input(WidgetInput): '''Integer input.''' def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None, - default: int=None, min: int=None, max: int=None, step: int=None, control_after_generate: bool=None, + default: int=None, min: int=None, max: int=None, step: int=None, control_after_generate: bool | ControlAfterGenerate=None, display_mode: NumberDisplay=None, socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None): super().__init__(id, display_name, optional, tooltip, lazy, default, socketless, None, force_input, extra_dict, raw_link, advanced) self.min = min @@ -290,13 +297,15 @@ class Float(ComfyTypeIO): '''Float input.''' def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None, default: float=None, min: float=None, max: float=None, step: float=None, round: float=None, - display_mode: NumberDisplay=None, socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None): + display_mode: NumberDisplay=None, gradient_stops: list[list[float]]=None, + socketless: bool=None, force_input: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None): super().__init__(id, display_name, optional, tooltip, lazy, default, socketless, None, force_input, extra_dict, raw_link, advanced) self.min = min self.max = max self.step = step self.round = round self.display_mode = display_mode + self.gradient_stops = gradient_stops self.default: float def as_dict(self): @@ -306,6 +315,7 @@ class Float(ComfyTypeIO): "step": self.step, "round": self.round, "display": self.display_mode, + "gradient_stops": self.gradient_stops, }) @comfytype(io_type="STRING") @@ -345,7 +355,7 @@ class Combo(ComfyTypeIO): tooltip: str=None, lazy: bool=None, default: str | int | Enum = None, - control_after_generate: bool=None, + control_after_generate: bool | ControlAfterGenerate=None, upload: UploadType=None, image_folder: FolderType=None, remote: RemoteOptions=None, @@ -389,7 +399,7 @@ class MultiCombo(ComfyTypeI): Type = list[str] class Input(Combo.Input): def __init__(self, id: str, options: list[str], display_name: str=None, optional=False, tooltip: str=None, lazy: bool=None, - default: list[str]=None, placeholder: str=None, chip: bool=None, control_after_generate: bool=None, + default: list[str]=None, placeholder: str=None, chip: bool=None, control_after_generate: bool | ControlAfterGenerate=None, socketless: bool=None, extra_dict=None, raw_link: bool=None, advanced: bool=None): super().__init__(id, options, display_name, optional, tooltip, lazy, default, control_after_generate, socketless=socketless, extra_dict=extra_dict, raw_link=raw_link, advanced=advanced) self.multiselect = True @@ -667,6 +677,49 @@ class Voxel(ComfyTypeIO): class Mesh(ComfyTypeIO): Type = MESH + +@comfytype(io_type="FILE_3D") +class File3DAny(ComfyTypeIO): + """General 3D file type - accepts any supported 3D format.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_GLB") +class File3DGLB(ComfyTypeIO): + """GLB format 3D file - binary glTF, best for web and cross-platform.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_GLTF") +class File3DGLTF(ComfyTypeIO): + """GLTF format 3D file - JSON-based glTF with external resources.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_FBX") +class File3DFBX(ComfyTypeIO): + """FBX format 3D file - best for game engines and animation.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_OBJ") +class File3DOBJ(ComfyTypeIO): + """OBJ format 3D file - simple geometry format.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_STL") +class File3DSTL(ComfyTypeIO): + """STL format 3D file - best for 3D printing.""" + Type = File3D + + +@comfytype(io_type="FILE_3D_USDZ") +class File3DUSDZ(ComfyTypeIO): + """USDZ format 3D file - Apple AR format.""" + Type = File3D + + @comfytype(io_type="HOOKS") class Hooks(ComfyTypeIO): if TYPE_CHECKING: @@ -1160,6 +1213,33 @@ class Color(ComfyTypeIO): def as_dict(self): return super().as_dict() +@comfytype(io_type="BOUNDING_BOX") +class BoundingBox(ComfyTypeIO): + class BoundingBoxDict(TypedDict): + x: int + y: int + width: int + height: int + Type = BoundingBoxDict + + class Input(WidgetInput): + def __init__(self, id: str, display_name: str=None, optional=False, tooltip: str=None, + socketless: bool=True, default: dict=None, component: str=None, force_input: bool=None): + super().__init__(id, display_name, optional, tooltip, None, default, socketless) + self.component = component + self.force_input = force_input + if default is None: + self.default = {"x": 0, "y": 0, "width": 512, "height": 512} + + def as_dict(self): + d = super().as_dict() + if self.component: + d["component"] = self.component + if self.force_input is not None: + d["forceInput"] = self.force_input + return d + + DYNAMIC_INPUT_LOOKUP: dict[str, Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]] = {} def register_dynamic_input_func(io_type: str, func: Callable[[dict[str, Any], dict[str, Any], tuple[str, dict[str, Any]], str, list[str] | None], None]): DYNAMIC_INPUT_LOOKUP[io_type] = func @@ -1266,6 +1346,7 @@ class NodeInfoV1: api_node: bool=None price_badge: dict | None = None search_aliases: list[str]=None + essentials_category: str=None @dataclass @@ -1387,6 +1468,8 @@ class Schema: """Flags a node as expandable, allowing NodeOutput to include 'expand' property.""" accept_all_inputs: bool=False """When True, all inputs from the prompt will be passed to the node as kwargs, even if not defined in the schema.""" + essentials_category: str | None = None + """Optional category for the Essentials tab. Path-based like category field (e.g., 'Basic', 'Image Tools/Editing').""" def validate(self): '''Validate the schema: @@ -1493,6 +1576,7 @@ class Schema: python_module=getattr(cls, "RELATIVE_PYTHON_MODULE", "nodes"), price_badge=self.price_badge.as_dict(self.inputs) if self.price_badge is not None else None, search_aliases=self.search_aliases if self.search_aliases else None, + essentials_category=self.essentials_category, ) return info @@ -1987,11 +2071,74 @@ class _UIOutput(ABC): ... +class InputMapOldId(TypedDict): + """Map an old node input to a new node input by ID.""" + new_id: str + old_id: str + +class InputMapSetValue(TypedDict): + """Set a specific value for a new node input.""" + new_id: str + set_value: Any + +InputMap = InputMapOldId | InputMapSetValue +""" +Input mapping for node replacement. Type is inferred by dictionary keys: +- {"new_id": str, "old_id": str} - maps old input to new input +- {"new_id": str, "set_value": Any} - sets a specific value for new input +""" + +class OutputMap(TypedDict): + """Map outputs of node replacement via indexes.""" + new_idx: int + old_idx: int + +class NodeReplace: + """ + Defines a possible node replacement, mapping inputs and outputs of the old node to the new node. + + Also supports assigning specific values to the input widgets of the new node. + + Args: + new_node_id: The class name of the new replacement node. + old_node_id: The class name of the deprecated node. + old_widget_ids: Ordered list of input IDs for widgets that may not have an input slot + connected. The workflow JSON stores widget values by their relative position index, + not by ID. This list maps those positional indexes to input IDs, enabling the + replacement system to correctly identify widget values during node migration. + input_mapping: List of input mappings from old node to new node. + output_mapping: List of output mappings from old node to new node. + """ + def __init__(self, + new_node_id: str, + old_node_id: str, + old_widget_ids: list[str] | None=None, + input_mapping: list[InputMap] | None=None, + output_mapping: list[OutputMap] | None=None, + ): + self.new_node_id = new_node_id + self.old_node_id = old_node_id + self.old_widget_ids = old_widget_ids + self.input_mapping = input_mapping + self.output_mapping = output_mapping + + def as_dict(self): + """Create serializable representation of the node replacement.""" + return { + "new_node_id": self.new_node_id, + "old_node_id": self.old_node_id, + "old_widget_ids": self.old_widget_ids, + "input_mapping": list(self.input_mapping) if self.input_mapping else None, + "output_mapping": list(self.output_mapping) if self.output_mapping else None, + } + + __all__ = [ "FolderType", "UploadType", "RemoteOptions", "NumberDisplay", + "ControlAfterGenerate", "comfytype", "Custom", @@ -2037,6 +2184,13 @@ __all__ = [ "LossMap", "Voxel", "Mesh", + "File3DAny", + "File3DGLB", + "File3DGLTF", + "File3DFBX", + "File3DOBJ", + "File3DSTL", + "File3DUSDZ", "Hooks", "HookKeyframes", "TimestepsRange", @@ -2071,4 +2225,6 @@ __all__ = [ "ImageCompare", "PriceBadgeDepends", "PriceBadge", + "BoundingBox", + "NodeReplace", ] diff --git a/comfy_api/latest/_util/__init__.py b/comfy_api/latest/_util/__init__.py index 6313eb01b..115baf392 100644 --- a/comfy_api/latest/_util/__init__.py +++ b/comfy_api/latest/_util/__init__.py @@ -1,5 +1,5 @@ from .video_types import VideoContainer, VideoCodec, VideoComponents -from .geometry_types import VOXEL, MESH +from .geometry_types import VOXEL, MESH, File3D from .image_types import SVG __all__ = [ @@ -9,5 +9,6 @@ __all__ = [ "VideoComponents", "VOXEL", "MESH", + "File3D", "SVG", ] diff --git a/comfy_api/latest/_util/geometry_types.py b/comfy_api/latest/_util/geometry_types.py index 385122778..b586fceb3 100644 --- a/comfy_api/latest/_util/geometry_types.py +++ b/comfy_api/latest/_util/geometry_types.py @@ -1,3 +1,8 @@ +import shutil +from io import BytesIO +from pathlib import Path +from typing import IO + import torch @@ -10,3 +15,75 @@ class MESH: def __init__(self, vertices: torch.Tensor, faces: torch.Tensor): self.vertices = vertices self.faces = faces + + +class File3D: + """Class representing a 3D file from a file path or binary stream. + + Supports both disk-backed (file path) and memory-backed (BytesIO) storage. + """ + + def __init__(self, source: str | IO[bytes], file_format: str = ""): + self._source = source + self._format = file_format or self._infer_format() + + def _infer_format(self) -> str: + if isinstance(self._source, str): + return Path(self._source).suffix.lstrip(".").lower() + return "" + + @property + def format(self) -> str: + return self._format + + @format.setter + def format(self, value: str) -> None: + self._format = value.lstrip(".").lower() if value else "" + + @property + def is_disk_backed(self) -> bool: + return isinstance(self._source, str) + + def get_source(self) -> str | IO[bytes]: + if isinstance(self._source, str): + return self._source + if hasattr(self._source, "seek"): + self._source.seek(0) + return self._source + + def get_data(self) -> BytesIO: + if isinstance(self._source, str): + with open(self._source, "rb") as f: + result = BytesIO(f.read()) + return result + if hasattr(self._source, "seek"): + self._source.seek(0) + if isinstance(self._source, BytesIO): + return self._source + return BytesIO(self._source.read()) + + def save_to(self, path: str) -> str: + dest = Path(path) + dest.parent.mkdir(parents=True, exist_ok=True) + + if isinstance(self._source, str): + if Path(self._source).resolve() != dest.resolve(): + shutil.copy2(self._source, dest) + else: + if hasattr(self._source, "seek"): + self._source.seek(0) + with open(dest, "wb") as f: + f.write(self._source.read()) + return str(dest) + + def get_bytes(self) -> bytes: + if isinstance(self._source, str): + return Path(self._source).read_bytes() + if hasattr(self._source, "seek"): + self._source.seek(0) + return self._source.read() + + def __repr__(self) -> str: + if isinstance(self._source, str): + return f"File3D(source={self._source!r}, format={self._format!r})" + return f"File3D(, format={self._format!r})" diff --git a/comfy_api_nodes/apis/__init__.py b/comfy_api_nodes/apis/__init__.py index ee2aa1ce6..46a583b5e 100644 --- a/comfy_api_nodes/apis/__init__.py +++ b/comfy_api_nodes/apis/__init__.py @@ -1197,12 +1197,6 @@ class KlingImageGenImageReferenceType(str, Enum): face = 'face' -class KlingImageGenModelName(str, Enum): - kling_v1 = 'kling-v1' - kling_v1_5 = 'kling-v1-5' - kling_v2 = 'kling-v2' - - class KlingImageGenerationsRequest(BaseModel): aspect_ratio: Optional[KlingImageGenAspectRatio] = '16:9' callback_url: Optional[AnyUrl] = Field( @@ -1218,7 +1212,7 @@ class KlingImageGenerationsRequest(BaseModel): 0.5, description='Reference intensity for user-uploaded images', ge=0.0, le=1.0 ) image_reference: Optional[KlingImageGenImageReferenceType] = None - model_name: Optional[KlingImageGenModelName] = 'kling-v1' + model_name: str = Field(...) n: Optional[int] = Field(1, description='Number of generated images', ge=1, le=9) negative_prompt: Optional[str] = Field( None, description='Negative text prompt', max_length=200 diff --git a/comfy_api_nodes/apis/bria.py b/comfy_api_nodes/apis/bria.py index 9119cacc6..8c496b56c 100644 --- a/comfy_api_nodes/apis/bria.py +++ b/comfy_api_nodes/apis/bria.py @@ -45,17 +45,55 @@ class BriaEditImageRequest(BaseModel): ) +class BriaRemoveBackgroundRequest(BaseModel): + image: str = Field(...) + sync: bool = Field(False) + visual_input_content_moderation: bool = Field( + False, description="If true, returns 422 on input image moderation failure." + ) + visual_output_content_moderation: bool = Field( + False, description="If true, returns 422 on visual output moderation failure." + ) + seed: int = Field(...) + + class BriaStatusResponse(BaseModel): request_id: str = Field(...) status_url: str = Field(...) warning: str | None = Field(None) -class BriaResult(BaseModel): +class BriaRemoveBackgroundResult(BaseModel): + image_url: str = Field(...) + + +class BriaRemoveBackgroundResponse(BaseModel): + status: str = Field(...) + result: BriaRemoveBackgroundResult | None = Field(None) + + +class BriaImageEditResult(BaseModel): structured_prompt: str = Field(...) image_url: str = Field(...) -class BriaResponse(BaseModel): +class BriaImageEditResponse(BaseModel): status: str = Field(...) - result: BriaResult | None = Field(None) + result: BriaImageEditResult | None = Field(None) + + +class BriaRemoveVideoBackgroundRequest(BaseModel): + video: str = Field(...) + background_color: str = Field(default="transparent", description="Background color for the output video.") + output_container_and_codec: str = Field(...) + preserve_audio: bool = Field(True) + seed: int = Field(...) + + +class BriaRemoveVideoBackgroundResult(BaseModel): + video_url: str = Field(...) + + +class BriaRemoveVideoBackgroundResponse(BaseModel): + status: str = Field(...) + result: BriaRemoveVideoBackgroundResult | None = Field(None) diff --git a/comfy_api_nodes/apis/bytedance.py b/comfy_api_nodes/apis/bytedance.py index 23cbe2372..18455396d 100644 --- a/comfy_api_nodes/apis/bytedance.py +++ b/comfy_api_nodes/apis/bytedance.py @@ -27,6 +27,7 @@ class Seedream4TaskCreationRequest(BaseModel): sequential_image_generation: str = Field("disabled") sequential_image_generation_options: Seedream4Options = Field(Seedream4Options(max_images=15)) watermark: bool = Field(False) + output_format: str | None = None class ImageTaskCreationResponse(BaseModel): @@ -106,6 +107,7 @@ RECOMMENDED_PRESETS_SEEDREAM_4 = [ ("2496x1664 (3:2)", 2496, 1664), ("1664x2496 (2:3)", 1664, 2496), ("3024x1296 (21:9)", 3024, 1296), + ("3072x3072 (1:1)", 3072, 3072), ("4096x4096 (1:1)", 4096, 4096), ("Custom", None, None), ] diff --git a/comfy_api_nodes/apis/elevenlabs.py b/comfy_api_nodes/apis/elevenlabs.py new file mode 100644 index 000000000..e58450fdf --- /dev/null +++ b/comfy_api_nodes/apis/elevenlabs.py @@ -0,0 +1,88 @@ +from pydantic import BaseModel, Field + + +class SpeechToTextRequest(BaseModel): + model_id: str = Field(...) + cloud_storage_url: str = Field(...) + language_code: str | None = Field(None, description="ISO-639-1 or ISO-639-3 language code") + tag_audio_events: bool | None = Field(None, description="Annotate sounds like (laughter) in transcript") + num_speakers: int | None = Field(None, description="Max speakers predicted") + timestamps_granularity: str = Field(default="word", description="Timing precision: none, word, or character") + diarize: bool | None = Field(None, description="Annotate which speaker is talking") + diarization_threshold: float | None = Field(None, description="Speaker separation sensitivity") + temperature: float | None = Field(None, description="Randomness control") + seed: int = Field(..., description="Seed for deterministic sampling") + + +class SpeechToTextWord(BaseModel): + text: str = Field(..., description="The word text") + type: str = Field(default="word", description="Type of text element (word, spacing, etc.)") + start: float | None = Field(None, description="Start time in seconds (when timestamps enabled)") + end: float | None = Field(None, description="End time in seconds (when timestamps enabled)") + speaker_id: str | None = Field(None, description="Speaker identifier when diarization is enabled") + logprob: float | None = Field(None, description="Log probability of the word") + + +class SpeechToTextResponse(BaseModel): + language_code: str = Field(..., description="Detected or specified language code") + language_probability: float | None = Field(None, description="Confidence of language detection") + text: str = Field(..., description="Full transcript text") + words: list[SpeechToTextWord] | None = Field(None, description="Word-level timing information") + + +class TextToSpeechVoiceSettings(BaseModel): + stability: float | None = Field(None, description="Voice stability") + similarity_boost: float | None = Field(None, description="Similarity boost") + style: float | None = Field(None, description="Style exaggeration") + use_speaker_boost: bool | None = Field(None, description="Boost similarity to original speaker") + speed: float | None = Field(None, description="Speech speed") + + +class TextToSpeechRequest(BaseModel): + text: str = Field(..., description="Text to convert to speech") + model_id: str = Field(..., description="Model ID for TTS") + language_code: str | None = Field(None, description="ISO-639-1 or ISO-639-3 language code") + voice_settings: TextToSpeechVoiceSettings | None = Field(None, description="Voice settings") + seed: int = Field(..., description="Seed for deterministic sampling") + apply_text_normalization: str | None = Field(None, description="Text normalization mode: auto, on, off") + + +class TextToSoundEffectsRequest(BaseModel): + text: str = Field(..., description="Text prompt to convert into a sound effect") + duration_seconds: float = Field(..., description="Duration of generated sound in seconds") + prompt_influence: float = Field(..., description="How closely generation follows the prompt") + loop: bool | None = Field(None, description="Whether to create a smoothly looping sound effect") + + +class AddVoiceRequest(BaseModel): + name: str = Field(..., description="Name that identifies the voice") + remove_background_noise: bool = Field(..., description="Remove background noise from voice samples") + + +class AddVoiceResponse(BaseModel): + voice_id: str = Field(..., description="The newly created voice's unique identifier") + + +class SpeechToSpeechRequest(BaseModel): + model_id: str = Field(..., description="Model ID for speech-to-speech") + voice_settings: str = Field(..., description="JSON string of voice settings") + seed: int = Field(..., description="Seed for deterministic sampling") + remove_background_noise: bool = Field(..., description="Remove background noise from input audio") + + +class DialogueInput(BaseModel): + text: str = Field(..., description="Text content to convert to speech") + voice_id: str = Field(..., description="Voice identifier for this dialogue segment") + + +class DialogueSettings(BaseModel): + stability: float | None = Field(None, description="Voice stability (0-1)") + + +class TextToDialogueRequest(BaseModel): + inputs: list[DialogueInput] = Field(..., description="List of dialogue segments") + model_id: str = Field(..., description="Model ID for dialogue generation") + language_code: str | None = Field(None, description="ISO-639-1 language code") + settings: DialogueSettings | None = Field(None, description="Voice settings") + seed: int | None = Field(None, description="Seed for deterministic sampling") + apply_text_normalization: str | None = Field(None, description="Text normalization mode: auto, on, off") diff --git a/comfy_api_nodes/apis/gemini.py b/comfy_api_nodes/apis/gemini.py index d81337dae..639035fef 100644 --- a/comfy_api_nodes/apis/gemini.py +++ b/comfy_api_nodes/apis/gemini.py @@ -116,14 +116,26 @@ class GeminiGenerationConfig(BaseModel): topP: float | None = Field(None, ge=0.0, le=1.0) +class GeminiImageOutputOptions(BaseModel): + mimeType: str = Field("image/png") + compressionQuality: int | None = Field(None) + + class GeminiImageConfig(BaseModel): aspectRatio: str | None = Field(None) imageSize: str | None = Field(None) + imageOutputOptions: GeminiImageOutputOptions = Field(default_factory=GeminiImageOutputOptions) + + +class GeminiThinkingConfig(BaseModel): + includeThoughts: bool | None = Field(None) + thinkingLevel: str = Field(...) class GeminiImageGenerationConfig(GeminiGenerationConfig): responseModalities: list[str] | None = Field(None) imageConfig: GeminiImageConfig | None = Field(None) + thinkingConfig: GeminiThinkingConfig | None = Field(None) class GeminiImageGenerateContentRequest(BaseModel): diff --git a/comfy_api_nodes/apis/hitpaw.py b/comfy_api_nodes/apis/hitpaw.py new file mode 100644 index 000000000..b23c5d9eb --- /dev/null +++ b/comfy_api_nodes/apis/hitpaw.py @@ -0,0 +1,51 @@ +from typing import TypedDict + +from pydantic import BaseModel, Field + + +class InputVideoModel(TypedDict): + model: str + resolution: str + + +class ImageEnhanceTaskCreateRequest(BaseModel): + model_name: str = Field(...) + img_url: str = Field(...) + extension: str = Field(".png") + exif: bool = Field(False) + DPI: int | None = Field(None) + + +class VideoEnhanceTaskCreateRequest(BaseModel): + video_url: str = Field(...) + extension: str = Field(".mp4") + model_name: str | None = Field(...) + resolution: list[int] = Field(..., description="Target resolution [width, height]") + original_resolution: list[int] = Field(..., description="Original video resolution [width, height]") + + +class TaskCreateDataResponse(BaseModel): + job_id: str = Field(...) + consume_coins: int | None = Field(None) + + +class TaskStatusPollRequest(BaseModel): + job_id: str = Field(...) + + +class TaskCreateResponse(BaseModel): + code: int = Field(...) + message: str = Field(...) + data: TaskCreateDataResponse | None = Field(None) + + +class TaskStatusDataResponse(BaseModel): + job_id: str = Field(...) + status: str = Field(...) + res_url: str = Field("") + + +class TaskStatusResponse(BaseModel): + code: int = Field(...) + message: str = Field(...) + data: TaskStatusDataResponse = Field(...) diff --git a/comfy_api_nodes/apis/hunyuan3d.py b/comfy_api_nodes/apis/hunyuan3d.py index 6421c9bd5..e84eba31e 100644 --- a/comfy_api_nodes/apis/hunyuan3d.py +++ b/comfy_api_nodes/apis/hunyuan3d.py @@ -64,3 +64,23 @@ class To3DProTaskResultResponse(BaseModel): class To3DProTaskQueryRequest(BaseModel): JobId: str = Field(...) + + +class To3DUVFileInput(BaseModel): + Type: str = Field(..., description="File type: GLB, OBJ, or FBX") + Url: str = Field(...) + + +class To3DUVTaskRequest(BaseModel): + File: To3DUVFileInput = Field(...) + + +class TextureEditImageInfo(BaseModel): + Url: str = Field(...) + + +class TextureEditTaskRequest(BaseModel): + File3D: To3DUVFileInput = Field(...) + Image: TextureEditImageInfo | None = Field(None) + Prompt: str | None = Field(None) + EnablePBR: bool | None = Field(None) diff --git a/comfy_api_nodes/apis/kling.py b/comfy_api_nodes/apis/kling.py index bf54ede3e..a5bd5f1d3 100644 --- a/comfy_api_nodes/apis/kling.py +++ b/comfy_api_nodes/apis/kling.py @@ -1,12 +1,22 @@ from pydantic import BaseModel, Field +class MultiPromptEntry(BaseModel): + index: int = Field(...) + prompt: str = Field(...) + duration: str = Field(...) + + class OmniProText2VideoRequest(BaseModel): model_name: str = Field(..., description="kling-video-o1") aspect_ratio: str = Field(..., description="'16:9', '9:16' or '1:1'") duration: str = Field(..., description="'5' or '10'") prompt: str = Field(...) mode: str = Field("pro") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) + sound: str = Field(..., description="'on' or 'off'") class OmniParamImage(BaseModel): @@ -26,6 +36,10 @@ class OmniProFirstLastFrameRequest(BaseModel): duration: str = Field(..., description="'5' or '10'") prompt: str = Field(...) mode: str = Field("pro") + sound: str | None = Field(None, description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) class OmniProReferences2VideoRequest(BaseModel): @@ -38,6 +52,10 @@ class OmniProReferences2VideoRequest(BaseModel): duration: str | None = Field(..., description="From 3 to 10.") prompt: str = Field(...) mode: str = Field("pro") + sound: str | None = Field(None, description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) class TaskStatusVideoResult(BaseModel): @@ -54,6 +72,7 @@ class TaskStatusImageResult(BaseModel): class TaskStatusResults(BaseModel): videos: list[TaskStatusVideoResult] | None = Field(None) images: list[TaskStatusImageResult] | None = Field(None) + series_images: list[TaskStatusImageResult] | None = Field(None) class TaskStatusResponseData(BaseModel): @@ -77,31 +96,49 @@ class OmniImageParamImage(BaseModel): class OmniProImageRequest(BaseModel): - model_name: str = Field(..., description="kling-image-o1") - resolution: str = Field(..., description="'1k' or '2k'") + model_name: str = Field(...) + resolution: str = Field(...) aspect_ratio: str | None = Field(...) prompt: str = Field(...) mode: str = Field("pro") n: int | None = Field(1, le=9) image_list: list[OmniImageParamImage] | None = Field(..., max_length=10) + result_type: str | None = Field(None, description="Set to 'series' for series generation") + series_amount: int | None = Field(None, ge=2, le=9, description="Number of images in a series") class TextToVideoWithAudioRequest(BaseModel): - model_name: str = Field(..., description="kling-v2-6") + model_name: str = Field(...) aspect_ratio: str = Field(..., description="'16:9', '9:16' or '1:1'") - duration: str = Field(..., description="'5' or '10'") - prompt: str = Field(...) + duration: str = Field(...) + prompt: str | None = Field(...) + negative_prompt: str | None = Field(None) mode: str = Field("pro") sound: str = Field(..., description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) class ImageToVideoWithAudioRequest(BaseModel): - model_name: str = Field(..., description="kling-v2-6") + model_name: str = Field(...) image: str = Field(...) - duration: str = Field(..., description="'5' or '10'") - prompt: str = Field(...) + image_tail: str | None = Field(None) + duration: str = Field(...) + prompt: str | None = Field(...) + negative_prompt: str | None = Field(None) mode: str = Field("pro") sound: str = Field(..., description="'on' or 'off'") + multi_shot: bool | None = Field(None) + multi_prompt: list[MultiPromptEntry] | None = Field(None) + shot_type: str | None = Field(None) + + +class KlingAvatarRequest(BaseModel): + image: str = Field(...) + sound_file: str = Field(...) + prompt: str | None = Field(None) + mode: str = Field(...) class MotionControlRequest(BaseModel): diff --git a/comfy_api_nodes/apis/meshy.py b/comfy_api_nodes/apis/meshy.py index be46d0d58..7d72e6e91 100644 --- a/comfy_api_nodes/apis/meshy.py +++ b/comfy_api_nodes/apis/meshy.py @@ -109,14 +109,19 @@ class MeshyTextureRequest(BaseModel): class MeshyModelsUrls(BaseModel): glb: str = Field("") + fbx: str = Field("") + usdz: str = Field("") + obj: str = Field("") class MeshyRiggedModelsUrls(BaseModel): rigged_character_glb_url: str = Field("") + rigged_character_fbx_url: str = Field("") class MeshyAnimatedModelsUrls(BaseModel): animation_glb_url: str = Field("") + animation_fbx_url: str = Field("") class MeshyResultTextureUrls(BaseModel): diff --git a/comfy_api_nodes/apis/recraft.py b/comfy_api_nodes/apis/recraft.py index 0bd7d23b3..78ededd94 100644 --- a/comfy_api_nodes/apis/recraft.py +++ b/comfy_api_nodes/apis/recraft.py @@ -198,11 +198,6 @@ dict_recraft_substyles_v3 = { } -class RecraftModel(str, Enum): - recraftv3 = 'recraftv3' - recraftv2 = 'recraftv2' - - class RecraftImageSize(str, Enum): res_1024x1024 = '1024x1024' res_1365x1024 = '1365x1024' @@ -221,6 +216,41 @@ class RecraftImageSize(str, Enum): res_1707x1024 = '1707x1024' +RECRAFT_V4_SIZES = [ + "1024x1024", + "1536x768", + "768x1536", + "1280x832", + "832x1280", + "1216x896", + "896x1216", + "1152x896", + "896x1152", + "832x1344", + "1280x896", + "896x1280", + "1344x768", + "768x1344", +] + +RECRAFT_V4_PRO_SIZES = [ + "2048x2048", + "3072x1536", + "1536x3072", + "2560x1664", + "1664x2560", + "2432x1792", + "1792x2432", + "2304x1792", + "1792x2304", + "1664x2688", + "1434x1024", + "1024x1434", + "2560x1792", + "1792x2560", +] + + class RecraftColorObject(BaseModel): rgb: list[int] = Field(..., description='An array of 3 integer values in range of 0...255 defining RGB Color Model') @@ -234,17 +264,16 @@ class RecraftControlsObject(BaseModel): class RecraftImageGenerationRequest(BaseModel): prompt: str = Field(..., description='The text prompt describing the image to generate') - size: RecraftImageSize | None = Field(None, description='The size of the generated image (e.g., "1024x1024")') + size: str | None = Field(None, description='The size of the generated image (e.g., "1024x1024")') n: int = Field(..., description='The number of images to generate') negative_prompt: str | None = Field(None, description='A text description of undesired elements on an image') - model: RecraftModel | None = Field(RecraftModel.recraftv3, description='The model to use for generation (e.g., "recraftv3")') + model: str = Field(...) style: str | None = Field(None, description='The style to apply to the generated image (e.g., "digital_illustration")') substyle: str | None = Field(None, description='The substyle to apply to the generated image, depending on the style input') controls: RecraftControlsObject | None = Field(None, description='A set of custom parameters to tweak generation process') style_id: str | None = Field(None, description='Use a previously uploaded style as a reference; UUID') strength: float | None = Field(None, description='Defines the difference with the original image, should lie in [0, 1], where 0 means almost identical, and 1 means miserable similarity') random_seed: int | None = Field(None, description="Seed for video generation") - # text_layout class RecraftReturnedObject(BaseModel): diff --git a/comfy_api_nodes/nodes_bfl.py b/comfy_api_nodes/nodes_bfl.py index 61c3b4503..23590bf24 100644 --- a/comfy_api_nodes/nodes_bfl.py +++ b/comfy_api_nodes/nodes_bfl.py @@ -57,6 +57,7 @@ class FluxProUltraImageNode(IO.ComfyNode): tooltip="Whether to perform upsampling on the prompt. " "If active, automatically modifies the prompt for more creative generation, " "but results are nondeterministic (same seed will not produce exactly the same result).", + advanced=True, ), IO.Int.Input( "seed", @@ -200,6 +201,7 @@ class FluxKontextProImageNode(IO.ComfyNode): "prompt_upsampling", default=False, tooltip="Whether to perform upsampling on the prompt. If active, automatically modifies the prompt for more creative generation, but results are nondeterministic (same seed will not produce exactly the same result).", + advanced=True, ), IO.Image.Input( "input_image", @@ -296,6 +298,7 @@ class FluxProExpandNode(IO.ComfyNode): tooltip="Whether to perform upsampling on the prompt. " "If active, automatically modifies the prompt for more creative generation, " "but results are nondeterministic (same seed will not produce exactly the same result).", + advanced=True, ), IO.Int.Input( "top", @@ -433,6 +436,7 @@ class FluxProFillNode(IO.ComfyNode): tooltip="Whether to perform upsampling on the prompt. " "If active, automatically modifies the prompt for more creative generation, " "but results are nondeterministic (same seed will not produce exactly the same result).", + advanced=True, ), IO.Float.Input( "guidance", @@ -577,6 +581,7 @@ class Flux2ProImageNode(IO.ComfyNode): default=True, tooltip="Whether to perform upsampling on the prompt. " "If active, automatically modifies the prompt for more creative generation.", + advanced=True, ), IO.Image.Input("images", optional=True, tooltip="Up to 9 images to be used as references."), ], diff --git a/comfy_api_nodes/nodes_bria.py b/comfy_api_nodes/nodes_bria.py index d3a52bc1b..4044ee3ea 100644 --- a/comfy_api_nodes/nodes_bria.py +++ b/comfy_api_nodes/nodes_bria.py @@ -3,7 +3,11 @@ from typing_extensions import override from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis.bria import ( BriaEditImageRequest, - BriaResponse, + BriaRemoveBackgroundRequest, + BriaRemoveBackgroundResponse, + BriaRemoveVideoBackgroundRequest, + BriaRemoveVideoBackgroundResponse, + BriaImageEditResponse, BriaStatusResponse, InputModerationSettings, ) @@ -11,10 +15,12 @@ from comfy_api_nodes.util import ( ApiEndpoint, convert_mask_to_image, download_url_to_image_tensor, - get_number_of_images, + download_url_to_video_output, poll_op, sync_op, - upload_images_to_comfyapi, + upload_image_to_comfyapi, + upload_video_to_comfyapi, + validate_video_duration, ) @@ -73,21 +79,15 @@ class BriaImageEditNode(IO.ComfyNode): IO.DynamicCombo.Input( "moderation", options=[ + IO.DynamicCombo.Option("false", []), IO.DynamicCombo.Option( "true", [ - IO.Boolean.Input( - "prompt_content_moderation", default=False - ), - IO.Boolean.Input( - "visual_input_moderation", default=False - ), - IO.Boolean.Input( - "visual_output_moderation", default=True - ), + IO.Boolean.Input("prompt_content_moderation", default=False), + IO.Boolean.Input("visual_input_moderation", default=False), + IO.Boolean.Input("visual_output_moderation", default=True), ], ), - IO.DynamicCombo.Option("false", []), ], tooltip="Moderation settings", ), @@ -127,50 +127,26 @@ class BriaImageEditNode(IO.ComfyNode): mask: Input.Image | None = None, ) -> IO.NodeOutput: if not prompt and not structured_prompt: - raise ValueError( - "One of prompt or structured_prompt is required to be non-empty." - ) - if get_number_of_images(image) != 1: - raise ValueError("Exactly one input image is required.") + raise ValueError("One of prompt or structured_prompt is required to be non-empty.") mask_url = None if mask is not None: - mask_url = ( - await upload_images_to_comfyapi( - cls, - convert_mask_to_image(mask), - max_images=1, - mime_type="image/png", - wait_label="Uploading mask", - ) - )[0] + mask_url = await upload_image_to_comfyapi(cls, convert_mask_to_image(mask), wait_label="Uploading mask") response = await sync_op( cls, ApiEndpoint(path="proxy/bria/v2/image/edit", method="POST"), data=BriaEditImageRequest( instruction=prompt if prompt else None, structured_instruction=structured_prompt if structured_prompt else None, - images=await upload_images_to_comfyapi( - cls, - image, - max_images=1, - mime_type="image/png", - wait_label="Uploading image", - ), + images=[await upload_image_to_comfyapi(cls, image, wait_label="Uploading image")], mask=mask_url, negative_prompt=negative_prompt if negative_prompt else None, guidance_scale=guidance_scale, seed=seed, model_version=model, steps_num=steps, - prompt_content_moderation=moderation.get( - "prompt_content_moderation", False - ), - visual_input_content_moderation=moderation.get( - "visual_input_moderation", False - ), - visual_output_content_moderation=moderation.get( - "visual_output_moderation", False - ), + prompt_content_moderation=moderation.get("prompt_content_moderation", False), + visual_input_content_moderation=moderation.get("visual_input_moderation", False), + visual_output_content_moderation=moderation.get("visual_output_moderation", False), ), response_model=BriaStatusResponse, ) @@ -178,7 +154,7 @@ class BriaImageEditNode(IO.ComfyNode): cls, ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"), status_extractor=lambda r: r.status, - response_model=BriaResponse, + response_model=BriaImageEditResponse, ) return IO.NodeOutput( await download_url_to_image_tensor(response.result.image_url), @@ -186,11 +162,167 @@ class BriaImageEditNode(IO.ComfyNode): ) +class BriaRemoveImageBackground(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="BriaRemoveImageBackground", + display_name="Bria Remove Image Background", + category="api node/image/Bria", + description="Remove the background from an image using Bria RMBG 2.0.", + inputs=[ + IO.Image.Input("image"), + IO.DynamicCombo.Input( + "moderation", + options=[ + IO.DynamicCombo.Option("false", []), + IO.DynamicCombo.Option( + "true", + [ + IO.Boolean.Input("visual_input_moderation", default=False), + IO.Boolean.Input("visual_output_moderation", default=True), + ], + ), + ], + tooltip="Moderation settings", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[IO.Image.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.018}""", + ), + ) + + @classmethod + async def execute( + cls, + image: Input.Image, + moderation: dict, + seed: int, + ) -> IO.NodeOutput: + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/bria/v2/image/edit/remove_background", method="POST"), + data=BriaRemoveBackgroundRequest( + image=await upload_image_to_comfyapi(cls, image, wait_label="Uploading image"), + sync=False, + visual_input_content_moderation=moderation.get("visual_input_moderation", False), + visual_output_content_moderation=moderation.get("visual_output_moderation", False), + seed=seed, + ), + response_model=BriaStatusResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"), + status_extractor=lambda r: r.status, + response_model=BriaRemoveBackgroundResponse, + ) + return IO.NodeOutput(await download_url_to_image_tensor(response.result.image_url)) + + +class BriaRemoveVideoBackground(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="BriaRemoveVideoBackground", + display_name="Bria Remove Video Background", + category="api node/video/Bria", + description="Remove the background from a video using Bria. ", + inputs=[ + IO.Video.Input("video"), + IO.Combo.Input( + "background_color", + options=[ + "Black", + "White", + "Gray", + "Red", + "Green", + "Blue", + "Yellow", + "Cyan", + "Magenta", + "Orange", + ], + tooltip="Background color for the output video.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[IO.Video.Output()], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.14,"format":{"suffix":"/second"}}""", + ), + ) + + @classmethod + async def execute( + cls, + video: Input.Video, + background_color: str, + seed: int, + ) -> IO.NodeOutput: + validate_video_duration(video, max_duration=60.0) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/bria/v2/video/edit/remove_background", method="POST"), + data=BriaRemoveVideoBackgroundRequest( + video=await upload_video_to_comfyapi(cls, video), + background_color=background_color, + output_container_and_codec="mp4_h264", + seed=seed, + ), + response_model=BriaStatusResponse, + ) + response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/bria/v2/status/{response.request_id}"), + status_extractor=lambda r: r.status, + response_model=BriaRemoveVideoBackgroundResponse, + ) + return IO.NodeOutput(await download_url_to_video_output(response.result.video_url)) + + class BriaExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ BriaImageEditNode, + BriaRemoveImageBackground, + BriaRemoveVideoBackground, ] diff --git a/comfy_api_nodes/nodes_bytedance.py b/comfy_api_nodes/nodes_bytedance.py index 0cb5e3be8..6dbd5984e 100644 --- a/comfy_api_nodes/nodes_bytedance.py +++ b/comfy_api_nodes/nodes_bytedance.py @@ -37,6 +37,12 @@ from comfy_api_nodes.util import ( BYTEPLUS_IMAGE_ENDPOINT = "/proxy/byteplus/api/v3/images/generations" +SEEDREAM_MODELS = { + "seedream 5.0 lite": "seedream-5-0-260128", + "seedream-4-5-251128": "seedream-4-5-251128", + "seedream-4-0-250828": "seedream-4-0-250828", +} + # Long-running tasks endpoints(e.g., video) BYTEPLUS_TASK_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" BYTEPLUS_TASK_STATUS_ENDPOINT = "/proxy/byteplus/api/v3/contents/generations/tasks" # + /{task_id} @@ -114,6 +120,7 @@ class ByteDanceImageNode(IO.ComfyNode): default=False, tooltip='Whether to add an "AI generated" watermark to the image', optional=True, + advanced=True, ), ], outputs=[ @@ -179,14 +186,13 @@ class ByteDanceSeedreamNode(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="ByteDanceSeedreamNode", - display_name="ByteDance Seedream 4.5", + display_name="ByteDance Seedream 4.5 & 5.0", category="api node/image/ByteDance", description="Unified text-to-image generation and precise single-sentence editing at up to 4K resolution.", inputs=[ IO.Combo.Input( "model", - options=["seedream-4-5-251128", "seedream-4-0-250828"], - tooltip="Model name", + options=list(SEEDREAM_MODELS.keys()), ), IO.String.Input( "prompt", @@ -197,7 +203,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode): IO.Image.Input( "image", tooltip="Input image(s) for image-to-image generation. " - "List of 1-10 images for single or multi-reference generation.", + "Reference image(s) for single or multi-reference generation.", optional=True, ), IO.Combo.Input( @@ -209,8 +215,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode): "width", default=2048, min=1024, - max=4096, - step=8, + max=6240, + step=2, tooltip="Custom width for image. Value is working only if `size_preset` is set to `Custom`", optional=True, ), @@ -218,8 +224,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode): "height", default=2048, min=1024, - max=4096, - step=8, + max=4992, + step=2, tooltip="Custom height for image. Value is working only if `size_preset` is set to `Custom`", optional=True, ), @@ -259,12 +265,14 @@ class ByteDanceSeedreamNode(IO.ComfyNode): default=False, tooltip='Whether to add an "AI generated" watermark to the image.', optional=True, + advanced=True, ), IO.Boolean.Input( "fail_on_partial", default=True, tooltip="If enabled, abort execution if any requested images are missing or return an error.", optional=True, + advanced=True, ), ], outputs=[ @@ -280,7 +288,8 @@ class ByteDanceSeedreamNode(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["model"]), expr=""" ( - $price := $contains(widgets.model, "seedream-4-5-251128") ? 0.04 : 0.03; + $price := $contains(widgets.model, "5.0 lite") ? 0.035 : + $contains(widgets.model, "4-5") ? 0.04 : 0.03; { "type":"usd", "usd": $price, @@ -306,6 +315,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode): watermark: bool = False, fail_on_partial: bool = True, ) -> IO.NodeOutput: + model = SEEDREAM_MODELS[model] validate_string(prompt, strip_whitespace=True, min_length=1) w = h = None for label, tw, th in RECOMMENDED_PRESETS_SEEDREAM_4: @@ -315,15 +325,12 @@ class ByteDanceSeedreamNode(IO.ComfyNode): if w is None or h is None: w, h = width, height - if not (1024 <= w <= 4096) or not (1024 <= h <= 4096): - raise ValueError( - f"Custom size out of range: {w}x{h}. " "Both width and height must be between 1024 and 4096 pixels." - ) + out_num_pixels = w * h mp_provided = out_num_pixels / 1_000_000.0 - if "seedream-4-5" in model and out_num_pixels < 3686400: + if ("seedream-4-5" in model or "seedream-5-0" in model) and out_num_pixels < 3686400: raise ValueError( - f"Minimum image resolution that Seedream 4.5 can generate is 3.68MP, " + f"Minimum image resolution for the selected model is 3.68MP, " f"but {mp_provided:.2f}MP provided." ) if "seedream-4-0" in model and out_num_pixels < 921600: @@ -331,9 +338,18 @@ class ByteDanceSeedreamNode(IO.ComfyNode): f"Minimum image resolution that the selected model can generate is 0.92MP, " f"but {mp_provided:.2f}MP provided." ) + max_pixels = 10_404_496 if "seedream-5-0" in model else 16_777_216 + if out_num_pixels > max_pixels: + raise ValueError( + f"Maximum image resolution for the selected model is {max_pixels / 1_000_000:.2f}MP, " + f"but {mp_provided:.2f}MP provided." + ) n_input_images = get_number_of_images(image) if image is not None else 0 - if n_input_images > 10: - raise ValueError(f"Maximum of 10 reference images are supported, but {n_input_images} received.") + max_num_of_images = 14 if model == "seedream-5-0-260128" else 10 + if n_input_images > max_num_of_images: + raise ValueError( + f"Maximum of {max_num_of_images} reference images are supported, but {n_input_images} received." + ) if sequential_image_generation == "auto" and n_input_images + max_images > 15: raise ValueError( "The maximum number of generated images plus the number of reference images cannot exceed 15." @@ -361,6 +377,7 @@ class ByteDanceSeedreamNode(IO.ComfyNode): sequential_image_generation=sequential_image_generation, sequential_image_generation_options=Seedream4Options(max_images=max_images), watermark=watermark, + output_format="png" if model == "seedream-5-0-260128" else None, ), ) if len(response.data) == 1: @@ -432,18 +449,21 @@ class ByteDanceTextToVideoNode(IO.ComfyNode): tooltip="Specifies whether to fix the camera. The platform appends an instruction " "to fix the camera to your prompt, but does not guarantee the actual effect.", optional=True, + advanced=True, ), IO.Boolean.Input( "watermark", default=False, tooltip='Whether to add an "AI generated" watermark to the video.', optional=True, + advanced=True, ), IO.Boolean.Input( "generate_audio", default=False, tooltip="This parameter is ignored for any model except seedance-1-5-pro.", optional=True, + advanced=True, ), ], outputs=[ @@ -561,18 +581,21 @@ class ByteDanceImageToVideoNode(IO.ComfyNode): tooltip="Specifies whether to fix the camera. The platform appends an instruction " "to fix the camera to your prompt, but does not guarantee the actual effect.", optional=True, + advanced=True, ), IO.Boolean.Input( "watermark", default=False, tooltip='Whether to add an "AI generated" watermark to the video.', optional=True, + advanced=True, ), IO.Boolean.Input( "generate_audio", default=False, tooltip="This parameter is ignored for any model except seedance-1-5-pro.", optional=True, + advanced=True, ), ], outputs=[ @@ -694,18 +717,21 @@ class ByteDanceFirstLastFrameNode(IO.ComfyNode): tooltip="Specifies whether to fix the camera. The platform appends an instruction " "to fix the camera to your prompt, but does not guarantee the actual effect.", optional=True, + advanced=True, ), IO.Boolean.Input( "watermark", default=False, tooltip='Whether to add an "AI generated" watermark to the video.', optional=True, + advanced=True, ), IO.Boolean.Input( "generate_audio", default=False, tooltip="This parameter is ignored for any model except seedance-1-5-pro.", optional=True, + advanced=True, ), ], outputs=[ @@ -834,6 +860,7 @@ class ByteDanceImageReferenceNode(IO.ComfyNode): default=False, tooltip='Whether to add an "AI generated" watermark to the video.', optional=True, + advanced=True, ), ], outputs=[ diff --git a/comfy_api_nodes/nodes_elevenlabs.py b/comfy_api_nodes/nodes_elevenlabs.py new file mode 100644 index 000000000..e452daf77 --- /dev/null +++ b/comfy_api_nodes/nodes_elevenlabs.py @@ -0,0 +1,924 @@ +import json +import uuid + +from typing_extensions import override + +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.apis.elevenlabs import ( + AddVoiceRequest, + AddVoiceResponse, + DialogueInput, + DialogueSettings, + SpeechToSpeechRequest, + SpeechToTextRequest, + SpeechToTextResponse, + TextToDialogueRequest, + TextToSoundEffectsRequest, + TextToSpeechRequest, + TextToSpeechVoiceSettings, +) +from comfy_api_nodes.util import ( + ApiEndpoint, + audio_bytes_to_audio_input, + audio_ndarray_to_bytesio, + audio_tensor_to_contiguous_ndarray, + sync_op, + sync_op_raw, + upload_audio_to_comfyapi, + validate_string, +) + +ELEVENLABS_MUSIC_SECTIONS = "ELEVENLABS_MUSIC_SECTIONS" # Custom type for music sections +ELEVENLABS_COMPOSITION_PLAN = "ELEVENLABS_COMPOSITION_PLAN" # Custom type for composition plan +ELEVENLABS_VOICE = "ELEVENLABS_VOICE" # Custom type for voice selection + +# Predefined ElevenLabs voices: (voice_id, display_name, gender, accent) +ELEVENLABS_VOICES = [ + ("CwhRBWXzGAHq8TQ4Fs17", "Roger", "male", "american"), + ("EXAVITQu4vr4xnSDxMaL", "Sarah", "female", "american"), + ("FGY2WhTYpPnrIDTdsKH5", "Laura", "female", "american"), + ("IKne3meq5aSn9XLyUdCD", "Charlie", "male", "australian"), + ("JBFqnCBsd6RMkjVDRZzb", "George", "male", "british"), + ("N2lVS1w4EtoT3dr4eOWO", "Callum", "male", "american"), + ("SAz9YHcvj6GT2YYXdXww", "River", "neutral", "american"), + ("SOYHLrjzK2X1ezoPC6cr", "Harry", "male", "american"), + ("TX3LPaxmHKxFdv7VOQHJ", "Liam", "male", "american"), + ("Xb7hH8MSUJpSbSDYk0k2", "Alice", "female", "british"), + ("XrExE9yKIg1WjnnlVkGX", "Matilda", "female", "american"), + ("bIHbv24MWmeRgasZH58o", "Will", "male", "american"), + ("cgSgspJ2msm6clMCkdW9", "Jessica", "female", "american"), + ("cjVigY5qzO86Huf0OWal", "Eric", "male", "american"), + ("hpp4J3VqNfWAUOO0d1Us", "Bella", "female", "american"), + ("iP95p4xoKVk53GoZ742B", "Chris", "male", "american"), + ("nPczCjzI2devNBz1zQrb", "Brian", "male", "american"), + ("onwK4e9ZLuTAKqWW03F9", "Daniel", "male", "british"), + ("pFZP5JQG7iQjIQuC4Bku", "Lily", "female", "british"), + ("pNInz6obpgDQGcFmaJgB", "Adam", "male", "american"), + ("pqHfZKP75CvOlQylNhV4", "Bill", "male", "american"), +] + +ELEVENLABS_VOICE_OPTIONS = [f"{name} ({gender}, {accent})" for _, name, gender, accent in ELEVENLABS_VOICES] +ELEVENLABS_VOICE_MAP = { + f"{name} ({gender}, {accent})": voice_id for voice_id, name, gender, accent in ELEVENLABS_VOICES +} + + +class ElevenLabsSpeechToText(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsSpeechToText", + display_name="ElevenLabs Speech to Text", + category="api node/audio/ElevenLabs", + description="Transcribe audio to text. " + "Supports automatic language detection, speaker diarization, and audio event tagging.", + inputs=[ + IO.Audio.Input( + "audio", + tooltip="Audio to transcribe.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "scribe_v2", + [ + IO.Boolean.Input( + "tag_audio_events", + default=False, + tooltip="Annotate sounds like (laughter), (music), etc. in transcript.", + ), + IO.Boolean.Input( + "diarize", + default=False, + tooltip="Annotate which speaker is talking.", + ), + IO.Float.Input( + "diarization_threshold", + default=0.22, + min=0.1, + max=0.4, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Speaker separation sensitivity. " + "Lower values are more sensitive to speaker changes.", + ), + IO.Float.Input( + "temperature", + default=0.0, + min=0.0, + max=2.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Randomness control. " + "0.0 uses model default. Higher values increase randomness.", + ), + IO.Combo.Input( + "timestamps_granularity", + options=["word", "character", "none"], + default="word", + tooltip="Timing precision for transcript words.", + ), + ], + ), + ], + tooltip="Model to use for transcription.", + ), + IO.String.Input( + "language_code", + default="", + tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). " + "Leave empty for automatic detection.", + ), + IO.Int.Input( + "num_speakers", + default=0, + min=0, + max=32, + display_mode=IO.NumberDisplay.slider, + tooltip="Maximum number of speakers to predict. Set to 0 for automatic detection.", + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + tooltip="Seed for reproducibility (determinism not guaranteed).", + ), + ], + outputs=[ + IO.String.Output(display_name="text"), + IO.String.Output(display_name="language_code"), + IO.String.Output(display_name="words_json"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.0073,"format":{"approximate":true,"suffix":"/minute"}}""", + ), + ) + + @classmethod + async def execute( + cls, + audio: Input.Audio, + model: dict, + language_code: str, + num_speakers: int, + seed: int, + ) -> IO.NodeOutput: + if model["diarize"] and num_speakers: + raise ValueError( + "Number of speakers cannot be specified when diarization is enabled. " + "Either disable diarization or set num_speakers to 0." + ) + request = SpeechToTextRequest( + model_id=model["model"], + cloud_storage_url=await upload_audio_to_comfyapi( + cls, audio, container_format="mp4", codec_name="aac", mime_type="audio/mp4" + ), + language_code=language_code if language_code.strip() else None, + tag_audio_events=model["tag_audio_events"], + num_speakers=num_speakers if num_speakers > 0 else None, + timestamps_granularity=model["timestamps_granularity"], + diarize=model["diarize"], + diarization_threshold=model["diarization_threshold"] if model["diarize"] else None, + seed=seed, + temperature=model["temperature"], + ) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/elevenlabs/v1/speech-to-text", method="POST"), + response_model=SpeechToTextResponse, + data=request, + content_type="multipart/form-data", + ) + words_json = json.dumps( + [w.model_dump(exclude_none=True) for w in response.words] if response.words else [], + indent=2, + ) + return IO.NodeOutput(response.text, response.language_code, words_json) + + +class ElevenLabsVoiceSelector(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsVoiceSelector", + display_name="ElevenLabs Voice Selector", + category="api node/audio/ElevenLabs", + description="Select a predefined ElevenLabs voice for text-to-speech generation.", + inputs=[ + IO.Combo.Input( + "voice", + options=ELEVENLABS_VOICE_OPTIONS, + tooltip="Choose a voice from the predefined ElevenLabs voices.", + ), + ], + outputs=[ + IO.Custom(ELEVENLABS_VOICE).Output(display_name="voice"), + ], + is_api_node=False, + ) + + @classmethod + def execute(cls, voice: str) -> IO.NodeOutput: + voice_id = ELEVENLABS_VOICE_MAP.get(voice) + if not voice_id: + raise ValueError(f"Unknown voice: {voice}") + return IO.NodeOutput(voice_id) + + +class ElevenLabsTextToSpeech(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsTextToSpeech", + display_name="ElevenLabs Text to Speech", + category="api node/audio/ElevenLabs", + description="Convert text to speech.", + inputs=[ + IO.Custom(ELEVENLABS_VOICE).Input( + "voice", + tooltip="Voice to use for speech synthesis. Connect from Voice Selector or Instant Voice Clone.", + ), + IO.String.Input( + "text", + multiline=True, + default="", + tooltip="The text to convert to speech.", + ), + IO.Float.Input( + "stability", + default=0.5, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Voice stability. Lower values give broader emotional range, " + "higher values produce more consistent but potentially monotonous speech.", + ), + IO.Combo.Input( + "apply_text_normalization", + options=["auto", "on", "off"], + tooltip="Text normalization mode. 'auto' lets the system decide, " + "'on' always applies normalization, 'off' skips it.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "eleven_multilingual_v2", + [ + IO.Float.Input( + "speed", + default=1.0, + min=0.7, + max=1.3, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.", + ), + IO.Float.Input( + "similarity_boost", + default=0.75, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Similarity boost. Higher values make the voice more similar to the original.", + ), + IO.Boolean.Input( + "use_speaker_boost", + default=False, + tooltip="Boost similarity to the original speaker voice.", + ), + IO.Float.Input( + "style", + default=0.0, + min=0.0, + max=0.2, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Style exaggeration. Higher values increase stylistic expression " + "but may reduce stability.", + ), + ], + ), + IO.DynamicCombo.Option( + "eleven_v3", + [ + IO.Float.Input( + "speed", + default=1.0, + min=0.7, + max=1.3, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.", + ), + IO.Float.Input( + "similarity_boost", + default=0.75, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Similarity boost. Higher values make the voice more similar to the original.", + ), + ], + ), + ], + tooltip="Model to use for text-to-speech.", + ), + IO.String.Input( + "language_code", + default="", + tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). " + "Leave empty for automatic detection.", + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + tooltip="Seed for reproducibility (determinism not guaranteed).", + ), + IO.Combo.Input( + "output_format", + options=["mp3_44100_192", "opus_48000_192"], + tooltip="Audio output format.", + ), + ], + outputs=[ + IO.Audio.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/1K chars"}}""", + ), + ) + + @classmethod + async def execute( + cls, + voice: str, + text: str, + stability: float, + apply_text_normalization: str, + model: dict, + language_code: str, + seed: int, + output_format: str, + ) -> IO.NodeOutput: + validate_string(text, min_length=1) + request = TextToSpeechRequest( + text=text, + model_id=model["model"], + language_code=language_code if language_code.strip() else None, + voice_settings=TextToSpeechVoiceSettings( + stability=stability, + similarity_boost=model["similarity_boost"], + speed=model["speed"], + use_speaker_boost=model.get("use_speaker_boost", None), + style=model.get("style", None), + ), + seed=seed, + apply_text_normalization=apply_text_normalization, + ) + response = await sync_op_raw( + cls, + ApiEndpoint( + path=f"/proxy/elevenlabs/v1/text-to-speech/{voice}", + method="POST", + query_params={"output_format": output_format}, + ), + data=request, + as_binary=True, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(response)) + + +class ElevenLabsAudioIsolation(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsAudioIsolation", + display_name="ElevenLabs Voice Isolation", + category="api node/audio/ElevenLabs", + description="Remove background noise from audio, isolating vocals or speech.", + inputs=[ + IO.Audio.Input( + "audio", + tooltip="Audio to process for background noise removal.", + ), + ], + outputs=[ + IO.Audio.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/minute"}}""", + ), + ) + + @classmethod + async def execute( + cls, + audio: Input.Audio, + ) -> IO.NodeOutput: + audio_data_np = audio_tensor_to_contiguous_ndarray(audio["waveform"]) + audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, audio["sample_rate"], "mp4", "aac") + response = await sync_op_raw( + cls, + ApiEndpoint(path="/proxy/elevenlabs/v1/audio-isolation", method="POST"), + files={"audio": ("audio.mp4", audio_bytes_io, "audio/mp4")}, + content_type="multipart/form-data", + as_binary=True, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(response)) + + +class ElevenLabsTextToSoundEffects(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsTextToSoundEffects", + display_name="ElevenLabs Text to Sound Effects", + category="api node/audio/ElevenLabs", + description="Generate sound effects from text descriptions.", + inputs=[ + IO.String.Input( + "text", + multiline=True, + default="", + tooltip="Text description of the sound effect to generate.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "eleven_sfx_v2", + [ + IO.Float.Input( + "duration", + default=5.0, + min=0.5, + max=30.0, + step=0.1, + display_mode=IO.NumberDisplay.slider, + tooltip="Duration of generated sound in seconds.", + ), + IO.Boolean.Input( + "loop", + default=False, + tooltip="Create a smoothly looping sound effect.", + ), + IO.Float.Input( + "prompt_influence", + default=0.3, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="How closely generation follows the prompt. " + "Higher values make the sound follow the text more closely.", + ), + ], + ), + ], + tooltip="Model to use for sound effect generation.", + ), + IO.Combo.Input( + "output_format", + options=["mp3_44100_192", "opus_48000_192"], + tooltip="Audio output format.", + ), + ], + outputs=[ + IO.Audio.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.14,"format":{"approximate":true,"suffix":"/minute"}}""", + ), + ) + + @classmethod + async def execute( + cls, + text: str, + model: dict, + output_format: str, + ) -> IO.NodeOutput: + validate_string(text, min_length=1) + response = await sync_op_raw( + cls, + ApiEndpoint( + path="/proxy/elevenlabs/v1/sound-generation", + method="POST", + query_params={"output_format": output_format}, + ), + data=TextToSoundEffectsRequest( + text=text, + duration_seconds=model["duration"], + prompt_influence=model["prompt_influence"], + loop=model.get("loop", None), + ), + as_binary=True, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(response)) + + +class ElevenLabsInstantVoiceClone(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsInstantVoiceClone", + display_name="ElevenLabs Instant Voice Clone", + category="api node/audio/ElevenLabs", + description="Create a cloned voice from audio samples. " + "Provide 1-8 audio recordings of the voice to clone.", + inputs=[ + IO.Autogrow.Input( + "files", + template=IO.Autogrow.TemplatePrefix( + IO.Audio.Input("audio"), + prefix="audio", + min=1, + max=8, + ), + tooltip="Audio recordings for voice cloning.", + ), + IO.Boolean.Input( + "remove_background_noise", + default=False, + tooltip="Remove background noise from voice samples using audio isolation.", + ), + ], + outputs=[ + IO.Custom(ELEVENLABS_VOICE).Output(display_name="voice"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge(expr="""{"type":"usd","usd":0.15}"""), + ) + + @classmethod + async def execute( + cls, + files: IO.Autogrow.Type, + remove_background_noise: bool, + ) -> IO.NodeOutput: + file_tuples: list[tuple[str, tuple[str, bytes, str]]] = [] + for key in files: + audio = files[key] + sample_rate: int = audio["sample_rate"] + waveform = audio["waveform"] + audio_data_np = audio_tensor_to_contiguous_ndarray(waveform) + audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, sample_rate, "mp4", "aac") + file_tuples.append(("files", (f"{key}.mp4", audio_bytes_io.getvalue(), "audio/mp4"))) + + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/elevenlabs/v1/voices/add", method="POST"), + response_model=AddVoiceResponse, + data=AddVoiceRequest( + name=str(uuid.uuid4()), + remove_background_noise=remove_background_noise, + ), + files=file_tuples, + content_type="multipart/form-data", + ) + return IO.NodeOutput(response.voice_id) + + +ELEVENLABS_STS_VOICE_SETTINGS = [ + IO.Float.Input( + "speed", + default=1.0, + min=0.7, + max=1.3, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Speech speed. 1.0 is normal, <1.0 slower, >1.0 faster.", + ), + IO.Float.Input( + "similarity_boost", + default=0.75, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Similarity boost. Higher values make the voice more similar to the original.", + ), + IO.Boolean.Input( + "use_speaker_boost", + default=False, + tooltip="Boost similarity to the original speaker voice.", + ), + IO.Float.Input( + "style", + default=0.0, + min=0.0, + max=0.2, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Style exaggeration. Higher values increase stylistic expression but may reduce stability.", + ), +] + + +class ElevenLabsSpeechToSpeech(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsSpeechToSpeech", + display_name="ElevenLabs Speech to Speech", + category="api node/audio/ElevenLabs", + description="Transform speech from one voice to another while preserving the original content and emotion.", + inputs=[ + IO.Custom(ELEVENLABS_VOICE).Input( + "voice", + tooltip="Target voice for the transformation. " + "Connect from Voice Selector or Instant Voice Clone.", + ), + IO.Audio.Input( + "audio", + tooltip="Source audio to transform.", + ), + IO.Float.Input( + "stability", + default=0.5, + min=0.0, + max=1.0, + step=0.01, + display_mode=IO.NumberDisplay.slider, + tooltip="Voice stability. Lower values give broader emotional range, " + "higher values produce more consistent but potentially monotonous speech.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "eleven_multilingual_sts_v2", + ELEVENLABS_STS_VOICE_SETTINGS, + ), + IO.DynamicCombo.Option( + "eleven_english_sts_v2", + ELEVENLABS_STS_VOICE_SETTINGS, + ), + ], + tooltip="Model to use for speech-to-speech transformation.", + ), + IO.Combo.Input( + "output_format", + options=["mp3_44100_192", "opus_48000_192"], + tooltip="Audio output format.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=4294967295, + tooltip="Seed for reproducibility.", + ), + IO.Boolean.Input( + "remove_background_noise", + default=False, + tooltip="Remove background noise from input audio using audio isolation.", + ), + ], + outputs=[ + IO.Audio.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/minute"}}""", + ), + ) + + @classmethod + async def execute( + cls, + voice: str, + audio: Input.Audio, + stability: float, + model: dict, + output_format: str, + seed: int, + remove_background_noise: bool, + ) -> IO.NodeOutput: + audio_data_np = audio_tensor_to_contiguous_ndarray(audio["waveform"]) + audio_bytes_io = audio_ndarray_to_bytesio(audio_data_np, audio["sample_rate"], "mp4", "aac") + voice_settings = TextToSpeechVoiceSettings( + stability=stability, + similarity_boost=model["similarity_boost"], + style=model["style"], + use_speaker_boost=model["use_speaker_boost"], + speed=model["speed"], + ) + response = await sync_op_raw( + cls, + ApiEndpoint( + path=f"/proxy/elevenlabs/v1/speech-to-speech/{voice}", + method="POST", + query_params={"output_format": output_format}, + ), + data=SpeechToSpeechRequest( + model_id=model["model"], + voice_settings=voice_settings.model_dump_json(exclude_none=True), + seed=seed, + remove_background_noise=remove_background_noise, + ), + files={"audio": ("audio.mp4", audio_bytes_io.getvalue(), "audio/mp4")}, + content_type="multipart/form-data", + as_binary=True, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(response)) + + +def _generate_dialogue_inputs(count: int) -> list: + """Generate input widgets for a given number of dialogue entries.""" + inputs = [] + for i in range(1, count + 1): + inputs.extend( + [ + IO.String.Input( + f"text{i}", + multiline=True, + default="", + tooltip=f"Text content for dialogue entry {i}.", + ), + IO.Custom(ELEVENLABS_VOICE).Input( + f"voice{i}", + tooltip=f"Voice for dialogue entry {i}. Connect from Voice Selector or Instant Voice Clone.", + ), + ] + ) + return inputs + + +class ElevenLabsTextToDialogue(IO.ComfyNode): + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="ElevenLabsTextToDialogue", + display_name="ElevenLabs Text to Dialogue", + category="api node/audio/ElevenLabs", + description="Generate multi-speaker dialogue from text. Each dialogue entry has its own text and voice.", + inputs=[ + IO.Float.Input( + "stability", + default=0.5, + min=0.0, + max=1.0, + step=0.5, + display_mode=IO.NumberDisplay.slider, + tooltip="Voice stability. Lower values give broader emotional range, " + "higher values produce more consistent but potentially monotonous speech.", + ), + IO.Combo.Input( + "apply_text_normalization", + options=["auto", "on", "off"], + tooltip="Text normalization mode. 'auto' lets the system decide, " + "'on' always applies normalization, 'off' skips it.", + ), + IO.Combo.Input( + "model", + options=["eleven_v3"], + tooltip="Model to use for dialogue generation.", + ), + IO.DynamicCombo.Input( + "inputs", + options=[ + IO.DynamicCombo.Option("1", _generate_dialogue_inputs(1)), + IO.DynamicCombo.Option("2", _generate_dialogue_inputs(2)), + IO.DynamicCombo.Option("3", _generate_dialogue_inputs(3)), + IO.DynamicCombo.Option("4", _generate_dialogue_inputs(4)), + IO.DynamicCombo.Option("5", _generate_dialogue_inputs(5)), + IO.DynamicCombo.Option("6", _generate_dialogue_inputs(6)), + IO.DynamicCombo.Option("7", _generate_dialogue_inputs(7)), + IO.DynamicCombo.Option("8", _generate_dialogue_inputs(8)), + IO.DynamicCombo.Option("9", _generate_dialogue_inputs(9)), + IO.DynamicCombo.Option("10", _generate_dialogue_inputs(10)), + ], + tooltip="Number of dialogue entries.", + ), + IO.String.Input( + "language_code", + default="", + tooltip="ISO-639-1 or ISO-639-3 language code (e.g., 'en', 'es', 'fra'). " + "Leave empty for automatic detection.", + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=4294967295, + tooltip="Seed for reproducibility.", + ), + IO.Combo.Input( + "output_format", + options=["mp3_44100_192", "opus_48000_192"], + tooltip="Audio output format.", + ), + ], + outputs=[ + IO.Audio.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.24,"format":{"approximate":true,"suffix":"/1K chars"}}""", + ), + ) + + @classmethod + async def execute( + cls, + stability: float, + apply_text_normalization: str, + model: str, + inputs: dict, + language_code: str, + seed: int, + output_format: str, + ) -> IO.NodeOutput: + num_entries = int(inputs["inputs"]) + dialogue_inputs: list[DialogueInput] = [] + for i in range(1, num_entries + 1): + text = inputs[f"text{i}"] + voice_id = inputs[f"voice{i}"] + validate_string(text, min_length=1) + dialogue_inputs.append(DialogueInput(text=text, voice_id=voice_id)) + request = TextToDialogueRequest( + inputs=dialogue_inputs, + model_id=model, + language_code=language_code if language_code.strip() else None, + settings=DialogueSettings(stability=stability), + seed=seed, + apply_text_normalization=apply_text_normalization, + ) + response = await sync_op_raw( + cls, + ApiEndpoint( + path="/proxy/elevenlabs/v1/text-to-dialogue", + method="POST", + query_params={"output_format": output_format}, + ), + data=request, + as_binary=True, + ) + return IO.NodeOutput(audio_bytes_to_audio_input(response)) + + +class ElevenLabsExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [ + ElevenLabsSpeechToText, + ElevenLabsVoiceSelector, + ElevenLabsTextToSpeech, + ElevenLabsAudioIsolation, + ElevenLabsTextToSoundEffects, + ElevenLabsInstantVoiceClone, + ElevenLabsSpeechToSpeech, + ElevenLabsTextToDialogue, + ] + + +async def comfy_entrypoint() -> ElevenLabsExtension: + return ElevenLabsExtension() diff --git a/comfy_api_nodes/nodes_gemini.py b/comfy_api_nodes/nodes_gemini.py index 3b31caa7b..3fe804e0b 100644 --- a/comfy_api_nodes/nodes_gemini.py +++ b/comfy_api_nodes/nodes_gemini.py @@ -6,6 +6,7 @@ See: https://cloud.google.com/vertex-ai/generative-ai/docs/model-reference/infer import base64 import os from enum import Enum +from fnmatch import fnmatch from io import BytesIO from typing import Literal @@ -28,6 +29,7 @@ from comfy_api_nodes.apis.gemini import ( GeminiRole, GeminiSystemInstructionContent, GeminiTextPart, + GeminiThinkingConfig, Modality, ) from comfy_api_nodes.util import ( @@ -54,6 +56,21 @@ GEMINI_IMAGE_SYS_PROMPT = ( "Prioritize generating the visual representation above any text, formatting, or conversational requests." ) +GEMINI_IMAGE_2_PRICE_BADGE = IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "resolution"]), + expr=""" + ( + $m := widgets.model; + $r := widgets.resolution; + $isFlash := $contains($m, "nano banana 2"); + $flashPrices := {"1k": 0.0696, "2k": 0.0696, "4k": 0.123}; + $proPrices := {"1k": 0.134, "2k": 0.134, "4k": 0.24}; + $prices := $isFlash ? $flashPrices : $proPrices; + {"type":"usd","usd": $lookup($prices, $r), "format":{"suffix":"/Image","approximate":true}} + ) + """, +) + class GeminiModel(str, Enum): """ @@ -119,6 +136,13 @@ async def create_image_parts( return image_parts +def _mime_matches(mime: GeminiMimeType | None, pattern: str) -> bool: + """Check if a MIME type matches a pattern. Supports fnmatch globs (e.g. 'image/*').""" + if mime is None: + return False + return fnmatch(mime.value, pattern) + + def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Literal["text"] | str) -> list[GeminiPart]: """ Filter response parts by their type. @@ -151,9 +175,9 @@ def get_parts_by_type(response: GeminiGenerateContentResponse, part_type: Litera for part in candidate.content.parts: if part_type == "text" and part.text: parts.append(part) - elif part.inlineData and part.inlineData.mimeType == part_type: + elif part.inlineData and _mime_matches(part.inlineData.mimeType, part_type): parts.append(part) - elif part.fileData and part.fileData.mimeType == part_type: + elif part.fileData and _mime_matches(part.fileData.mimeType, part_type): parts.append(part) if not parts and blocked_reasons: @@ -178,7 +202,7 @@ def get_text_from_response(response: GeminiGenerateContentResponse) -> str: async def get_image_from_response(response: GeminiGenerateContentResponse) -> Input.Image: image_tensors: list[Input.Image] = [] - parts = get_parts_by_type(response, "image/png") + parts = get_parts_by_type(response, "image/*") for part in parts: if part.inlineData: image_data = base64.b64decode(part.inlineData.data) @@ -221,6 +245,10 @@ def calculate_tokens_price(response: GeminiGenerateContentResponse) -> float | N input_tokens_price = 2 output_text_tokens_price = 12.0 output_image_tokens_price = 120.0 + elif response.modelVersion == "gemini-3.1-flash-image-preview": + input_tokens_price = 0.5 + output_text_tokens_price = 3.0 + output_image_tokens_price = 60.0 else: return None final_price = response.usageMetadata.promptTokenCount * input_tokens_price @@ -308,6 +336,7 @@ class GeminiNode(IO.ComfyNode): default="", optional=True, tooltip="Foundational instructions that dictate an AI's behavior.", + advanced=True, ), ], outputs=[ @@ -585,6 +614,7 @@ class GeminiImage(IO.ComfyNode): tooltip="Choose 'IMAGE' for image-only output, or " "'IMAGE+TEXT' to return both the generated image and a text response.", optional=True, + advanced=True, ), IO.String.Input( "system_prompt", @@ -592,6 +622,7 @@ class GeminiImage(IO.ComfyNode): default=GEMINI_IMAGE_SYS_PROMPT, optional=True, tooltip="Foundational instructions that dictate an AI's behavior.", + advanced=True, ), ], outputs=[ @@ -626,7 +657,7 @@ class GeminiImage(IO.ComfyNode): if not aspect_ratio: aspect_ratio = "auto" # for backward compatability with old workflows; to-do remove this in December - image_config = GeminiImageConfig(aspectRatio=aspect_ratio) + image_config = GeminiImageConfig() if aspect_ratio == "auto" else GeminiImageConfig(aspectRatio=aspect_ratio) if images is not None: parts.extend(await create_image_parts(cls, images)) @@ -646,7 +677,7 @@ class GeminiImage(IO.ComfyNode): ], generationConfig=GeminiImageGenerationConfig( responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]), - imageConfig=None if aspect_ratio == "auto" else image_config, + imageConfig=image_config, ), systemInstruction=gemini_system_prompt, ), @@ -675,7 +706,7 @@ class GeminiImage2(IO.ComfyNode): ), IO.Combo.Input( "model", - options=["gemini-3-pro-image-preview"], + options=["gemini-3-pro-image-preview", "Nano Banana 2 (Gemini 3.1 Flash Image)"], ), IO.Int.Input( "seed", @@ -706,6 +737,7 @@ class GeminiImage2(IO.ComfyNode): options=["IMAGE+TEXT", "IMAGE"], tooltip="Choose 'IMAGE' for image-only output, or " "'IMAGE+TEXT' to return both the generated image and a text response.", + advanced=True, ), IO.Image.Input( "images", @@ -725,6 +757,7 @@ class GeminiImage2(IO.ComfyNode): default=GEMINI_IMAGE_SYS_PROMPT, optional=True, tooltip="Foundational instructions that dictate an AI's behavior.", + advanced=True, ), ], outputs=[ @@ -737,19 +770,7 @@ class GeminiImage2(IO.ComfyNode): IO.Hidden.unique_id, ], is_api_node=True, - price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["resolution"]), - expr=""" - ( - $r := widgets.resolution; - ($contains($r,"1k") or $contains($r,"2k")) - ? {"type":"usd","usd":0.134,"format":{"suffix":"/Image","approximate":true}} - : $contains($r,"4k") - ? {"type":"usd","usd":0.24,"format":{"suffix":"/Image","approximate":true}} - : {"type":"text","text":"Token-based"} - ) - """, - ), + price_badge=GEMINI_IMAGE_2_PRICE_BADGE, ) @classmethod @@ -766,6 +787,10 @@ class GeminiImage2(IO.ComfyNode): system_prompt: str = "", ) -> IO.NodeOutput: validate_string(prompt, strip_whitespace=True, min_length=1) + if model == "Nano Banana 2 (Gemini 3.1 Flash Image)": + model = "gemini-3.1-flash-image-preview" + if response_modalities == "IMAGE+TEXT": + raise ValueError("IMAGE+TEXT is not currently available for the Nano Banana 2 model.") parts: list[GeminiPart] = [GeminiPart(text=prompt)] if images is not None: @@ -802,6 +827,168 @@ class GeminiImage2(IO.ComfyNode): return IO.NodeOutput(await get_image_from_response(response), get_text_from_response(response)) +class GeminiNanoBanana2(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="GeminiNanoBanana2", + display_name="Nano Banana 2", + category="api node/image/Gemini", + description="Generate or edit images synchronously via Google Vertex API.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + tooltip="Text prompt describing the image to generate or the edits to apply. " + "Include any constraints, styles, or details the model should follow.", + default="", + ), + IO.Combo.Input( + "model", + options=["Nano Banana 2 (Gemini 3.1 Flash Image)"], + ), + IO.Int.Input( + "seed", + default=42, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="When the seed is fixed to a specific value, the model makes a best effort to provide " + "the same response for repeated requests. Deterministic output isn't guaranteed. " + "Also, changing the model or parameter settings, such as the temperature, " + "can cause variations in the response even when you use the same seed value. " + "By default, a random seed value is used.", + ), + IO.Combo.Input( + "aspect_ratio", + options=[ + "auto", + "1:1", + "2:3", + "3:2", + "3:4", + "4:3", + "4:5", + "5:4", + "9:16", + "16:9", + "21:9", + # "1:4", + # "4:1", + # "8:1", + # "1:8", + ], + default="auto", + tooltip="If set to 'auto', matches your input image's aspect ratio; " + "if no image is provided, a 16:9 square is usually generated.", + ), + IO.Combo.Input( + "resolution", + options=[ + # "512px", + "1K", + "2K", + "4K", + ], + tooltip="Target output resolution. For 2K/4K the native Gemini upscaler is used.", + ), + IO.Combo.Input( + "response_modalities", + options=["IMAGE"], + advanced=True, + ), + IO.Combo.Input( + "thinking_level", + options=["MINIMAL", "HIGH"], + ), + IO.Image.Input( + "images", + optional=True, + tooltip="Optional reference image(s). " + "To include multiple images, use the Batch Images node (up to 14).", + ), + IO.Custom("GEMINI_INPUT_FILES").Input( + "files", + optional=True, + tooltip="Optional file(s) to use as context for the model. " + "Accepts inputs from the Gemini Generate Content Input Files node.", + ), + IO.String.Input( + "system_prompt", + multiline=True, + default=GEMINI_IMAGE_SYS_PROMPT, + optional=True, + tooltip="Foundational instructions that dictate an AI's behavior.", + advanced=True, + ), + ], + outputs=[ + IO.Image.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=GEMINI_IMAGE_2_PRICE_BADGE, + ) + + @classmethod + async def execute( + cls, + prompt: str, + model: str, + seed: int, + aspect_ratio: str, + resolution: str, + response_modalities: str, + thinking_level: str, + images: Input.Image | None = None, + files: list[GeminiPart] | None = None, + system_prompt: str = "", + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=True, min_length=1) + if model == "Nano Banana 2 (Gemini 3.1 Flash Image)": + model = "gemini-3.1-flash-image-preview" + + parts: list[GeminiPart] = [GeminiPart(text=prompt)] + if images is not None: + if get_number_of_images(images) > 14: + raise ValueError("The current maximum number of supported images is 14.") + parts.extend(await create_image_parts(cls, images)) + if files is not None: + parts.extend(files) + + image_config = GeminiImageConfig(imageSize=resolution) + if aspect_ratio != "auto": + image_config.aspectRatio = aspect_ratio + + gemini_system_prompt = None + if system_prompt: + gemini_system_prompt = GeminiSystemInstructionContent(parts=[GeminiTextPart(text=system_prompt)], role=None) + + response = await sync_op( + cls, + ApiEndpoint(path=f"/proxy/vertexai/gemini/{model}", method="POST"), + data=GeminiImageGenerateContentRequest( + contents=[ + GeminiContent(role=GeminiRole.user, parts=parts), + ], + generationConfig=GeminiImageGenerationConfig( + responseModalities=(["IMAGE"] if response_modalities == "IMAGE" else ["TEXT", "IMAGE"]), + imageConfig=image_config, + thinkingConfig=GeminiThinkingConfig(thinkingLevel=thinking_level), + ), + systemInstruction=gemini_system_prompt, + ), + response_model=GeminiGenerateContentResponse, + price_extractor=calculate_tokens_price, + ) + return IO.NodeOutput(await get_image_from_response(response), get_text_from_response(response)) + + class GeminiExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -809,6 +996,7 @@ class GeminiExtension(ComfyExtension): GeminiNode, GeminiImage, GeminiImage2, + GeminiNanoBanana2, GeminiInputFiles, ] diff --git a/comfy_api_nodes/nodes_hitpaw.py b/comfy_api_nodes/nodes_hitpaw.py new file mode 100644 index 000000000..488080a74 --- /dev/null +++ b/comfy_api_nodes/nodes_hitpaw.py @@ -0,0 +1,342 @@ +import math + +from typing_extensions import override + +from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api_nodes.apis.hitpaw import ( + ImageEnhanceTaskCreateRequest, + InputVideoModel, + TaskCreateDataResponse, + TaskCreateResponse, + TaskStatusPollRequest, + TaskStatusResponse, + VideoEnhanceTaskCreateRequest, +) +from comfy_api_nodes.util import ( + ApiEndpoint, + download_url_to_image_tensor, + download_url_to_video_output, + downscale_image_tensor, + get_image_dimensions, + poll_op, + sync_op, + upload_image_to_comfyapi, + upload_video_to_comfyapi, + validate_video_duration, +) + +VIDEO_MODELS_MODELS_MAP = { + "Portrait Restore Model (1x)": "portrait_restore_1x", + "Portrait Restore Model (2x)": "portrait_restore_2x", + "General Restore Model (1x)": "general_restore_1x", + "General Restore Model (2x)": "general_restore_2x", + "General Restore Model (4x)": "general_restore_4x", + "Ultra HD Model (2x)": "ultrahd_restore_2x", + "Generative Model (1x)": "generative_1x", +} + +# Resolution name to target dimension (shorter side) in pixels +RESOLUTION_TARGET_MAP = { + "720p": 720, + "1080p": 1080, + "2K/QHD": 1440, + "4K/UHD": 2160, + "8K": 4320, +} + +# Square (1:1) resolutions use standard square dimensions +RESOLUTION_SQUARE_MAP = { + "720p": 720, + "1080p": 1080, + "2K/QHD": 1440, + "4K/UHD": 2048, # DCI 4K square + "8K": 4096, # DCI 8K square +} + +# Models with limited resolution support (no 8K) +LIMITED_RESOLUTION_MODELS = {"Generative Model (1x)"} + +# Resolution options for different model types +RESOLUTIONS_LIMITED = ["original", "720p", "1080p", "2K/QHD", "4K/UHD"] +RESOLUTIONS_FULL = ["original", "720p", "1080p", "2K/QHD", "4K/UHD", "8K"] + +# Maximum output resolution in pixels +MAX_PIXELS_GENERATIVE = 32_000_000 +MAX_MP_GENERATIVE = MAX_PIXELS_GENERATIVE // 1_000_000 + + +class HitPawGeneralImageEnhance(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="HitPawGeneralImageEnhance", + display_name="HitPaw General Image Enhance", + category="api node/image/HitPaw", + description="Upscale low-resolution images to super-resolution, eliminate artifacts and noise. " + f"Maximum output: {MAX_MP_GENERATIVE} megapixels.", + inputs=[ + IO.Combo.Input("model", options=["generative_portrait", "generative"]), + IO.Image.Input("image"), + IO.Combo.Input("upscale_factor", options=[1, 2, 4]), + IO.Boolean.Input( + "auto_downscale", + default=False, + tooltip="Automatically downscale input image if output would exceed the limit.", + ), + ], + outputs=[ + IO.Image.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model"]), + expr=""" + ( + $prices := { + "generative_portrait": {"min": 0.02, "max": 0.06}, + "generative": {"min": 0.05, "max": 0.15} + }; + $price := $lookup($prices, widgets.model); + { + "type": "range_usd", + "min_usd": $price.min, + "max_usd": $price.max + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: str, + image: Input.Image, + upscale_factor: int, + auto_downscale: bool, + ) -> IO.NodeOutput: + height, width = get_image_dimensions(image) + requested_scale = upscale_factor + output_pixels = height * width * requested_scale * requested_scale + if output_pixels > MAX_PIXELS_GENERATIVE: + if auto_downscale: + input_pixels = width * height + scale = 1 + max_input_pixels = MAX_PIXELS_GENERATIVE + + for candidate in [4, 2, 1]: + if candidate > requested_scale: + continue + scale_output_pixels = input_pixels * candidate * candidate + if scale_output_pixels <= MAX_PIXELS_GENERATIVE: + scale = candidate + max_input_pixels = None + break + # Check if we can downscale input by at most 2x to fit + downscale_ratio = math.sqrt(scale_output_pixels / MAX_PIXELS_GENERATIVE) + if downscale_ratio <= 2.0: + scale = candidate + max_input_pixels = MAX_PIXELS_GENERATIVE // (candidate * candidate) + break + + if max_input_pixels is not None: + image = downscale_image_tensor(image, total_pixels=max_input_pixels) + upscale_factor = scale + else: + output_width = width * requested_scale + output_height = height * requested_scale + raise ValueError( + f"Output size ({output_width}x{output_height} = {output_pixels:,} pixels) " + f"exceeds maximum allowed size of {MAX_PIXELS_GENERATIVE:,} pixels ({MAX_MP_GENERATIVE}MP). " + f"Enable auto_downscale or use a smaller input image or a lower upscale factor." + ) + + initial_res = await sync_op( + cls, + ApiEndpoint(path="/proxy/hitpaw/api/photo-enhancer", method="POST"), + response_model=TaskCreateResponse, + data=ImageEnhanceTaskCreateRequest( + model_name=f"{model}_{upscale_factor}x", + img_url=await upload_image_to_comfyapi(cls, image, total_pixels=None), + ), + wait_label="Creating task", + final_label_on_success="Task created", + ) + if initial_res.code != 200: + raise ValueError(f"Task creation failed with code {initial_res.code}: {initial_res.message}") + request_price = initial_res.data.consume_coins / 1000 + final_response = await poll_op( + cls, + ApiEndpoint(path="/proxy/hitpaw/api/task-status", method="POST"), + data=TaskCreateDataResponse(job_id=initial_res.data.job_id), + response_model=TaskStatusResponse, + status_extractor=lambda x: x.data.status, + price_extractor=lambda x: request_price, + poll_interval=10.0, + max_poll_attempts=480, + ) + return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.res_url)) + + +class HitPawVideoEnhance(IO.ComfyNode): + @classmethod + def define_schema(cls): + model_options = [] + for model_name in VIDEO_MODELS_MODELS_MAP: + if model_name in LIMITED_RESOLUTION_MODELS: + resolutions = RESOLUTIONS_LIMITED + else: + resolutions = RESOLUTIONS_FULL + model_options.append( + IO.DynamicCombo.Option( + model_name, + [IO.Combo.Input("resolution", options=resolutions)], + ) + ) + + return IO.Schema( + node_id="HitPawVideoEnhance", + display_name="HitPaw Video Enhance", + category="api node/video/HitPaw", + description="Upscale low-resolution videos to high resolution, eliminate artifacts and noise. " + "Prices shown are per second of video.", + inputs=[ + IO.DynamicCombo.Input("model", options=model_options), + IO.Video.Input("video"), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.resolution"]), + expr=""" + ( + $m := $lookup(widgets, "model"); + $res := $lookup(widgets, "model.resolution"); + $standard_model_prices := { + "original": {"min": 0.01, "max": 0.198}, + "720p": {"min": 0.01, "max": 0.06}, + "1080p": {"min": 0.015, "max": 0.09}, + "2k/qhd": {"min": 0.02, "max": 0.117}, + "4k/uhd": {"min": 0.025, "max": 0.152}, + "8k": {"min": 0.033, "max": 0.198} + }; + $ultra_hd_model_prices := { + "original": {"min": 0.015, "max": 0.264}, + "720p": {"min": 0.015, "max": 0.092}, + "1080p": {"min": 0.02, "max": 0.12}, + "2k/qhd": {"min": 0.026, "max": 0.156}, + "4k/uhd": {"min": 0.034, "max": 0.203}, + "8k": {"min": 0.044, "max": 0.264} + }; + $generative_model_prices := { + "original": {"min": 0.015, "max": 0.338}, + "720p": {"min": 0.008, "max": 0.090}, + "1080p": {"min": 0.05, "max": 0.15}, + "2k/qhd": {"min": 0.038, "max": 0.225}, + "4k/uhd": {"min": 0.056, "max": 0.338} + }; + $prices := $contains($m, "ultra hd") ? $ultra_hd_model_prices : + $contains($m, "generative") ? $generative_model_prices : + $standard_model_prices; + $price := $lookup($prices, $res); + { + "type": "range_usd", + "min_usd": $price.min, + "max_usd": $price.max, + "format": {"approximate": true, "suffix": "/second"} + } + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: InputVideoModel, + video: Input.Video, + ) -> IO.NodeOutput: + validate_video_duration(video, min_duration=0.5, max_duration=60 * 60) + resolution = model["resolution"] + src_width, src_height = video.get_dimensions() + + if resolution == "original": + output_width = src_width + output_height = src_height + else: + if src_width == src_height: + target_size = RESOLUTION_SQUARE_MAP[resolution] + if target_size < src_width: + raise ValueError( + f"Selected resolution {resolution} ({target_size}x{target_size}) is smaller than " + f"the input video ({src_width}x{src_height}). Please select a higher resolution or 'original'." + ) + output_width = target_size + output_height = target_size + else: + min_dimension = min(src_width, src_height) + target_size = RESOLUTION_TARGET_MAP[resolution] + if target_size < min_dimension: + raise ValueError( + f"Selected resolution {resolution} ({target_size}p) is smaller than " + f"the input video's shorter dimension ({min_dimension}p). " + f"Please select a higher resolution or 'original'." + ) + if src_width > src_height: + output_height = target_size + output_width = int(target_size * (src_width / src_height)) + else: + output_width = target_size + output_height = int(target_size * (src_height / src_width)) + initial_res = await sync_op( + cls, + ApiEndpoint(path="/proxy/hitpaw/api/video-enhancer", method="POST"), + response_model=TaskCreateResponse, + data=VideoEnhanceTaskCreateRequest( + video_url=await upload_video_to_comfyapi(cls, video), + resolution=[output_width, output_height], + original_resolution=[src_width, src_height], + model_name=VIDEO_MODELS_MODELS_MAP[model["model"]], + ), + wait_label="Creating task", + final_label_on_success="Task created", + ) + request_price = initial_res.data.consume_coins / 1000 + if initial_res.code != 200: + raise ValueError(f"Task creation failed with code {initial_res.code}: {initial_res.message}") + final_response = await poll_op( + cls, + ApiEndpoint(path="/proxy/hitpaw/api/task-status", method="POST"), + data=TaskStatusPollRequest(job_id=initial_res.data.job_id), + response_model=TaskStatusResponse, + status_extractor=lambda x: x.data.status, + price_extractor=lambda x: request_price, + poll_interval=10.0, + max_poll_attempts=320, + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.data.res_url)) + + +class HitPawExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[IO.ComfyNode]]: + return [ + HitPawGeneralImageEnhance, + HitPawVideoEnhance, + ] + + +async def comfy_entrypoint() -> HitPawExtension: + return HitPawExtension() diff --git a/comfy_api_nodes/nodes_hunyuan3d.py b/comfy_api_nodes/nodes_hunyuan3d.py index b3a736643..d1d9578ec 100644 --- a/comfy_api_nodes/nodes_hunyuan3d.py +++ b/comfy_api_nodes/nodes_hunyuan3d.py @@ -1,35 +1,49 @@ -import os - from typing_extensions import override -from comfy_api.latest import IO, ComfyExtension, Input +from comfy_api.latest import IO, ComfyExtension, Input, Types from comfy_api_nodes.apis.hunyuan3d import ( Hunyuan3DViewImage, InputGenerateType, ResultFile3D, + TextureEditTaskRequest, To3DProTaskCreateResponse, To3DProTaskQueryRequest, To3DProTaskRequest, To3DProTaskResultResponse, + To3DUVFileInput, + To3DUVTaskRequest, ) from comfy_api_nodes.util import ( ApiEndpoint, - download_url_to_bytesio, + download_url_to_file_3d, + download_url_to_image_tensor, downscale_image_tensor_by_max_side, poll_op, sync_op, + upload_3d_model_to_comfyapi, upload_image_to_comfyapi, validate_image_dimensions, validate_string, ) -from folder_paths import get_output_directory -def get_glb_obj_from_response(response_objs: list[ResultFile3D]) -> ResultFile3D: +def _is_tencent_rate_limited(status: int, body: object) -> bool: + return ( + status == 400 + and isinstance(body, dict) + and "RequestLimitExceeded" in str(body.get("Response", {}).get("Error", {}).get("Code", "")) + ) + + +def get_file_from_response( + response_objs: list[ResultFile3D], file_type: str, raise_if_not_found: bool = True +) -> ResultFile3D | None: for i in response_objs: - if i.Type.lower() == "glb": + if i.Type.lower() == file_type.lower(): return i - raise ValueError("No GLB file found in response. Please report this to the developers.") + if raise_if_not_found: + raise ValueError(f"'{file_type}' file type is not found in the response.") + return None class TencentTextToModelNode(IO.ComfyNode): @@ -38,8 +52,9 @@ class TencentTextToModelNode(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="TencentTextToModelNode", - display_name="Hunyuan3D: Text to Model (Pro)", + display_name="Hunyuan3D: Text to Model", category="api node/3d/Tencent", + essentials_category="3D", inputs=[ IO.Combo.Input( "model", @@ -74,7 +89,9 @@ class TencentTextToModelNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DOBJ.Output(display_name="OBJ"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -121,22 +138,27 @@ class TencentTextToModelNode(IO.ComfyNode): EnablePBR=generate_type.get("pbr", None), PolygonType=generate_type.get("polygon_type", None), ), + is_rate_limited=_is_tencent_rate_limited, ) if response.Error: raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}") + task_id = response.JobId result = await poll_op( cls, ApiEndpoint(path="/proxy/tencent/hunyuan/3d-pro/query", method="POST"), - data=To3DProTaskQueryRequest(JobId=response.JobId), + data=To3DProTaskQueryRequest(JobId=task_id), response_model=To3DProTaskResultResponse, status_extractor=lambda r: r.Status, ) - model_file = f"hunyuan_model_{response.JobId}.glb" - await download_url_to_bytesio( - get_glb_obj_from_response(result.ResultFile3Ds).Url, - os.path.join(get_output_directory(), model_file), + return IO.NodeOutput( + f"{task_id}.glb", + await download_url_to_file_3d( + get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id + ), + await download_url_to_file_3d( + get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj", task_id=task_id + ), ) - return IO.NodeOutput(model_file) class TencentImageToModelNode(IO.ComfyNode): @@ -145,8 +167,9 @@ class TencentImageToModelNode(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="TencentImageToModelNode", - display_name="Hunyuan3D: Image(s) to Model (Pro)", + display_name="Hunyuan3D: Image(s) to Model", category="api node/3d/Tencent", + essentials_category="3D", inputs=[ IO.Combo.Input( "model", @@ -184,7 +207,9 @@ class TencentImageToModelNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DOBJ.Output(display_name="OBJ"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -266,22 +291,270 @@ class TencentImageToModelNode(IO.ComfyNode): EnablePBR=generate_type.get("pbr", None), PolygonType=generate_type.get("polygon_type", None), ), + is_rate_limited=_is_tencent_rate_limited, + ) + if response.Error: + raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}") + task_id = response.JobId + result = await poll_op( + cls, + ApiEndpoint(path="/proxy/tencent/hunyuan/3d-pro/query", method="POST"), + data=To3DProTaskQueryRequest(JobId=task_id), + response_model=To3DProTaskResultResponse, + status_extractor=lambda r: r.Status, + ) + return IO.NodeOutput( + f"{task_id}.glb", + await download_url_to_file_3d( + get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb", task_id=task_id + ), + await download_url_to_file_3d( + get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj", task_id=task_id + ), + ) + + +class TencentModelTo3DUVNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="TencentModelTo3DUVNode", + display_name="Hunyuan3D: Model to UV", + category="api node/3d/Tencent", + description="Perform UV unfolding on a 3D model to generate UV texture. " + "Input model must have less than 30000 faces.", + inputs=[ + IO.MultiType.Input( + "model_3d", + types=[IO.File3DGLB, IO.File3DOBJ, IO.File3DFBX, IO.File3DAny], + tooltip="Input 3D model (GLB, OBJ, or FBX)", + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.File3DOBJ.Output(display_name="OBJ"), + IO.File3DFBX.Output(display_name="FBX"), + IO.Image.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge(expr='{"type":"usd","usd":0.2}'), + ) + + SUPPORTED_FORMATS = {"glb", "obj", "fbx"} + + @classmethod + async def execute( + cls, + model_3d: Types.File3D, + seed: int, + ) -> IO.NodeOutput: + _ = seed + file_format = model_3d.format.lower() + if file_format not in cls.SUPPORTED_FORMATS: + raise ValueError( + f"Unsupported file format: '{file_format}'. " + f"Supported formats: {', '.join(sorted(cls.SUPPORTED_FORMATS))}." + ) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/tencent/hunyuan/3d-uv", method="POST"), + response_model=To3DProTaskCreateResponse, + data=To3DUVTaskRequest( + File=To3DUVFileInput( + Type=file_format.upper(), + Url=await upload_3d_model_to_comfyapi(cls, model_3d, file_format), + ) + ), + is_rate_limited=_is_tencent_rate_limited, ) if response.Error: raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}") result = await poll_op( cls, - ApiEndpoint(path="/proxy/tencent/hunyuan/3d-pro/query", method="POST"), + ApiEndpoint(path="/proxy/tencent/hunyuan/3d-uv/query", method="POST"), data=To3DProTaskQueryRequest(JobId=response.JobId), response_model=To3DProTaskResultResponse, status_extractor=lambda r: r.Status, ) - model_file = f"hunyuan_model_{response.JobId}.glb" - await download_url_to_bytesio( - get_glb_obj_from_response(result.ResultFile3Ds).Url, - os.path.join(get_output_directory(), model_file), + return IO.NodeOutput( + await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "obj").Url, "obj"), + await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"), + await download_url_to_image_tensor(get_file_from_response(result.ResultFile3Ds, "image").Url), + ) + + +class Tencent3DTextureEditNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Tencent3DTextureEditNode", + display_name="Hunyuan3D: 3D Texture Edit", + category="api node/3d/Tencent", + description="After inputting the 3D model, perform 3D model texture redrawing.", + inputs=[ + IO.MultiType.Input( + "model_3d", + types=[IO.File3DFBX, IO.File3DAny], + tooltip="3D model in FBX format. Model should have less than 100000 faces.", + ), + IO.String.Input( + "prompt", + multiline=True, + default="", + tooltip="Describes texture editing. Supports up to 1024 UTF-8 characters.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd": 0.6}""", + ), + ) + + @classmethod + async def execute( + cls, + model_3d: Types.File3D, + prompt: str, + seed: int, + ) -> IO.NodeOutput: + _ = seed + file_format = model_3d.format.lower() + if file_format != "fbx": + raise ValueError(f"Unsupported file format: '{file_format}'. Only FBX format is supported.") + validate_string(prompt, field_name="prompt", min_length=1, max_length=1024) + model_url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/tencent/hunyuan/3d-texture-edit", method="POST"), + response_model=To3DProTaskCreateResponse, + data=TextureEditTaskRequest( + File3D=To3DUVFileInput(Type=file_format.upper(), Url=model_url), + Prompt=prompt, + EnablePBR=True, + ), + is_rate_limited=_is_tencent_rate_limited, + ) + if response.Error: + raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}") + + result = await poll_op( + cls, + ApiEndpoint(path="/proxy/tencent/hunyuan/3d-texture-edit/query", method="POST"), + data=To3DProTaskQueryRequest(JobId=response.JobId), + response_model=To3DProTaskResultResponse, + status_extractor=lambda r: r.Status, + ) + return IO.NodeOutput( + await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "glb").Url, "glb"), + await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"), + ) + + +class Tencent3DPartNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Tencent3DPartNode", + display_name="Hunyuan3D: 3D Part", + category="api node/3d/Tencent", + description="Automatically perform component identification and generation based on the model structure.", + inputs=[ + IO.MultiType.Input( + "model_3d", + types=[IO.File3DFBX, IO.File3DAny], + tooltip="3D model in FBX format. Model should have less than 30000 faces.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.File3DFBX.Output(display_name="FBX"), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge(expr='{"type":"usd","usd":0.6}'), + ) + + @classmethod + async def execute( + cls, + model_3d: Types.File3D, + seed: int, + ) -> IO.NodeOutput: + _ = seed + file_format = model_3d.format.lower() + if file_format != "fbx": + raise ValueError(f"Unsupported file format: '{file_format}'. Only FBX format is supported.") + model_url = await upload_3d_model_to_comfyapi(cls, model_3d, file_format) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/tencent/hunyuan/3d-part", method="POST"), + response_model=To3DProTaskCreateResponse, + data=To3DUVTaskRequest( + File=To3DUVFileInput(Type=file_format.upper(), Url=model_url), + ), + is_rate_limited=_is_tencent_rate_limited, + ) + if response.Error: + raise ValueError(f"Task creation failed with code {response.Error.Code}: {response.Error.Message}") + result = await poll_op( + cls, + ApiEndpoint(path="/proxy/tencent/hunyuan/3d-part/query", method="POST"), + data=To3DProTaskQueryRequest(JobId=response.JobId), + response_model=To3DProTaskResultResponse, + status_extractor=lambda r: r.Status, + ) + return IO.NodeOutput( + await download_url_to_file_3d(get_file_from_response(result.ResultFile3Ds, "fbx").Url, "fbx"), ) - return IO.NodeOutput(model_file) class TencentHunyuan3DExtension(ComfyExtension): @@ -290,6 +563,9 @@ class TencentHunyuan3DExtension(ComfyExtension): return [ TencentTextToModelNode, TencentImageToModelNode, + # TencentModelTo3DUVNode, + # Tencent3DTextureEditNode, + Tencent3DPartNode, ] diff --git a/comfy_api_nodes/nodes_ideogram.py b/comfy_api_nodes/nodes_ideogram.py index feaf7a858..97c3609bd 100644 --- a/comfy_api_nodes/nodes_ideogram.py +++ b/comfy_api_nodes/nodes_ideogram.py @@ -261,6 +261,7 @@ class IdeogramV1(IO.ComfyNode): default="AUTO", tooltip="Determine if MagicPrompt should be used in generation", optional=True, + advanced=True, ), IO.Int.Input( "seed", @@ -394,6 +395,7 @@ class IdeogramV2(IO.ComfyNode): default="AUTO", tooltip="Determine if MagicPrompt should be used in generation", optional=True, + advanced=True, ), IO.Int.Input( "seed", @@ -411,6 +413,7 @@ class IdeogramV2(IO.ComfyNode): default="NONE", tooltip="Style type for generation (V2 only)", optional=True, + advanced=True, ), IO.String.Input( "negative_prompt", @@ -564,6 +567,7 @@ class IdeogramV3(IO.ComfyNode): default="AUTO", tooltip="Determine if MagicPrompt should be used in generation", optional=True, + advanced=True, ), IO.Int.Input( "seed", @@ -590,6 +594,7 @@ class IdeogramV3(IO.ComfyNode): default="DEFAULT", tooltip="Controls the trade-off between generation speed and quality", optional=True, + advanced=True, ), IO.Image.Input( "character_image", diff --git a/comfy_api_nodes/nodes_kling.py b/comfy_api_nodes/nodes_kling.py index 739fe1855..74fa078ff 100644 --- a/comfy_api_nodes/nodes_kling.py +++ b/comfy_api_nodes/nodes_kling.py @@ -38,7 +38,6 @@ from comfy_api_nodes.apis import ( KlingImageGenerationsRequest, KlingImageGenerationsResponse, KlingImageGenImageReferenceType, - KlingImageGenModelName, KlingImageGenAspectRatio, KlingVideoEffectsRequest, KlingVideoEffectsResponse, @@ -51,7 +50,9 @@ from comfy_api_nodes.apis import ( ) from comfy_api_nodes.apis.kling import ( ImageToVideoWithAudioRequest, + KlingAvatarRequest, MotionControlRequest, + MultiPromptEntry, OmniImageParamImage, OmniParamImage, OmniParamVideo, @@ -71,8 +72,10 @@ from comfy_api_nodes.util import ( sync_op, tensor_to_base64_string, upload_audio_to_comfyapi, + upload_image_to_comfyapi, upload_images_to_comfyapi, upload_video_to_comfyapi, + validate_audio_duration, validate_image_aspect_ratio, validate_image_dimensions, validate_string, @@ -80,6 +83,31 @@ from comfy_api_nodes.util import ( validate_video_duration, ) + +def _generate_storyboard_inputs(count: int) -> list: + inputs = [] + for i in range(1, count + 1): + inputs.extend( + [ + IO.String.Input( + f"storyboard_{i}_prompt", + multiline=True, + default="", + tooltip=f"Prompt for storyboard segment {i}. Max 512 characters.", + ), + IO.Int.Input( + f"storyboard_{i}_duration", + default=4, + min=1, + max=15, + display_mode=IO.NumberDisplay.slider, + tooltip=f"Duration for storyboard segment {i} in seconds.", + ), + ] + ) + return inputs + + KLING_API_VERSION = "v1" PATH_TEXT_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/text2video" PATH_IMAGE_TO_VIDEO = f"/proxy/kling/{KLING_API_VERSION}/videos/image2video" @@ -820,20 +848,48 @@ class OmniProTextToVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProTextToVideoNode", - display_name="Kling Omni Text to Video (Pro)", + display_name="Kling 3.0 Omni Text to Video", category="api node/video/Kling", description="Use text prompts to generate videos with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the video content. " - "This can include both positive and negative descriptions.", + "This can include both positive and negative descriptions. " + "Ignored when storyboards are enabled.", ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]), - IO.Combo.Input("duration", options=[5, 10]), + IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.DynamicCombo.Input( + "storyboards", + options=[ + IO.DynamicCombo.Option("disabled", []), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations. " + "Ignored for o1 model.", + optional=True, + ), + IO.Boolean.Input("generate_audio", default=False, optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -845,11 +901,15 @@ class OmniProTextToVideoNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( $mode := (widgets.resolution = "720p") ? "std" : "pro"; - $rates := {"std": 0.084, "pro": 0.112}; + $isV3 := $contains(widgets.model_name, "v3"); + $audio := $isV3 and widgets.generate_audio; + $rates := $audio + ? {"std": 0.112, "pro": 0.14} + : {"std": 0.084, "pro": 0.112}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -864,8 +924,45 @@ class OmniProTextToVideoNode(IO.ComfyNode): aspect_ratio: str, duration: int, resolution: str = "1080p", + storyboards: dict | None = None, + generate_audio: bool = False, + seed: int = 0, ) -> IO.NodeOutput: - validate_string(prompt, min_length=1, max_length=2500) + _ = seed + if model_name == "kling-video-o1": + if duration not in (5, 10): + raise ValueError("kling-video-o1 only supports durations of 5 or 10 seconds.") + if generate_audio: + raise ValueError("kling-video-o1 does not support audio generation.") + stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" + if stories_enabled and model_name == "kling-video-o1": + raise ValueError("kling-video-o1 does not support storyboards.") + validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500) + + multi_shot = None + multi_prompt_list = None + if stories_enabled: + count = int(storyboards["storyboards"].split()[0]) + multi_shot = True + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = storyboards[f"storyboard_{i}_prompt"] + sb_duration = storyboards[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list) + if total_storyboard_duration != duration: + raise ValueError( + f"Total storyboard duration ({total_storyboard_duration}s) " + f"must equal the global duration ({duration}s)." + ) + response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/videos/omni-video", method="POST"), @@ -876,6 +973,10 @@ class OmniProTextToVideoNode(IO.ComfyNode): aspect_ratio=aspect_ratio, duration=str(duration), mode="pro" if resolution == "1080p" else "std", + multi_shot=multi_shot, + multi_prompt=multi_prompt_list, + shot_type="customize" if multi_shot else None, + sound="on" if generate_audio else "off", ), ) return await finish_omni_video_task(cls, response) @@ -887,24 +988,26 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProFirstLastFrameNode", - display_name="Kling Omni First-Last-Frame to Video (Pro)", + display_name="Kling 3.0 Omni First-Last-Frame to Video", category="api node/video/Kling", description="Use a start frame, an optional end frame, or reference images with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the video content. " - "This can include both positive and negative descriptions.", + "This can include both positive and negative descriptions. " + "Ignored when storyboards are enabled.", ), - IO.Int.Input("duration", default=5, min=3, max=10, display_mode=IO.NumberDisplay.slider), + IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), IO.Image.Input("first_frame"), IO.Image.Input( "end_frame", optional=True, tooltip="An optional end frame for the video. " - "This cannot be used simultaneously with 'reference_images'.", + "This cannot be used simultaneously with 'reference_images'. " + "Does not work with storyboards.", ), IO.Image.Input( "reference_images", @@ -912,6 +1015,38 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): tooltip="Up to 6 additional reference images.", ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.DynamicCombo.Input( + "storyboards", + options=[ + IO.DynamicCombo.Option("disabled", []), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations. " + "Only supported for kling-v3-omni.", + optional=True, + ), + IO.Boolean.Input( + "generate_audio", + default=False, + optional=True, + tooltip="Generate audio for the video. Only supported for kling-v3-omni.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -923,11 +1058,15 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( $mode := (widgets.resolution = "720p") ? "std" : "pro"; - $rates := {"std": 0.084, "pro": 0.112}; + $isV3 := $contains(widgets.model_name, "v3"); + $audio := $isV3 and widgets.generate_audio; + $rates := $audio + ? {"std": 0.112, "pro": 0.14} + : {"std": 0.084, "pro": 0.112}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -944,15 +1083,59 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): end_frame: Input.Image | None = None, reference_images: Input.Image | None = None, resolution: str = "1080p", + storyboards: dict | None = None, + generate_audio: bool = False, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed + if model_name == "kling-video-o1": + if duration > 10: + raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") + if generate_audio: + raise ValueError("kling-video-o1 does not support audio generation.") + stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" + if stories_enabled and model_name == "kling-video-o1": + raise ValueError("kling-video-o1 does not support storyboards.") prompt = normalize_omni_prompt_references(prompt) - validate_string(prompt, min_length=1, max_length=2500) + validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500) if end_frame is not None and reference_images is not None: raise ValueError("The 'end_frame' input cannot be used simultaneously with 'reference_images'.") - if duration not in (5, 10) and end_frame is None and reference_images is None: + if end_frame is not None and stories_enabled: + raise ValueError("The 'end_frame' input cannot be used simultaneously with storyboards.") + if ( + model_name == "kling-video-o1" + and duration not in (5, 10) + and end_frame is None + and reference_images is None + ): raise ValueError( "Duration is only supported for 5 or 10 seconds if there is no end frame or reference images." ) + + multi_shot = None + multi_prompt_list = None + if stories_enabled: + count = int(storyboards["storyboards"].split()[0]) + multi_shot = True + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = storyboards[f"storyboard_{i}_prompt"] + sb_duration = storyboards[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list) + if total_storyboard_duration != duration: + raise ValueError( + f"Total storyboard duration ({total_storyboard_duration}s) " + f"must equal the global duration ({duration}s)." + ) + validate_image_dimensions(first_frame, min_width=300, min_height=300) validate_image_aspect_ratio(first_frame, (1, 2.5), (2.5, 1)) image_list: list[OmniParamImage] = [ @@ -988,6 +1171,10 @@ class OmniProFirstLastFrameNode(IO.ComfyNode): duration=str(duration), image_list=image_list, mode="pro" if resolution == "1080p" else "std", + sound="on" if generate_audio else "off", + multi_shot=multi_shot, + multi_prompt=multi_prompt_list, + shot_type="customize" if multi_shot else None, ), ) return await finish_omni_video_task(cls, response) @@ -999,24 +1186,57 @@ class OmniProImageToVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProImageToVideoNode", - display_name="Kling Omni Image to Video (Pro)", + display_name="Kling 3.0 Omni Image to Video", category="api node/video/Kling", description="Use up to 7 reference images to generate a video with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the video content. " - "This can include both positive and negative descriptions.", + "This can include both positive and negative descriptions. " + "Ignored when storyboards are enabled.", ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]), - IO.Int.Input("duration", default=3, min=3, max=10, display_mode=IO.NumberDisplay.slider), + IO.Int.Input("duration", default=5, min=3, max=15, display_mode=IO.NumberDisplay.slider), IO.Image.Input( "reference_images", tooltip="Up to 7 reference images.", ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.DynamicCombo.Input( + "storyboards", + options=[ + IO.DynamicCombo.Option("disabled", []), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations. " + "Only supported for kling-v3-omni.", + optional=True, + ), + IO.Boolean.Input( + "generate_audio", + default=False, + optional=True, + tooltip="Generate audio for the video. Only supported for kling-v3-omni.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -1028,11 +1248,15 @@ class OmniProImageToVideoNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["duration", "resolution", "model_name", "generate_audio"]), expr=""" ( $mode := (widgets.resolution = "720p") ? "std" : "pro"; - $rates := {"std": 0.084, "pro": 0.112}; + $isV3 := $contains(widgets.model_name, "v3"); + $audio := $isV3 and widgets.generate_audio; + $rates := $audio + ? {"std": 0.112, "pro": 0.14} + : {"std": 0.084, "pro": 0.112}; {"type":"usd","usd": $lookup($rates, $mode) * widgets.duration} ) """, @@ -1048,9 +1272,46 @@ class OmniProImageToVideoNode(IO.ComfyNode): duration: int, reference_images: Input.Image, resolution: str = "1080p", + storyboards: dict | None = None, + generate_audio: bool = False, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed + if model_name == "kling-video-o1": + if duration > 10: + raise ValueError("kling-video-o1 does not support durations greater than 10 seconds.") + if generate_audio: + raise ValueError("kling-video-o1 does not support audio generation.") + stories_enabled = storyboards is not None and storyboards["storyboards"] != "disabled" + if stories_enabled and model_name == "kling-video-o1": + raise ValueError("kling-video-o1 does not support storyboards.") prompt = normalize_omni_prompt_references(prompt) - validate_string(prompt, min_length=1, max_length=2500) + validate_string(prompt, strip_whitespace=True, min_length=0 if stories_enabled else 1, max_length=2500) + + multi_shot = None + multi_prompt_list = None + if stories_enabled: + count = int(storyboards["storyboards"].split()[0]) + multi_shot = True + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = storyboards[f"storyboard_{i}_prompt"] + sb_duration = storyboards[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + total_storyboard_duration = sum(int(e.duration) for e in multi_prompt_list) + if total_storyboard_duration != duration: + raise ValueError( + f"Total storyboard duration ({total_storyboard_duration}s) " + f"must equal the global duration ({duration}s)." + ) + if get_number_of_images(reference_images) > 7: raise ValueError("The maximum number of reference images is 7.") for i in reference_images: @@ -1070,6 +1331,10 @@ class OmniProImageToVideoNode(IO.ComfyNode): duration=str(duration), image_list=image_list, mode="pro" if resolution == "1080p" else "std", + sound="on" if generate_audio else "off", + multi_shot=multi_shot, + multi_prompt=multi_prompt_list, + shot_type="customize" if multi_shot else None, ), ) return await finish_omni_video_task(cls, response) @@ -1081,11 +1346,11 @@ class OmniProVideoToVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProVideoToVideoNode", - display_name="Kling Omni Video to Video (Pro)", + display_name="Kling 3.0 Omni Video to Video", category="api node/video/Kling", description="Use a video and up to 4 reference images to generate a video with the latest Kling model.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, @@ -1102,6 +1367,17 @@ class OmniProVideoToVideoNode(IO.ComfyNode): optional=True, ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -1135,7 +1411,9 @@ class OmniProVideoToVideoNode(IO.ComfyNode): keep_original_sound: bool, reference_images: Input.Image | None = None, resolution: str = "1080p", + seed: int = 0, ) -> IO.NodeOutput: + _ = seed prompt = normalize_omni_prompt_references(prompt) validate_string(prompt, min_length=1, max_length=2500) validate_video_duration(reference_video, min_duration=3.0, max_duration=10.05) @@ -1179,11 +1457,11 @@ class OmniProEditVideoNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProEditVideoNode", - display_name="Kling Omni Edit Video (Pro)", + display_name="Kling 3.0 Omni Edit Video", category="api node/video/Kling", description="Edit an existing video with the latest model from Kling.", inputs=[ - IO.Combo.Input("model_name", options=["kling-video-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-video-o1"]), IO.String.Input( "prompt", multiline=True, @@ -1198,6 +1476,17 @@ class OmniProEditVideoNode(IO.ComfyNode): optional=True, ), IO.Combo.Input("resolution", options=["1080p", "720p"], optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Video.Output(), @@ -1229,7 +1518,9 @@ class OmniProEditVideoNode(IO.ComfyNode): keep_original_sound: bool, reference_images: Input.Image | None = None, resolution: str = "1080p", + seed: int = 0, ) -> IO.NodeOutput: + _ = seed prompt = normalize_omni_prompt_references(prompt) validate_string(prompt, min_length=1, max_length=2500) validate_video_duration(video, min_duration=3.0, max_duration=10.05) @@ -1273,27 +1564,43 @@ class OmniProImageNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingOmniProImageNode", - display_name="Kling Omni Image (Pro)", + display_name="Kling 3.0 Omni Image", category="api node/image/Kling", description="Create or edit images with the latest model from Kling.", inputs=[ - IO.Combo.Input("model_name", options=["kling-image-o1"]), + IO.Combo.Input("model_name", options=["kling-v3-omni", "kling-image-o1"]), IO.String.Input( "prompt", multiline=True, tooltip="A text prompt describing the image content. " "This can include both positive and negative descriptions.", ), - IO.Combo.Input("resolution", options=["1K", "2K"]), + IO.Combo.Input("resolution", options=["1K", "2K", "4K"]), IO.Combo.Input( "aspect_ratio", options=["16:9", "9:16", "1:1", "4:3", "3:4", "3:2", "2:3", "21:9"], ), + IO.Combo.Input( + "series_amount", + options=["disabled", "2", "3", "4", "5", "6", "7", "8", "9"], + tooltip="Generate a series of images. Not supported for kling-image-o1.", + ), IO.Image.Input( "reference_images", tooltip="Up to 10 additional reference images.", optional=True, ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Image.Output(), @@ -1305,7 +1612,16 @@ class OmniProImageNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - expr="""{"type":"usd","usd":0.028}""", + depends_on=IO.PriceBadgeDepends(widgets=["resolution", "series_amount", "model_name"]), + expr=""" + ( + $prices := {"1k": 0.028, "2k": 0.028, "4k": 0.056}; + $base := $lookup($prices, widgets.resolution); + $isO1 := widgets.model_name = "kling-image-o1"; + $mult := ($isO1 or widgets.series_amount = "disabled") ? 1 : $number(widgets.series_amount); + {"type":"usd","usd": $base * $mult} + ) + """, ), ) @@ -1316,8 +1632,13 @@ class OmniProImageNode(IO.ComfyNode): prompt: str, resolution: str, aspect_ratio: str, + series_amount: str = "disabled", reference_images: Input.Image | None = None, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed + if model_name == "kling-image-o1" and resolution == "4K": + raise ValueError("4K resolution is not supported for kling-image-o1 model.") prompt = normalize_omni_prompt_references(prompt) validate_string(prompt, min_length=1, max_length=2500) image_list: list[OmniImageParamImage] = [] @@ -1329,6 +1650,9 @@ class OmniProImageNode(IO.ComfyNode): validate_image_aspect_ratio(i, (1, 2.5), (2.5, 1)) for i in await upload_images_to_comfyapi(cls, reference_images, wait_label="Uploading reference image"): image_list.append(OmniImageParamImage(image=i)) + use_series = series_amount != "disabled" + if use_series and model_name == "kling-image-o1": + raise ValueError("kling-image-o1 does not support series generation.") response = await sync_op( cls, ApiEndpoint(path="/proxy/kling/v1/images/omni-image", method="POST"), @@ -1339,6 +1663,8 @@ class OmniProImageNode(IO.ComfyNode): resolution=resolution.lower(), aspect_ratio=aspect_ratio, image_list=image_list if image_list else None, + result_type="series" if use_series else None, + series_amount=int(series_amount) if use_series else None, ), ) if response.code: @@ -1351,7 +1677,9 @@ class OmniProImageNode(IO.ComfyNode): response_model=TaskStatusResponse, status_extractor=lambda r: (r.data.task_status if r.data else None), ) - return IO.NodeOutput(await download_url_to_image_tensor(final_response.data.task_result.images[0].url)) + images = final_response.data.task_result.series_images or final_response.data.task_result.images + tensors = [await download_url_to_image_tensor(img.url) for img in images] + return IO.NodeOutput(torch.cat(tensors, dim=0)) class KlingCameraControlT2VNode(IO.ComfyNode): @@ -1936,6 +2264,7 @@ class KlingLipSyncAudioToVideoNode(IO.ComfyNode): node_id="KlingLipSyncAudioToVideoNode", display_name="Kling Lip Sync Video with Audio", category="api node/video/Kling", + essentials_category="Video Generation", description="Kling Lip Sync Audio to Video Node. Syncs mouth movements in a video file to the audio content of an audio file. When using, ensure that the audio contains clearly distinguishable vocals and that the video contains a distinct face. The audio file should not be larger than 5MB. The video file should not be larger than 100MB, should have height/width between 720px and 1920px, and should be between 2s and 10s in length.", inputs=[ IO.Video.Input("video"), @@ -2007,6 +2336,7 @@ class KlingLipSyncTextToVideoNode(IO.ComfyNode): max=2.0, display_mode=IO.NumberDisplay.slider, tooltip="Speech Rate. Valid range: 0.8~2.0, accurate to one decimal place.", + advanced=True, ), ], outputs=[ @@ -2119,7 +2449,7 @@ class KlingImageGenerationNode(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingImageGenerationNode", - display_name="Kling Image Generation", + display_name="Kling 3.0 Image", category="api node/image/Kling", description="Kling Image Generation Node. Generate an image from a text prompt with an optional reference image.", inputs=[ @@ -2128,6 +2458,7 @@ class KlingImageGenerationNode(IO.ComfyNode): IO.Combo.Input( "image_type", options=[i.value for i in KlingImageGenImageReferenceType], + advanced=True, ), IO.Float.Input( "image_fidelity", @@ -2137,6 +2468,7 @@ class KlingImageGenerationNode(IO.ComfyNode): step=0.01, display_mode=IO.NumberDisplay.slider, tooltip="Reference intensity for user-uploaded images", + advanced=True, ), IO.Float.Input( "human_fidelity", @@ -2146,12 +2478,9 @@ class KlingImageGenerationNode(IO.ComfyNode): step=0.01, display_mode=IO.NumberDisplay.slider, tooltip="Subject reference similarity", + advanced=True, ), - IO.Combo.Input( - "model_name", - options=[i.value for i in KlingImageGenModelName], - default="kling-v2", - ), + IO.Combo.Input("model_name", options=["kling-v3", "kling-v2", "kling-v1-5"]), IO.Combo.Input( "aspect_ratio", options=[i.value for i in KlingImageGenAspectRatio], @@ -2165,6 +2494,17 @@ class KlingImageGenerationNode(IO.ComfyNode): tooltip="Number of generated images", ), IO.Image.Input("image", optional=True), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + optional=True, + ), ], outputs=[ IO.Image.Output(), @@ -2183,7 +2523,7 @@ class KlingImageGenerationNode(IO.ComfyNode): $base := $contains($m,"kling-v1-5") ? (inputs.image.connected ? 0.028 : 0.014) - : ($contains($m,"kling-v1") ? 0.0035 : 0.014); + : $contains($m,"kling-v3") ? 0.028 : 0.014; {"type":"usd","usd": $base * widgets.n} ) """, @@ -2193,7 +2533,7 @@ class KlingImageGenerationNode(IO.ComfyNode): @classmethod async def execute( cls, - model_name: KlingImageGenModelName, + model_name: str, prompt: str, negative_prompt: str, image_type: KlingImageGenImageReferenceType, @@ -2202,17 +2542,11 @@ class KlingImageGenerationNode(IO.ComfyNode): n: int, aspect_ratio: KlingImageGenAspectRatio, image: torch.Tensor | None = None, + seed: int = 0, ) -> IO.NodeOutput: + _ = seed validate_string(prompt, field_name="prompt", min_length=1, max_length=MAX_PROMPT_LENGTH_IMAGE_GEN) validate_string(negative_prompt, field_name="negative_prompt", max_length=MAX_PROMPT_LENGTH_IMAGE_GEN) - - if image is None: - image_type = None - elif model_name == KlingImageGenModelName.kling_v1: - raise ValueError(f"The model {KlingImageGenModelName.kling_v1.value} does not support reference images.") - else: - image = tensor_to_base64_string(image) - task_creation_response = await sync_op( cls, ApiEndpoint(path=PATH_IMAGE_GENERATIONS, method="POST"), @@ -2221,8 +2555,8 @@ class KlingImageGenerationNode(IO.ComfyNode): model_name=model_name, prompt=prompt, negative_prompt=negative_prompt, - image=image, - image_reference=image_type, + image=tensor_to_base64_string(image) if image is not None else None, + image_reference=image_type if image is not None else None, image_fidelity=image_fidelity, human_fidelity=human_fidelity, n=n, @@ -2252,7 +2586,7 @@ class TextToVideoWithAudio(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingTextToVideoWithAudio", - display_name="Kling Text to Video with Audio", + display_name="Kling 2.6 Text to Video with Audio", category="api node/video/Kling", inputs=[ IO.Combo.Input("model_name", options=["kling-v2-6"]), @@ -2260,7 +2594,7 @@ class TextToVideoWithAudio(IO.ComfyNode): IO.Combo.Input("mode", options=["pro"]), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "1:1"]), IO.Combo.Input("duration", options=[5, 10]), - IO.Boolean.Input("generate_audio", default=True), + IO.Boolean.Input("generate_audio", default=True, advanced=True), ], outputs=[ IO.Video.Output(), @@ -2320,7 +2654,7 @@ class ImageToVideoWithAudio(IO.ComfyNode): def define_schema(cls) -> IO.Schema: return IO.Schema( node_id="KlingImageToVideoWithAudio", - display_name="Kling Image(First Frame) to Video with Audio", + display_name="Kling 2.6 Image(First Frame) to Video with Audio", category="api node/video/Kling", inputs=[ IO.Combo.Input("model_name", options=["kling-v2-6"]), @@ -2328,7 +2662,7 @@ class ImageToVideoWithAudio(IO.ComfyNode): IO.String.Input("prompt", multiline=True, tooltip="Positive text prompt."), IO.Combo.Input("mode", options=["pro"]), IO.Combo.Input("duration", options=[5, 10]), - IO.Boolean.Input("generate_audio", default=True), + IO.Boolean.Input("generate_audio", default=True, advanced=True), ], outputs=[ IO.Video.Output(), @@ -2478,6 +2812,432 @@ class MotionControl(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) +class KlingVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="KlingVideoNode", + display_name="Kling 3.0 Video", + category="api node/video/Kling", + description="Generate videos with Kling V3. " + "Supports text-to-video and image-to-video with optional storyboard multi-prompt and audio generation.", + inputs=[ + IO.DynamicCombo.Input( + "multi_shot", + options=[ + IO.DynamicCombo.Option( + "disabled", + [ + IO.String.Input("prompt", multiline=True, default=""), + IO.String.Input("negative_prompt", multiline=True, default=""), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + display_mode=IO.NumberDisplay.slider, + ), + ], + ), + IO.DynamicCombo.Option("1 storyboard", _generate_storyboard_inputs(1)), + IO.DynamicCombo.Option("2 storyboards", _generate_storyboard_inputs(2)), + IO.DynamicCombo.Option("3 storyboards", _generate_storyboard_inputs(3)), + IO.DynamicCombo.Option("4 storyboards", _generate_storyboard_inputs(4)), + IO.DynamicCombo.Option("5 storyboards", _generate_storyboard_inputs(5)), + IO.DynamicCombo.Option("6 storyboards", _generate_storyboard_inputs(6)), + ], + tooltip="Generate a series of video segments with individual prompts and durations.", + ), + IO.Boolean.Input("generate_audio", default=True), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "kling-v3", + [ + IO.Combo.Input("resolution", options=["1080p", "720p"]), + IO.Combo.Input( + "aspect_ratio", + options=["16:9", "9:16", "1:1"], + tooltip="Ignored in image-to-video mode.", + ), + ], + ), + ], + tooltip="Model and generation settings.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + IO.Image.Input( + "start_frame", + optional=True, + tooltip="Optional start frame image. When connected, switches to image-to-video mode.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=[ + "model.resolution", + "generate_audio", + "multi_shot", + "multi_shot.duration", + "multi_shot.storyboard_1_duration", + "multi_shot.storyboard_2_duration", + "multi_shot.storyboard_3_duration", + "multi_shot.storyboard_4_duration", + "multi_shot.storyboard_5_duration", + "multi_shot.storyboard_6_duration", + ], + ), + expr=""" + ( + $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $res := $lookup(widgets, "model.resolution"); + $audio := widgets.generate_audio ? "on" : "off"; + $rate := $lookup($lookup($rates, $res), $audio); + $ms := widgets.multi_shot; + $isSb := $ms != "disabled"; + $n := $isSb ? $number($substring($ms, 0, 1)) : 0; + $d1 := $lookup(widgets, "multi_shot.storyboard_1_duration"); + $d2 := $n >= 2 ? $lookup(widgets, "multi_shot.storyboard_2_duration") : 0; + $d3 := $n >= 3 ? $lookup(widgets, "multi_shot.storyboard_3_duration") : 0; + $d4 := $n >= 4 ? $lookup(widgets, "multi_shot.storyboard_4_duration") : 0; + $d5 := $n >= 5 ? $lookup(widgets, "multi_shot.storyboard_5_duration") : 0; + $d6 := $n >= 6 ? $lookup(widgets, "multi_shot.storyboard_6_duration") : 0; + $dur := $isSb ? $d1 + $d2 + $d3 + $d4 + $d5 + $d6 : $lookup(widgets, "multi_shot.duration"); + {"type":"usd","usd": $rate * $dur} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + multi_shot: dict, + generate_audio: bool, + model: dict, + seed: int, + start_frame: Input.Image | None = None, + ) -> IO.NodeOutput: + _ = seed + mode = "pro" if model["resolution"] == "1080p" else "std" + custom_multi_shot = False + if multi_shot["multi_shot"] == "disabled": + shot_type = None + else: + shot_type = "customize" + custom_multi_shot = True + + multi_prompt_list = None + if shot_type == "customize": + count = int(multi_shot["multi_shot"].split()[0]) + multi_prompt_list = [] + for i in range(1, count + 1): + sb_prompt = multi_shot[f"storyboard_{i}_prompt"] + sb_duration = multi_shot[f"storyboard_{i}_duration"] + validate_string(sb_prompt, field_name=f"storyboard_{i}_prompt", min_length=1, max_length=512) + multi_prompt_list.append( + MultiPromptEntry( + index=i, + prompt=sb_prompt, + duration=str(sb_duration), + ) + ) + duration = sum(int(e.duration) for e in multi_prompt_list) + if duration < 3 or duration > 15: + raise ValueError( + f"Total storyboard duration ({duration}s) must be between 3 and 15 seconds." + ) + else: + duration = multi_shot["duration"] + validate_string(multi_shot["prompt"], min_length=1, max_length=2500) + + if start_frame is not None: + validate_image_dimensions(start_frame, min_width=300, min_height=300) + validate_image_aspect_ratio(start_frame, (1, 2.5), (2.5, 1)) + image_url = await upload_image_to_comfyapi(cls, start_frame, wait_label="Uploading start frame") + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"), + response_model=TaskStatusResponse, + data=ImageToVideoWithAudioRequest( + model_name=model["model"], + image=image_url, + prompt=None if custom_multi_shot else multi_shot["prompt"], + negative_prompt=None if custom_multi_shot else multi_shot["negative_prompt"], + mode=mode, + duration=str(duration), + sound="on" if generate_audio else "off", + multi_shot=True if shot_type else None, + multi_prompt=multi_prompt_list, + shot_type=shot_type, + ), + ) + poll_path = f"/proxy/kling/v1/videos/image2video/{response.data.task_id}" + else: + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/v1/videos/text2video", method="POST"), + response_model=TaskStatusResponse, + data=TextToVideoWithAudioRequest( + model_name=model["model"], + aspect_ratio=model["aspect_ratio"], + prompt=None if custom_multi_shot else multi_shot["prompt"], + negative_prompt=None if custom_multi_shot else multi_shot["negative_prompt"], + mode=mode, + duration=str(duration), + sound="on" if generate_audio else "off", + multi_shot=True if shot_type else None, + multi_prompt=multi_prompt_list, + shot_type=shot_type, + ), + ) + poll_path = f"/proxy/kling/v1/videos/text2video/{response.data.task_id}" + + if response.code: + raise RuntimeError( + f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}" + ) + final_response = await poll_op( + cls, + ApiEndpoint(path=poll_path), + response_model=TaskStatusResponse, + status_extractor=lambda r: (r.data.task_status if r.data else None), + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) + + +class KlingFirstLastFrameNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="KlingFirstLastFrameNode", + display_name="Kling 3.0 First-Last-Frame to Video", + category="api node/video/Kling", + description="Generate videos with Kling V3 using first and last frames.", + inputs=[ + IO.String.Input("prompt", multiline=True, default=""), + IO.Int.Input( + "duration", + default=5, + min=3, + max=15, + display_mode=IO.NumberDisplay.slider, + ), + IO.Image.Input("first_frame"), + IO.Image.Input("end_frame"), + IO.Boolean.Input("generate_audio", default=True), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "kling-v3", + [ + IO.Combo.Input("resolution", options=["1080p", "720p"]), + ], + ), + ], + tooltip="Model and generation settings.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends( + widgets=["model.resolution", "generate_audio", "duration"], + ), + expr=""" + ( + $rates := {"1080p": {"off": 0.112, "on": 0.168}, "720p": {"off": 0.084, "on": 0.126}}; + $res := $lookup(widgets, "model.resolution"); + $audio := widgets.generate_audio ? "on" : "off"; + $rate := $lookup($lookup($rates, $res), $audio); + {"type":"usd","usd": $rate * widgets.duration} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + duration: int, + first_frame: Input.Image, + end_frame: Input.Image, + generate_audio: bool, + model: dict, + seed: int, + ) -> IO.NodeOutput: + _ = seed + validate_string(prompt, min_length=1, max_length=2500) + validate_image_dimensions(first_frame, min_width=300, min_height=300) + validate_image_aspect_ratio(first_frame, (1, 2.5), (2.5, 1)) + validate_image_dimensions(end_frame, min_width=300, min_height=300) + validate_image_aspect_ratio(end_frame, (1, 2.5), (2.5, 1)) + image_url = await upload_image_to_comfyapi(cls, first_frame, wait_label="Uploading first frame") + image_tail_url = await upload_image_to_comfyapi(cls, end_frame, wait_label="Uploading end frame") + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/v1/videos/image2video", method="POST"), + response_model=TaskStatusResponse, + data=ImageToVideoWithAudioRequest( + model_name=model["model"], + image=image_url, + image_tail=image_tail_url, + prompt=prompt, + mode="pro" if model["resolution"] == "1080p" else "std", + duration=str(duration), + sound="on" if generate_audio else "off", + ), + ) + if response.code: + raise RuntimeError( + f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}" + ) + final_response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/kling/v1/videos/image2video/{response.data.task_id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: (r.data.task_status if r.data else None), + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) + + +class KlingAvatarNode(IO.ComfyNode): + + @classmethod + def define_schema(cls) -> IO.Schema: + return IO.Schema( + node_id="KlingAvatarNode", + display_name="Kling Avatar 2.0", + category="api node/video/Kling", + description="Generate broadcast-style digital human videos from a single photo and an audio file.", + inputs=[ + IO.Image.Input( + "image", + tooltip="Avatar reference image. " + "Width and height must be at least 300px. Aspect ratio must be between 1:2.5 and 2.5:1.", + ), + IO.Audio.Input( + "sound_file", + tooltip="Audio input. Must be between 2 and 300 seconds in duration.", + ), + IO.Combo.Input("mode", options=["std", "pro"]), + IO.String.Input( + "prompt", + multiline=True, + default="", + optional=True, + tooltip="Optional prompt to define avatar actions, emotions, and camera movements.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=2147483647, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + tooltip="Seed controls whether the node should re-run; " + "results are non-deterministic regardless of seed.", + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["mode"]), + expr=""" + ( + $prices := {"std": 0.056, "pro": 0.112}; + {"type":"usd","usd": $lookup($prices, widgets.mode), "format":{"suffix":"/second"}} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + image: Input.Image, + sound_file: Input.Audio, + mode: str, + seed: int, + prompt: str = "", + ) -> IO.NodeOutput: + validate_image_dimensions(image, min_width=300, min_height=300) + validate_image_aspect_ratio(image, (1, 2.5), (2.5, 1)) + validate_audio_duration(sound_file, min_duration=2, max_duration=300) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/kling/v1/videos/avatar/image2video", method="POST"), + response_model=TaskStatusResponse, + data=KlingAvatarRequest( + image=await upload_image_to_comfyapi(cls, image), + sound_file=await upload_audio_to_comfyapi( + cls, sound_file, container_format="mp3", codec_name="libmp3lame", mime_type="audio/mpeg" + ), + prompt=prompt or None, + mode=mode, + ), + ) + if response.code: + raise RuntimeError( + f"Kling request failed. Code: {response.code}, Message: {response.message}, Data: {response.data}" + ) + final_response = await poll_op( + cls, + ApiEndpoint(path=f"/proxy/kling/v1/videos/avatar/image2video/{response.data.task_id}"), + response_model=TaskStatusResponse, + status_extractor=lambda r: (r.data.task_status if r.data else None), + max_poll_attempts=800, + ) + return IO.NodeOutput(await download_url_to_video_output(final_response.data.task_result.videos[0].url)) + + class KlingExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -2504,6 +3264,9 @@ class KlingExtension(ComfyExtension): TextToVideoWithAudio, ImageToVideoWithAudio, MotionControl, + KlingVideoNode, + KlingFirstLastFrameNode, + KlingAvatarNode, ] diff --git a/comfy_api_nodes/nodes_ltxv.py b/comfy_api_nodes/nodes_ltxv.py index c6424af92..0a219af96 100644 --- a/comfy_api_nodes/nodes_ltxv.py +++ b/comfy_api_nodes/nodes_ltxv.py @@ -74,6 +74,7 @@ class TextToVideoNode(IO.ComfyNode): default=False, optional=True, tooltip="When true, the generated video will include AI-generated audio matching the scene.", + advanced=True, ), ], outputs=[ @@ -151,6 +152,7 @@ class ImageToVideoNode(IO.ComfyNode): default=False, optional=True, tooltip="When true, the generated video will include AI-generated audio matching the scene.", + advanced=True, ), ], outputs=[ diff --git a/comfy_api_nodes/nodes_magnific.py b/comfy_api_nodes/nodes_magnific.py index 013e71cc8..0f53208d4 100644 --- a/comfy_api_nodes/nodes_magnific.py +++ b/comfy_api_nodes/nodes_magnific.py @@ -30,6 +30,30 @@ from comfy_api_nodes.util import ( validate_image_dimensions, ) +_EUR_TO_USD = 1.19 + + +def _tier_price_eur(megapixels: float) -> float: + """Price in EUR for a single Magnific upscaling step based on input megapixels.""" + if megapixels <= 1.3: + return 0.143 + if megapixels <= 3.0: + return 0.286 + if megapixels <= 6.4: + return 0.429 + return 1.716 + + +def _calculate_magnific_upscale_price_usd(width: int, height: int, scale: int) -> float: + """Calculate total Magnific upscale price in USD for given input dimensions and scale factor.""" + num_steps = int(math.log2(scale)) + total_eur = 0.0 + pixels = width * height + for _ in range(num_steps): + total_eur += _tier_price_eur(pixels / 1_000_000) + pixels *= 4 + return round(total_eur * _EUR_TO_USD, 2) + class MagnificImageUpscalerCreativeNode(IO.ComfyNode): @classmethod @@ -86,11 +110,13 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode): IO.Combo.Input( "engine", options=["automatic", "magnific_illusio", "magnific_sharpy", "magnific_sparkle"], + advanced=True, ), IO.Boolean.Input( "auto_downscale", default=False, tooltip="Automatically downscale input image if output would exceed maximum pixel limit.", + advanced=True, ), ], outputs=[ @@ -103,11 +129,20 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["scale_factor"]), + depends_on=IO.PriceBadgeDepends(widgets=["scale_factor", "auto_downscale"]), expr=""" ( - $max := widgets.scale_factor = "2x" ? 1.326 : 1.657; - {"type": "range_usd", "min_usd": 0.11, "max_usd": $max} + $ad := widgets.auto_downscale; + $mins := $ad + ? {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.515} + : {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.844}; + $maxs := {"2x": 0.515, "4x": 0.844, "8x": 1.015, "16x": 1.187}; + { + "type": "range_usd", + "min_usd": $lookup($mins, widgets.scale_factor), + "max_usd": $lookup($maxs, widgets.scale_factor), + "format": { "approximate": true } + } ) """, ), @@ -168,6 +203,10 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode): f"Use a smaller input image or lower scale factor." ) + final_height, final_width = get_image_dimensions(image) + actual_scale = int(scale_factor.rstrip("x")) + price_usd = _calculate_magnific_upscale_price_usd(final_width, final_height, actual_scale) + initial_res = await sync_op( cls, ApiEndpoint(path="/proxy/freepik/v1/ai/image-upscaler", method="POST"), @@ -189,6 +228,7 @@ class MagnificImageUpscalerCreativeNode(IO.ComfyNode): ApiEndpoint(path=f"/proxy/freepik/v1/ai/image-upscaler/{initial_res.task_id}"), response_model=TaskResponse, status_extractor=lambda x: x.status, + price_extractor=lambda _: price_usd, poll_interval=10.0, max_poll_attempts=480, ) @@ -242,6 +282,7 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode): "auto_downscale", default=False, tooltip="Automatically downscale input image if output would exceed maximum resolution.", + advanced=True, ), ], outputs=[ @@ -257,8 +298,14 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode): depends_on=IO.PriceBadgeDepends(widgets=["scale_factor"]), expr=""" ( - $max := widgets.scale_factor = "2x" ? 1.326 : 1.657; - {"type": "range_usd", "min_usd": 0.11, "max_usd": $max} + $mins := {"2x": 0.172, "4x": 0.343, "8x": 0.515, "16x": 0.844}; + $maxs := {"2x": 2.045, "4x": 2.545, "8x": 2.889, "16x": 3.06}; + { + "type": "range_usd", + "min_usd": $lookup($mins, widgets.scale_factor), + "max_usd": $lookup($maxs, widgets.scale_factor), + "format": { "approximate": true } + } ) """, ), @@ -321,6 +368,9 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode): f"Use a smaller input image or lower scale factor." ) + final_height, final_width = get_image_dimensions(image) + price_usd = _calculate_magnific_upscale_price_usd(final_width, final_height, requested_scale) + initial_res = await sync_op( cls, ApiEndpoint(path="/proxy/freepik/v1/ai/image-upscaler-precision-v2", method="POST"), @@ -339,6 +389,7 @@ class MagnificImageUpscalerPreciseV2Node(IO.ComfyNode): ApiEndpoint(path=f"/proxy/freepik/v1/ai/image-upscaler-precision-v2/{initial_res.task_id}"), response_model=TaskResponse, status_extractor=lambda x: x.status, + price_extractor=lambda _: price_usd, poll_interval=10.0, max_poll_attempts=480, ) @@ -392,6 +443,7 @@ class MagnificImageStyleTransferNode(IO.ComfyNode): "softy", ], tooltip="Processing engine selection.", + advanced=True, ), IO.DynamicCombo.Input( "portrait_mode", @@ -420,6 +472,7 @@ class MagnificImageStyleTransferNode(IO.ComfyNode): default=True, tooltip="When disabled, expect each generation to introduce a degree of randomness, " "leading to more diverse outcomes.", + advanced=True, ), ], outputs=[ @@ -534,16 +587,19 @@ class MagnificImageRelightNode(IO.ComfyNode): "interpolate_from_original", default=False, tooltip="Restricts generation freedom to match original more closely.", + advanced=True, ), IO.Boolean.Input( "change_background", default=True, tooltip="Modifies background based on prompt/reference.", + advanced=True, ), IO.Boolean.Input( "preserve_details", default=True, tooltip="Maintains texture and fine details from original.", + advanced=True, ), IO.DynamicCombo.Input( "advanced_settings", @@ -877,8 +933,8 @@ class MagnificExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ - # MagnificImageUpscalerCreativeNode, - # MagnificImageUpscalerPreciseV2Node, + MagnificImageUpscalerCreativeNode, + MagnificImageUpscalerPreciseV2Node, MagnificImageStyleTransferNode, MagnificImageRelightNode, MagnificImageSkinEnhancerNode, diff --git a/comfy_api_nodes/nodes_meshy.py b/comfy_api_nodes/nodes_meshy.py index 740607983..3cf577f4a 100644 --- a/comfy_api_nodes/nodes_meshy.py +++ b/comfy_api_nodes/nodes_meshy.py @@ -1,5 +1,3 @@ -import os - from typing_extensions import override from comfy_api.latest import IO, ComfyExtension, Input @@ -20,13 +18,12 @@ from comfy_api_nodes.apis.meshy import ( ) from comfy_api_nodes.util import ( ApiEndpoint, - download_url_to_bytesio, + download_url_to_file_3d, poll_op, sync_op, upload_images_to_comfyapi, validate_string, ) -from folder_paths import get_output_directory class MeshyTextToModelNode(IO.ComfyNode): @@ -61,11 +58,12 @@ class MeshyTextToModelNode(IO.ComfyNode): ], tooltip="When set to false, returns an unprocessed triangular mesh.", ), - IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"]), + IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"], advanced=True), IO.Combo.Input( "pose_mode", options=["", "A-pose", "T-pose"], tooltip="Specify the pose mode for the generated model.", + advanced=True, ), IO.Int.Input( "seed", @@ -79,8 +77,10 @@ class MeshyTextToModelNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -122,16 +122,20 @@ class MeshyTextToModelNode(IO.ComfyNode): seed=seed, ), ) + task_id = response.result result = await poll_op( cls, - ApiEndpoint(path=f"/proxy/meshy/openapi/v2/text-to-3d/{response.result}"), + ApiEndpoint(path=f"/proxy/meshy/openapi/v2/text-to-3d/{task_id}"), response_model=MeshyModelResult, status_extractor=lambda r: r.status, progress_extractor=lambda r: r.progress, ) - model_file = f"meshy_model_{response.result}.glb" - await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) - return IO.NodeOutput(model_file, response.result) + return IO.NodeOutput( + f"{task_id}.glb", + task_id, + await download_url_to_file_3d(result.model_urls.glb, "glb", task_id=task_id), + await download_url_to_file_3d(result.model_urls.fbx, "fbx", task_id=task_id), + ) class MeshyRefineNode(IO.ComfyNode): @@ -152,6 +156,7 @@ class MeshyRefineNode(IO.ComfyNode): tooltip="Generate PBR Maps (metallic, roughness, normal) in addition to the base color. " "Note: this should be set to false when using Sculpture style, " "as Sculpture style generates its own set of PBR maps.", + advanced=True, ), IO.String.Input( "texture_prompt", @@ -167,8 +172,10 @@ class MeshyRefineNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -210,16 +217,20 @@ class MeshyRefineNode(IO.ComfyNode): ai_model=model, ), ) + task_id = response.result result = await poll_op( cls, - ApiEndpoint(path=f"/proxy/meshy/openapi/v2/text-to-3d/{response.result}"), + ApiEndpoint(path=f"/proxy/meshy/openapi/v2/text-to-3d/{task_id}"), response_model=MeshyModelResult, status_extractor=lambda r: r.status, progress_extractor=lambda r: r.progress, ) - model_file = f"meshy_model_{response.result}.glb" - await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) - return IO.NodeOutput(model_file, response.result) + return IO.NodeOutput( + f"{task_id}.glb", + task_id, + await download_url_to_file_3d(result.model_urls.glb, "glb", task_id=task_id), + await download_url_to_file_3d(result.model_urls.fbx, "fbx", task_id=task_id), + ) class MeshyImageToModelNode(IO.ComfyNode): @@ -290,6 +301,7 @@ class MeshyImageToModelNode(IO.ComfyNode): "pose_mode", options=["", "A-pose", "T-pose"], tooltip="Specify the pose mode for the generated model.", + advanced=True, ), IO.Int.Input( "seed", @@ -303,8 +315,10 @@ class MeshyImageToModelNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -368,16 +382,20 @@ class MeshyImageToModelNode(IO.ComfyNode): seed=seed, ), ) + task_id = response.result result = await poll_op( cls, - ApiEndpoint(path=f"/proxy/meshy/openapi/v1/image-to-3d/{response.result}"), + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/image-to-3d/{task_id}"), response_model=MeshyModelResult, status_extractor=lambda r: r.status, progress_extractor=lambda r: r.progress, ) - model_file = f"meshy_model_{response.result}.glb" - await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) - return IO.NodeOutput(model_file, response.result) + return IO.NodeOutput( + f"{task_id}.glb", + task_id, + await download_url_to_file_3d(result.model_urls.glb, "glb", task_id=task_id), + await download_url_to_file_3d(result.model_urls.fbx, "fbx", task_id=task_id), + ) class MeshyMultiImageToModelNode(IO.ComfyNode): @@ -414,7 +432,7 @@ class MeshyMultiImageToModelNode(IO.ComfyNode): ], tooltip="When set to false, returns an unprocessed triangular mesh.", ), - IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"]), + IO.Combo.Input("symmetry_mode", options=["auto", "on", "off"], advanced=True), IO.DynamicCombo.Input( "should_texture", options=[ @@ -451,6 +469,7 @@ class MeshyMultiImageToModelNode(IO.ComfyNode): "pose_mode", options=["", "A-pose", "T-pose"], tooltip="Specify the pose mode for the generated model.", + advanced=True, ), IO.Int.Input( "seed", @@ -464,8 +483,10 @@ class MeshyMultiImageToModelNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MESHY_TASK_ID").Output(display_name="meshy_task_id"), + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -531,16 +552,20 @@ class MeshyMultiImageToModelNode(IO.ComfyNode): seed=seed, ), ) + task_id = response.result result = await poll_op( cls, - ApiEndpoint(path=f"/proxy/meshy/openapi/v1/multi-image-to-3d/{response.result}"), + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/multi-image-to-3d/{task_id}"), response_model=MeshyModelResult, status_extractor=lambda r: r.status, progress_extractor=lambda r: r.progress, ) - model_file = f"meshy_model_{response.result}.glb" - await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) - return IO.NodeOutput(model_file, response.result) + return IO.NodeOutput( + f"{task_id}.glb", + task_id, + await download_url_to_file_3d(result.model_urls.glb, "glb", task_id=task_id), + await download_url_to_file_3d(result.model_urls.fbx, "fbx", task_id=task_id), + ) class MeshyRigModelNode(IO.ComfyNode): @@ -571,8 +596,10 @@ class MeshyRigModelNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MESHY_RIGGED_TASK_ID").Output(display_name="rig_task_id"), + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -606,18 +633,20 @@ class MeshyRigModelNode(IO.ComfyNode): texture_image_url=texture_image_url, ), ) + task_id = response.result result = await poll_op( cls, - ApiEndpoint(path=f"/proxy/meshy/openapi/v1/rigging/{response.result}"), + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/rigging/{task_id}"), response_model=MeshyRiggedResult, status_extractor=lambda r: r.status, progress_extractor=lambda r: r.progress, ) - model_file = f"meshy_model_{response.result}.glb" - await download_url_to_bytesio( - result.result.rigged_character_glb_url, os.path.join(get_output_directory(), model_file) + return IO.NodeOutput( + f"{task_id}.glb", + task_id, + await download_url_to_file_3d(result.result.rigged_character_glb_url, "glb", task_id=task_id), + await download_url_to_file_3d(result.result.rigged_character_fbx_url, "fbx", task_id=task_id), ) - return IO.NodeOutput(model_file, response.result) class MeshyAnimateModelNode(IO.ComfyNode): @@ -640,7 +669,9 @@ class MeshyAnimateModelNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -669,16 +700,19 @@ class MeshyAnimateModelNode(IO.ComfyNode): action_id=action_id, ), ) + task_id = response.result result = await poll_op( cls, - ApiEndpoint(path=f"/proxy/meshy/openapi/v1/animations/{response.result}"), + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/animations/{task_id}"), response_model=MeshyAnimationResult, status_extractor=lambda r: r.status, progress_extractor=lambda r: r.progress, ) - model_file = f"meshy_model_{response.result}.glb" - await download_url_to_bytesio(result.result.animation_glb_url, os.path.join(get_output_directory(), model_file)) - return IO.NodeOutput(model_file, response.result) + return IO.NodeOutput( + f"{task_id}.glb", + await download_url_to_file_3d(result.result.animation_glb_url, "glb", task_id=task_id), + await download_url_to_file_3d(result.result.animation_fbx_url, "fbx", task_id=task_id), + ) class MeshyTextureNode(IO.ComfyNode): @@ -698,8 +732,9 @@ class MeshyTextureNode(IO.ComfyNode): tooltip="Use the original UV of the model instead of generating new UVs. " "When enabled, Meshy preserves existing textures from the uploaded model. " "If the model has no original UV, the quality of the output might not be as good.", + advanced=True, ), - IO.Boolean.Input("pbr", default=False), + IO.Boolean.Input("pbr", default=False, advanced=True), IO.String.Input( "text_style_prompt", default="", @@ -715,8 +750,10 @@ class MeshyTextureNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MODEL_TASK_ID").Output(display_name="meshy_task_id"), + IO.File3DGLB.Output(display_name="GLB"), + IO.File3DFBX.Output(display_name="FBX"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -760,16 +797,20 @@ class MeshyTextureNode(IO.ComfyNode): image_style_url=image_style_url, ), ) + task_id = response.result result = await poll_op( cls, - ApiEndpoint(path=f"/proxy/meshy/openapi/v1/retexture/{response.result}"), + ApiEndpoint(path=f"/proxy/meshy/openapi/v1/retexture/{task_id}"), response_model=MeshyModelResult, status_extractor=lambda r: r.status, progress_extractor=lambda r: r.progress, ) - model_file = f"meshy_model_{response.result}.glb" - await download_url_to_bytesio(result.model_urls.glb, os.path.join(get_output_directory(), model_file)) - return IO.NodeOutput(model_file, response.result) + return IO.NodeOutput( + f"{task_id}.glb", + task_id, + await download_url_to_file_3d(result.model_urls.glb, "glb", task_id=task_id), + await download_url_to_file_3d(result.model_urls.fbx, "fbx", task_id=task_id), + ) class MeshyExtension(ComfyExtension): diff --git a/comfy_api_nodes/nodes_moonvalley.py b/comfy_api_nodes/nodes_moonvalley.py index 08315fa2b..78a230529 100644 --- a/comfy_api_nodes/nodes_moonvalley.py +++ b/comfy_api_nodes/nodes_moonvalley.py @@ -219,8 +219,8 @@ class MoonvalleyImg2VideoNode(IO.ComfyNode): ), IO.Int.Input( "steps", - default=33, - min=1, + default=80, + min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) max=100, step=1, tooltip="Number of denoising steps", @@ -340,8 +340,8 @@ class MoonvalleyVideo2VideoNode(IO.ComfyNode): ), IO.Int.Input( "steps", - default=33, - min=1, + default=60, + min=60, # steps should be greater or equal to cooldown_steps(36) + warmup_steps(24) max=100, step=1, display_mode=IO.NumberDisplay.number, @@ -370,7 +370,7 @@ class MoonvalleyVideo2VideoNode(IO.ComfyNode): video: Input.Video | None = None, control_type: str = "Motion Transfer", motion_intensity: int | None = 100, - steps=33, + steps=60, prompt_adherence=4.5, ) -> IO.NodeOutput: validated_video = validate_video_to_video_input(video) @@ -465,8 +465,8 @@ class MoonvalleyTxt2VideoNode(IO.ComfyNode): ), IO.Int.Input( "steps", - default=33, - min=1, + default=80, + min=75, # steps should be greater or equal to cooldown_steps(75) + warmup_steps(0) max=100, step=1, tooltip="Inference steps", diff --git a/comfy_api_nodes/nodes_openai.py b/comfy_api_nodes/nodes_openai.py index f05aaab7b..4ee896fa8 100644 --- a/comfy_api_nodes/nodes_openai.py +++ b/comfy_api_nodes/nodes_openai.py @@ -43,7 +43,6 @@ class SupportedOpenAIModel(str, Enum): o1 = "o1" o3 = "o3" o1_pro = "o1-pro" - gpt_4o = "gpt-4o" gpt_4_1 = "gpt-4.1" gpt_4_1_mini = "gpt-4.1-mini" gpt_4_1_nano = "gpt-4.1-nano" @@ -576,6 +575,7 @@ class OpenAIChatNode(IO.ComfyNode): node_id="OpenAIChatNode", display_name="OpenAI ChatGPT", category="api node/text/OpenAI", + essentials_category="Text Generation", description="Generate text responses from an OpenAI model.", inputs=[ IO.String.Input( @@ -588,6 +588,7 @@ class OpenAIChatNode(IO.ComfyNode): "persist_context", default=False, tooltip="This parameter is deprecated and has no effect.", + advanced=True, ), IO.Combo.Input( "model", @@ -649,11 +650,6 @@ class OpenAIChatNode(IO.ComfyNode): "usd": [0.01, 0.04], "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } } - : $contains($m, "gpt-4o") ? { - "type": "list_usd", - "usd": [0.0025, 0.01], - "format": { "approximate": true, "separator": "-", "suffix": " per 1K tokens" } - } : $contains($m, "gpt-4.1-nano") ? { "type": "list_usd", "usd": [0.0001, 0.0004], @@ -862,6 +858,7 @@ class OpenAIChatConfig(IO.ComfyNode): options=["auto", "disabled"], default="auto", tooltip="The truncation strategy to use for the model response. auto: If the context of this response and previous ones exceeds the model's context window size, the model will truncate the response to fit the context window by dropping input items in the middle of the conversation.disabled: If a model response will exceed the context window size for a model, the request will fail with a 400 error", + advanced=True, ), IO.Int.Input( "max_output_tokens", @@ -870,6 +867,7 @@ class OpenAIChatConfig(IO.ComfyNode): max=16384, tooltip="An upper bound for the number of tokens that can be generated for a response, including visible output tokens", optional=True, + advanced=True, ), IO.String.Input( "instructions", diff --git a/comfy_api_nodes/nodes_recraft.py b/comfy_api_nodes/nodes_recraft.py index 3a1f32263..4d1d508fa 100644 --- a/comfy_api_nodes/nodes_recraft.py +++ b/comfy_api_nodes/nodes_recraft.py @@ -1,5 +1,4 @@ from io import BytesIO -from typing import Optional, Union import aiohttp import torch @@ -9,6 +8,8 @@ from typing_extensions import override from comfy.utils import ProgressBar from comfy_api.latest import IO, ComfyExtension from comfy_api_nodes.apis.recraft import ( + RECRAFT_V4_PRO_SIZES, + RECRAFT_V4_SIZES, RecraftColor, RecraftColorChain, RecraftControls, @@ -18,7 +19,6 @@ from comfy_api_nodes.apis.recraft import ( RecraftImageGenerationResponse, RecraftImageSize, RecraftIO, - RecraftModel, RecraftStyle, RecraftStyleV3, get_v3_substyles, @@ -39,7 +39,7 @@ async def handle_recraft_file_request( cls: type[IO.ComfyNode], image: torch.Tensor, path: str, - mask: Optional[torch.Tensor] = None, + mask: torch.Tensor | None = None, total_pixels: int = 4096 * 4096, timeout: int = 1024, request=None, @@ -73,11 +73,11 @@ async def handle_recraft_file_request( def recraft_multipart_parser( data, parent_key=None, - formatter: Optional[type[callable]] = None, - converted_to_check: Optional[list[list]] = None, + formatter: type[callable] | None = None, + converted_to_check: list[list] | None = None, is_list: bool = False, return_mode: str = "formdata", # "dict" | "formdata" -) -> Union[dict, aiohttp.FormData]: +) -> dict | aiohttp.FormData: """ Formats data such that multipart/form-data will work with aiohttp library when both files and data are present. @@ -309,7 +309,7 @@ class RecraftStyleInfiniteStyleLibrary(IO.ComfyNode): node_id="RecraftStyleV3InfiniteStyleLibrary", display_name="Recraft Style - Infinite Style Library", category="api node/image/Recraft", - description="Select style based on preexisting UUID from Recraft's Infinite Style Library.", + description="Choose style based on preexisting UUID from Recraft's Infinite Style Library.", inputs=[ IO.String.Input("style_id", default="", tooltip="UUID of style from Infinite Style Library."), ], @@ -485,7 +485,7 @@ class RecraftTextToImageNode(IO.ComfyNode): data=RecraftImageGenerationRequest( prompt=prompt, negative_prompt=negative_prompt, - model=RecraftModel.recraftv3, + model="recraftv3", size=size, n=n, style=recraft_style.style, @@ -598,7 +598,7 @@ class RecraftImageToImageNode(IO.ComfyNode): request = RecraftImageGenerationRequest( prompt=prompt, negative_prompt=negative_prompt, - model=RecraftModel.recraftv3, + model="recraftv3", n=n, strength=round(strength, 2), style=recraft_style.style, @@ -698,7 +698,7 @@ class RecraftImageInpaintingNode(IO.ComfyNode): request = RecraftImageGenerationRequest( prompt=prompt, negative_prompt=negative_prompt, - model=RecraftModel.recraftv3, + model="recraftv3", n=n, style=recraft_style.style, substyle=recraft_style.substyle, @@ -810,7 +810,7 @@ class RecraftTextToVectorNode(IO.ComfyNode): data=RecraftImageGenerationRequest( prompt=prompt, negative_prompt=negative_prompt, - model=RecraftModel.recraftv3, + model="recraftv3", size=size, n=n, style=recraft_style.style, @@ -933,7 +933,7 @@ class RecraftReplaceBackgroundNode(IO.ComfyNode): request = RecraftImageGenerationRequest( prompt=prompt, negative_prompt=negative_prompt, - model=RecraftModel.recraftv3, + model="recraftv3", n=n, style=recraft_style.style, substyle=recraft_style.substyle, @@ -963,6 +963,7 @@ class RecraftRemoveBackgroundNode(IO.ComfyNode): node_id="RecraftRemoveBackgroundNode", display_name="Recraft Remove Background", category="api node/image/Recraft", + essentials_category="Image Tools", description="Remove background from image, and return processed image and mask.", inputs=[ IO.Image.Input("image"), @@ -1078,6 +1079,252 @@ class RecraftCreativeUpscaleNode(RecraftCrispUpscaleNode): ) +class RecraftV4TextToImageNode(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="RecraftV4TextToImageNode", + display_name="Recraft V4 Text to Image", + category="api node/image/Recraft", + description="Generates images using Recraft V4 or V4 Pro models.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + tooltip="Prompt for the image generation. Maximum 10,000 characters.", + ), + IO.String.Input( + "negative_prompt", + multiline=True, + tooltip="An optional text description of undesired elements on an image.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "recraftv4", + [ + IO.Combo.Input( + "size", + options=RECRAFT_V4_SIZES, + default="1024x1024", + tooltip="The size of the generated image.", + ), + ], + ), + IO.DynamicCombo.Option( + "recraftv4_pro", + [ + IO.Combo.Input( + "size", + options=RECRAFT_V4_PRO_SIZES, + default="2048x2048", + tooltip="The size of the generated image.", + ), + ], + ), + ], + tooltip="The model to use for generation.", + ), + IO.Int.Input( + "n", + default=1, + min=1, + max=6, + tooltip="The number of images to generate.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="Seed to determine if node should re-run; " + "actual results are nondeterministic regardless of seed.", + ), + IO.Custom(RecraftIO.CONTROLS).Input( + "recraft_controls", + tooltip="Optional additional controls over the generation via the Recraft Controls node.", + optional=True, + ), + ], + outputs=[ + IO.Image.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "n"]), + expr=""" + ( + $prices := {"recraftv4": 0.04, "recraftv4_pro": 0.25}; + {"type":"usd","usd": $lookup($prices, widgets.model) * widgets.n} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + negative_prompt: str, + model: dict, + n: int, + seed: int, + recraft_controls: RecraftControls | None = None, + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=False, min_length=1, max_length=10000) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/recraft/image_generation", method="POST"), + response_model=RecraftImageGenerationResponse, + data=RecraftImageGenerationRequest( + prompt=prompt, + negative_prompt=negative_prompt if negative_prompt else None, + model=model["model"], + size=model["size"], + n=n, + controls=recraft_controls.create_api_model() if recraft_controls else None, + ), + max_retries=1, + ) + images = [] + for data in response.data: + with handle_recraft_image_output(): + image = bytesio_to_image_tensor(await download_url_as_bytesio(data.url, timeout=1024)) + if len(image.shape) < 4: + image = image.unsqueeze(0) + images.append(image) + return IO.NodeOutput(torch.cat(images, dim=0)) + + +class RecraftV4TextToVectorNode(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="RecraftV4TextToVectorNode", + display_name="Recraft V4 Text to Vector", + category="api node/image/Recraft", + description="Generates SVG using Recraft V4 or V4 Pro models.", + inputs=[ + IO.String.Input( + "prompt", + multiline=True, + tooltip="Prompt for the image generation. Maximum 10,000 characters.", + ), + IO.String.Input( + "negative_prompt", + multiline=True, + tooltip="An optional text description of undesired elements on an image.", + ), + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "recraftv4", + [ + IO.Combo.Input( + "size", + options=RECRAFT_V4_SIZES, + default="1024x1024", + tooltip="The size of the generated image.", + ), + ], + ), + IO.DynamicCombo.Option( + "recraftv4_pro", + [ + IO.Combo.Input( + "size", + options=RECRAFT_V4_PRO_SIZES, + default="2048x2048", + tooltip="The size of the generated image.", + ), + ], + ), + ], + tooltip="The model to use for generation.", + ), + IO.Int.Input( + "n", + default=1, + min=1, + max=6, + tooltip="The number of images to generate.", + ), + IO.Int.Input( + "seed", + default=0, + min=0, + max=0xFFFFFFFFFFFFFFFF, + control_after_generate=True, + tooltip="Seed to determine if node should re-run; " + "actual results are nondeterministic regardless of seed.", + ), + IO.Custom(RecraftIO.CONTROLS).Input( + "recraft_controls", + tooltip="Optional additional controls over the generation via the Recraft Controls node.", + optional=True, + ), + ], + outputs=[ + IO.SVG.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "n"]), + expr=""" + ( + $prices := {"recraftv4": 0.08, "recraftv4_pro": 0.30}; + {"type":"usd","usd": $lookup($prices, widgets.model) * widgets.n} + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + prompt: str, + negative_prompt: str, + model: dict, + n: int, + seed: int, + recraft_controls: RecraftControls | None = None, + ) -> IO.NodeOutput: + validate_string(prompt, strip_whitespace=False, min_length=1, max_length=10000) + response = await sync_op( + cls, + ApiEndpoint(path="/proxy/recraft/image_generation", method="POST"), + response_model=RecraftImageGenerationResponse, + data=RecraftImageGenerationRequest( + prompt=prompt, + negative_prompt=negative_prompt if negative_prompt else None, + model=model["model"], + size=model["size"], + n=n, + style="vector_illustration", + substyle=None, + controls=recraft_controls.create_api_model() if recraft_controls else None, + ), + max_retries=1, + ) + svg_data = [] + for data in response.data: + svg_data.append(await download_url_as_bytesio(data.url, timeout=1024)) + return IO.NodeOutput(SVG(svg_data)) + + class RecraftExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -1098,6 +1345,8 @@ class RecraftExtension(ComfyExtension): RecraftCreateStyleNode, RecraftColorRGBNode, RecraftControlsNode, + RecraftV4TextToImageNode, + RecraftV4TextToVectorNode, ] diff --git a/comfy_api_nodes/nodes_rodin.py b/comfy_api_nodes/nodes_rodin.py index 3ffdc8b90..2b829b8db 100644 --- a/comfy_api_nodes/nodes_rodin.py +++ b/comfy_api_nodes/nodes_rodin.py @@ -10,7 +10,6 @@ import folder_paths as comfy_paths import os import logging import math -from typing import Optional from io import BytesIO from typing_extensions import override from PIL import Image @@ -28,8 +27,9 @@ from comfy_api_nodes.util import ( poll_op, ApiEndpoint, download_url_to_bytesio, + download_url_to_file_3d, ) -from comfy_api.latest import ComfyExtension, IO +from comfy_api.latest import ComfyExtension, IO, Types COMMON_PARAMETERS = [ @@ -177,7 +177,7 @@ def check_rodin_status(response: Rodin3DCheckStatusResponse) -> str: return "DONE" return "Generating" -def extract_progress(response: Rodin3DCheckStatusResponse) -> Optional[int]: +def extract_progress(response: Rodin3DCheckStatusResponse) -> int | None: if not response.jobs: return None completed_count = sum(1 for job in response.jobs if job.status == JobStatus.Done) @@ -207,17 +207,25 @@ async def get_rodin_download_list(uuid: str, cls: type[IO.ComfyNode]) -> Rodin3D ) -async def download_files(url_list, task_uuid: str): +async def download_files(url_list, task_uuid: str) -> tuple[str | None, Types.File3D | None]: result_folder_name = f"Rodin3D_{task_uuid}" save_path = os.path.join(comfy_paths.get_output_directory(), result_folder_name) os.makedirs(save_path, exist_ok=True) model_file_path = None + file_3d = None + for i in url_list.list: file_path = os.path.join(save_path, i.name) - if file_path.endswith(".glb"): + if i.name.lower().endswith(".glb"): model_file_path = os.path.join(result_folder_name, i.name) - await download_url_to_bytesio(i.url, file_path) - return model_file_path + file_3d = await download_url_to_file_3d(i.url, "glb") + # Save to disk for backward compatibility + with open(file_path, "wb") as f: + f.write(file_3d.get_bytes()) + else: + await download_url_to_bytesio(i.url, file_path) + + return model_file_path, file_3d class Rodin3D_Regular(IO.ComfyNode): @@ -234,7 +242,10 @@ class Rodin3D_Regular(IO.ComfyNode): IO.Image.Input("Images"), *COMMON_PARAMETERS, ], - outputs=[IO.String.Output(display_name="3D Model Path")], + outputs=[ + IO.String.Output(display_name="3D Model Path"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), + ], hidden=[ IO.Hidden.auth_token_comfy_org, IO.Hidden.api_key_comfy_org, @@ -271,9 +282,9 @@ class Rodin3D_Regular(IO.ComfyNode): ) await poll_for_task_status(subscription_key, cls) download_list = await get_rodin_download_list(task_uuid, cls) - model = await download_files(download_list, task_uuid) + model_path, file_3d = await download_files(download_list, task_uuid) - return IO.NodeOutput(model) + return IO.NodeOutput(model_path, file_3d) class Rodin3D_Detail(IO.ComfyNode): @@ -290,7 +301,10 @@ class Rodin3D_Detail(IO.ComfyNode): IO.Image.Input("Images"), *COMMON_PARAMETERS, ], - outputs=[IO.String.Output(display_name="3D Model Path")], + outputs=[ + IO.String.Output(display_name="3D Model Path"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), + ], hidden=[ IO.Hidden.auth_token_comfy_org, IO.Hidden.api_key_comfy_org, @@ -327,9 +341,9 @@ class Rodin3D_Detail(IO.ComfyNode): ) await poll_for_task_status(subscription_key, cls) download_list = await get_rodin_download_list(task_uuid, cls) - model = await download_files(download_list, task_uuid) + model_path, file_3d = await download_files(download_list, task_uuid) - return IO.NodeOutput(model) + return IO.NodeOutput(model_path, file_3d) class Rodin3D_Smooth(IO.ComfyNode): @@ -346,7 +360,10 @@ class Rodin3D_Smooth(IO.ComfyNode): IO.Image.Input("Images"), *COMMON_PARAMETERS, ], - outputs=[IO.String.Output(display_name="3D Model Path")], + outputs=[ + IO.String.Output(display_name="3D Model Path"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), + ], hidden=[ IO.Hidden.auth_token_comfy_org, IO.Hidden.api_key_comfy_org, @@ -382,9 +399,9 @@ class Rodin3D_Smooth(IO.ComfyNode): ) await poll_for_task_status(subscription_key, cls) download_list = await get_rodin_download_list(task_uuid, cls) - model = await download_files(download_list, task_uuid) + model_path, file_3d = await download_files(download_list, task_uuid) - return IO.NodeOutput(model) + return IO.NodeOutput(model_path, file_3d) class Rodin3D_Sketch(IO.ComfyNode): @@ -408,7 +425,10 @@ class Rodin3D_Sketch(IO.ComfyNode): optional=True, ), ], - outputs=[IO.String.Output(display_name="3D Model Path")], + outputs=[ + IO.String.Output(display_name="3D Model Path"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), + ], hidden=[ IO.Hidden.auth_token_comfy_org, IO.Hidden.api_key_comfy_org, @@ -441,9 +461,9 @@ class Rodin3D_Sketch(IO.ComfyNode): ) await poll_for_task_status(subscription_key, cls) download_list = await get_rodin_download_list(task_uuid, cls) - model = await download_files(download_list, task_uuid) + model_path, file_3d = await download_files(download_list, task_uuid) - return IO.NodeOutput(model) + return IO.NodeOutput(model_path, file_3d) class Rodin3D_Gen2(IO.ComfyNode): @@ -473,15 +493,21 @@ class Rodin3D_Gen2(IO.ComfyNode): default="500K-Triangle", optional=True, ), - IO.Boolean.Input("TAPose", default=False), + IO.Boolean.Input("TAPose", default=False, advanced=True), + ], + outputs=[ + IO.String.Output(display_name="3D Model Path"), # for backward compatibility only + IO.File3DGLB.Output(display_name="GLB"), ], - outputs=[IO.String.Output(display_name="3D Model Path")], hidden=[ IO.Hidden.auth_token_comfy_org, IO.Hidden.api_key_comfy_org, IO.Hidden.unique_id, ], is_api_node=True, + price_badge=IO.PriceBadge( + expr="""{"type":"usd","usd":0.4}""", + ), ) @classmethod @@ -511,9 +537,9 @@ class Rodin3D_Gen2(IO.ComfyNode): ) await poll_for_task_status(subscription_key, cls) download_list = await get_rodin_download_list(task_uuid, cls) - model = await download_files(download_list, task_uuid) + model_path, file_3d = await download_files(download_list, task_uuid) - return IO.NodeOutput(model) + return IO.NodeOutput(model_path, file_3d) class Rodin3DExtension(ComfyExtension): diff --git a/comfy_api_nodes/nodes_stability.py b/comfy_api_nodes/nodes_stability.py index 5665109cf..9ef13c83b 100644 --- a/comfy_api_nodes/nodes_stability.py +++ b/comfy_api_nodes/nodes_stability.py @@ -86,6 +86,7 @@ class StabilityStableImageUltraNode(IO.ComfyNode): "style_preset", options=get_stability_style_presets(), tooltip="Optional desired style of generated image.", + advanced=True, ), IO.Int.Input( "seed", @@ -107,6 +108,7 @@ class StabilityStableImageUltraNode(IO.ComfyNode): tooltip="A blurb of text describing what you do not wish to see in the output image. This is an advanced feature.", force_input=True, optional=True, + advanced=True, ), IO.Float.Input( "image_denoise", @@ -218,6 +220,7 @@ class StabilityStableImageSD_3_5Node(IO.ComfyNode): "style_preset", options=get_stability_style_presets(), tooltip="Optional desired style of generated image.", + advanced=True, ), IO.Float.Input( "cfg_scale", @@ -247,6 +250,7 @@ class StabilityStableImageSD_3_5Node(IO.ComfyNode): tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.", force_input=True, optional=True, + advanced=True, ), IO.Float.Input( "image_denoise", @@ -384,6 +388,7 @@ class StabilityUpscaleConservativeNode(IO.ComfyNode): tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.", force_input=True, optional=True, + advanced=True, ), ], outputs=[ @@ -474,6 +479,7 @@ class StabilityUpscaleCreativeNode(IO.ComfyNode): "style_preset", options=get_stability_style_presets(), tooltip="Optional desired style of generated image.", + advanced=True, ), IO.Int.Input( "seed", @@ -491,6 +497,7 @@ class StabilityUpscaleCreativeNode(IO.ComfyNode): tooltip="Keywords of what you do not wish to see in the output image. This is an advanced feature.", force_input=True, optional=True, + advanced=True, ), ], outputs=[ @@ -624,6 +631,7 @@ class StabilityTextToAudio(IO.ComfyNode): node_id="StabilityTextToAudio", display_name="Stability AI Text To Audio", category="api node/audio/Stability AI", + essentials_category="Audio", description=cleandoc(cls.__doc__ or ""), inputs=[ IO.Combo.Input( @@ -659,6 +667,7 @@ class StabilityTextToAudio(IO.ComfyNode): step=1, tooltip="Controls the number of sampling steps.", optional=True, + advanced=True, ), ], outputs=[ @@ -736,6 +745,7 @@ class StabilityAudioToAudio(IO.ComfyNode): step=1, tooltip="Controls the number of sampling steps.", optional=True, + advanced=True, ), IO.Float.Input( "strength", @@ -829,6 +839,7 @@ class StabilityAudioInpaint(IO.ComfyNode): step=1, tooltip="Controls the number of sampling steps.", optional=True, + advanced=True, ), IO.Int.Input( "mask_start", @@ -837,6 +848,7 @@ class StabilityAudioInpaint(IO.ComfyNode): max=190, step=1, optional=True, + advanced=True, ), IO.Int.Input( "mask_end", @@ -845,6 +857,7 @@ class StabilityAudioInpaint(IO.ComfyNode): max=190, step=1, optional=True, + advanced=True, ), ], outputs=[ diff --git a/comfy_api_nodes/nodes_topaz.py b/comfy_api_nodes/nodes_topaz.py index 8fccde25a..6b61bd4b2 100644 --- a/comfy_api_nodes/nodes_topaz.py +++ b/comfy_api_nodes/nodes_topaz.py @@ -63,12 +63,14 @@ class TopazImageEnhance(IO.ComfyNode): "subject_detection", options=["All", "Foreground", "Background"], optional=True, + advanced=True, ), IO.Boolean.Input( "face_enhancement", default=True, optional=True, tooltip="Enhance faces (if present) during processing.", + advanced=True, ), IO.Float.Input( "face_enhancement_creativity", @@ -79,6 +81,7 @@ class TopazImageEnhance(IO.ComfyNode): display_mode=IO.NumberDisplay.number, optional=True, tooltip="Set the creativity level for face enhancement.", + advanced=True, ), IO.Float.Input( "face_enhancement_strength", @@ -89,6 +92,7 @@ class TopazImageEnhance(IO.ComfyNode): display_mode=IO.NumberDisplay.number, optional=True, tooltip="Controls how sharp enhanced faces are relative to the background.", + advanced=True, ), IO.Boolean.Input( "crop_to_fill", @@ -96,6 +100,7 @@ class TopazImageEnhance(IO.ComfyNode): optional=True, tooltip="By default, the image is letterboxed when the output aspect ratio differs. " "Enable to crop the image to fill the output dimensions.", + advanced=True, ), IO.Int.Input( "output_width", @@ -106,6 +111,7 @@ class TopazImageEnhance(IO.ComfyNode): display_mode=IO.NumberDisplay.number, optional=True, tooltip="Zero value means to calculate automatically (usually it will be original size or output_height if specified).", + advanced=True, ), IO.Int.Input( "output_height", @@ -116,6 +122,7 @@ class TopazImageEnhance(IO.ComfyNode): display_mode=IO.NumberDisplay.number, optional=True, tooltip="Zero value means to output in the same height as original or output width.", + advanced=True, ), IO.Int.Input( "creativity", @@ -131,12 +138,14 @@ class TopazImageEnhance(IO.ComfyNode): default=True, optional=True, tooltip="Preserve subjects' facial identity.", + advanced=True, ), IO.Boolean.Input( "color_preservation", default=True, optional=True, tooltip="Preserve the original colors.", + advanced=True, ), ], outputs=[ @@ -234,9 +243,10 @@ class TopazVideoEnhance(IO.ComfyNode): default="low", tooltip="Creativity level (applies only to Starlight (Astra) Creative).", optional=True, + advanced=True, ), IO.Boolean.Input("interpolation_enabled", default=False, optional=True), - IO.Combo.Input("interpolation_model", options=["apo-8"], default="apo-8", optional=True), + IO.Combo.Input("interpolation_model", options=["apo-8"], default="apo-8", optional=True, advanced=True), IO.Int.Input( "interpolation_slowmo", default=1, @@ -246,6 +256,7 @@ class TopazVideoEnhance(IO.ComfyNode): tooltip="Slow-motion factor applied to the input video. " "For example, 2 makes the output twice as slow and doubles the duration.", optional=True, + advanced=True, ), IO.Int.Input( "interpolation_frame_rate", @@ -261,6 +272,7 @@ class TopazVideoEnhance(IO.ComfyNode): default=False, tooltip="Analyze the input for duplicate frames and remove them.", optional=True, + advanced=True, ), IO.Float.Input( "interpolation_duplicate_threshold", @@ -271,6 +283,7 @@ class TopazVideoEnhance(IO.ComfyNode): display_mode=IO.NumberDisplay.number, tooltip="Detection sensitivity for duplicate frames.", optional=True, + advanced=True, ), IO.Combo.Input( "dynamic_compression_level", @@ -278,6 +291,7 @@ class TopazVideoEnhance(IO.ComfyNode): default="Low", tooltip="CQP level.", optional=True, + advanced=True, ), ], outputs=[ diff --git a/comfy_api_nodes/nodes_tripo.py b/comfy_api_nodes/nodes_tripo.py index 5abf27b4d..9f4298dce 100644 --- a/comfy_api_nodes/nodes_tripo.py +++ b/comfy_api_nodes/nodes_tripo.py @@ -1,10 +1,6 @@ -import os -from typing import Optional - -import torch from typing_extensions import override -from comfy_api.latest import IO, ComfyExtension +from comfy_api.latest import IO, ComfyExtension, Input from comfy_api_nodes.apis.tripo import ( TripoAnimateRetargetRequest, TripoAnimateRigRequest, @@ -26,12 +22,11 @@ from comfy_api_nodes.apis.tripo import ( ) from comfy_api_nodes.util import ( ApiEndpoint, - download_url_as_bytesio, + download_url_to_file_3d, poll_op, sync_op, upload_images_to_comfyapi, ) -from folder_paths import get_output_directory def get_model_url_from_response(response: TripoTaskResponse) -> str: @@ -45,7 +40,7 @@ def get_model_url_from_response(response: TripoTaskResponse) -> str: async def poll_until_finished( node_cls: type[IO.ComfyNode], response: TripoTaskResponse, - average_duration: Optional[int] = None, + average_duration: int | None = None, ) -> IO.NodeOutput: """Polls the Tripo API endpoint until the task reaches a terminal state, then returns the response.""" if response.code != 0: @@ -69,12 +64,8 @@ async def poll_until_finished( ) if response_poll.data.status == TripoTaskStatus.SUCCESS: url = get_model_url_from_response(response_poll) - bytesio = await download_url_as_bytesio(url) - # Save the downloaded model file - model_file = f"tripo_model_{task_id}.glb" - with open(os.path.join(get_output_directory(), model_file), "wb") as f: - f.write(bytesio.getvalue()) - return IO.NodeOutput(model_file, task_id) + file_glb = await download_url_to_file_3d(url, "glb", task_id=task_id) + return IO.NodeOutput(f"{task_id}.glb", task_id, file_glb) raise RuntimeError(f"Failed to generate mesh: {response_poll}") @@ -98,17 +89,18 @@ class TripoTextToModelNode(IO.ComfyNode): IO.Combo.Input("style", options=TripoStyle, default="None", optional=True), IO.Boolean.Input("texture", default=True, optional=True), IO.Boolean.Input("pbr", default=True, optional=True), - IO.Int.Input("image_seed", default=42, optional=True), - IO.Int.Input("model_seed", default=42, optional=True), - IO.Int.Input("texture_seed", default=42, optional=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True), - IO.Int.Input("face_limit", default=-1, min=-1, max=2000000, optional=True), - IO.Boolean.Input("quad", default=False, optional=True), - IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True), + IO.Int.Input("image_seed", default=42, optional=True, advanced=True), + IO.Int.Input("model_seed", default=42, optional=True, advanced=True), + IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), + IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), + IO.Int.Input("face_limit", default=-1, min=-1, max=2000000, optional=True, advanced=True), + IO.Boolean.Input("quad", default=False, optional=True, advanced=True), + IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -155,18 +147,18 @@ class TripoTextToModelNode(IO.ComfyNode): async def execute( cls, prompt: str, - negative_prompt: Optional[str] = None, + negative_prompt: str | None = None, model_version=None, - style: Optional[str] = None, - texture: Optional[bool] = None, - pbr: Optional[bool] = None, - image_seed: Optional[int] = None, - model_seed: Optional[int] = None, - texture_seed: Optional[int] = None, - texture_quality: Optional[str] = None, - geometry_quality: Optional[str] = None, - face_limit: Optional[int] = None, - quad: Optional[bool] = None, + style: str | None = None, + texture: bool | None = None, + pbr: bool | None = None, + image_seed: int | None = None, + model_seed: int | None = None, + texture_seed: int | None = None, + texture_quality: str | None = None, + geometry_quality: str | None = None, + face_limit: int | None = None, + quad: bool | None = None, ) -> IO.NodeOutput: style_enum = None if style == "None" else style if not prompt: @@ -218,22 +210,23 @@ class TripoImageToModelNode(IO.ComfyNode): IO.Combo.Input("style", options=TripoStyle, default="None", optional=True), IO.Boolean.Input("texture", default=True, optional=True), IO.Boolean.Input("pbr", default=True, optional=True), - IO.Int.Input("model_seed", default=42, optional=True), + IO.Int.Input("model_seed", default=42, optional=True, advanced=True), IO.Combo.Input( - "orientation", options=TripoOrientation, default=TripoOrientation.DEFAULT, optional=True + "orientation", options=TripoOrientation, default=TripoOrientation.DEFAULT, optional=True, advanced=True ), - IO.Int.Input("texture_seed", default=42, optional=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True), + IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), + IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), IO.Combo.Input( - "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True + "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True, advanced=True ), - IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True), - IO.Boolean.Input("quad", default=False, optional=True), - IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True), + IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True, advanced=True), + IO.Boolean.Input("quad", default=False, optional=True, advanced=True), + IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -279,19 +272,19 @@ class TripoImageToModelNode(IO.ComfyNode): @classmethod async def execute( cls, - image: torch.Tensor, - model_version: Optional[str] = None, - style: Optional[str] = None, - texture: Optional[bool] = None, - pbr: Optional[bool] = None, - model_seed: Optional[int] = None, + image: Input.Image, + model_version: str | None = None, + style: str | None = None, + texture: bool | None = None, + pbr: bool | None = None, + model_seed: int | None = None, orientation=None, - texture_seed: Optional[int] = None, - texture_quality: Optional[str] = None, - geometry_quality: Optional[str] = None, - texture_alignment: Optional[str] = None, - face_limit: Optional[int] = None, - quad: Optional[bool] = None, + texture_seed: int | None = None, + texture_quality: str | None = None, + geometry_quality: str | None = None, + texture_alignment: str | None = None, + face_limit: int | None = None, + quad: bool | None = None, ) -> IO.NodeOutput: style_enum = None if style == "None" else style if image is None: @@ -354,22 +347,24 @@ class TripoMultiviewToModelNode(IO.ComfyNode): options=TripoOrientation, default=TripoOrientation.DEFAULT, optional=True, + advanced=True, ), IO.Boolean.Input("texture", default=True, optional=True), IO.Boolean.Input("pbr", default=True, optional=True), - IO.Int.Input("model_seed", default=42, optional=True), - IO.Int.Input("texture_seed", default=42, optional=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True), + IO.Int.Input("model_seed", default=42, optional=True, advanced=True), + IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), + IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), IO.Combo.Input( - "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True + "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True, advanced=True ), - IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True), - IO.Boolean.Input("quad", default=False, optional=True), - IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True), + IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True, advanced=True), + IO.Boolean.Input("quad", default=False, optional=True, advanced=True), + IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -411,21 +406,21 @@ class TripoMultiviewToModelNode(IO.ComfyNode): @classmethod async def execute( cls, - image: torch.Tensor, - image_left: Optional[torch.Tensor] = None, - image_back: Optional[torch.Tensor] = None, - image_right: Optional[torch.Tensor] = None, - model_version: Optional[str] = None, - orientation: Optional[str] = None, - texture: Optional[bool] = None, - pbr: Optional[bool] = None, - model_seed: Optional[int] = None, - texture_seed: Optional[int] = None, - texture_quality: Optional[str] = None, - geometry_quality: Optional[str] = None, - texture_alignment: Optional[str] = None, - face_limit: Optional[int] = None, - quad: Optional[bool] = None, + image: Input.Image, + image_left: Input.Image | None = None, + image_back: Input.Image | None = None, + image_right: Input.Image | None = None, + model_version: str | None = None, + orientation: str | None = None, + texture: bool | None = None, + pbr: bool | None = None, + model_seed: int | None = None, + texture_seed: int | None = None, + texture_quality: str | None = None, + geometry_quality: str | None = None, + texture_alignment: str | None = None, + face_limit: int | None = None, + quad: bool | None = None, ) -> IO.NodeOutput: if image is None: raise RuntimeError("front image for multiview is required") @@ -480,15 +475,16 @@ class TripoTextureNode(IO.ComfyNode): IO.Custom("MODEL_TASK_ID").Input("model_task_id"), IO.Boolean.Input("texture", default=True, optional=True), IO.Boolean.Input("pbr", default=True, optional=True), - IO.Int.Input("texture_seed", default=42, optional=True), - IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True), + IO.Int.Input("texture_seed", default=42, optional=True, advanced=True), + IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True, advanced=True), IO.Combo.Input( - "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True + "texture_alignment", default="original_image", options=["original_image", "geometry"], optional=True, advanced=True ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -512,11 +508,11 @@ class TripoTextureNode(IO.ComfyNode): async def execute( cls, model_task_id, - texture: Optional[bool] = None, - pbr: Optional[bool] = None, - texture_seed: Optional[int] = None, - texture_quality: Optional[str] = None, - texture_alignment: Optional[str] = None, + texture: bool | None = None, + pbr: bool | None = None, + texture_seed: int | None = None, + texture_quality: str | None = None, + texture_alignment: str | None = None, ) -> IO.NodeOutput: response = await sync_op( cls, @@ -547,8 +543,9 @@ class TripoRefineNode(IO.ComfyNode): IO.Custom("MODEL_TASK_ID").Input("model_task_id", tooltip="Must be a v1.4 Tripo model"), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("MODEL_TASK_ID").Output(display_name="model task_id"), + IO.File3DGLB.Output(display_name="GLB"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -583,8 +580,9 @@ class TripoRigNode(IO.ComfyNode): category="api node/3d/Tripo", inputs=[IO.Custom("MODEL_TASK_ID").Input("original_model_task_id")], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("RIG_TASK_ID").Output(display_name="rig task_id"), + IO.File3DGLB.Output(display_name="GLB"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -642,8 +640,9 @@ class TripoRetargetNode(IO.ComfyNode): ), ], outputs=[ - IO.String.Output(display_name="model_file"), + IO.String.Output(display_name="model_file"), # for backward compatibility only IO.Custom("RETARGET_TASK_ID").Output(display_name="retarget task_id"), + IO.File3DGLB.Output(display_name="GLB"), ], hidden=[ IO.Hidden.auth_token_comfy_org, @@ -684,13 +683,14 @@ class TripoConversionNode(IO.ComfyNode): inputs=[ IO.Custom("MODEL_TASK_ID,RIG_TASK_ID,RETARGET_TASK_ID").Input("original_model_task_id"), IO.Combo.Input("format", options=["GLTF", "USDZ", "FBX", "OBJ", "STL", "3MF"]), - IO.Boolean.Input("quad", default=False, optional=True), + IO.Boolean.Input("quad", default=False, optional=True, advanced=True), IO.Int.Input( "face_limit", default=-1, min=-1, max=2000000, optional=True, + advanced=True, ), IO.Int.Input( "texture_size", @@ -698,47 +698,53 @@ class TripoConversionNode(IO.ComfyNode): min=128, max=4096, optional=True, + advanced=True, ), IO.Combo.Input( "texture_format", options=["BMP", "DPX", "HDR", "JPEG", "OPEN_EXR", "PNG", "TARGA", "TIFF", "WEBP"], default="JPEG", optional=True, + advanced=True, ), - IO.Boolean.Input("force_symmetry", default=False, optional=True), - IO.Boolean.Input("flatten_bottom", default=False, optional=True), + IO.Boolean.Input("force_symmetry", default=False, optional=True, advanced=True), + IO.Boolean.Input("flatten_bottom", default=False, optional=True, advanced=True), IO.Float.Input( "flatten_bottom_threshold", default=0.0, min=0.0, max=1.0, optional=True, + advanced=True, ), - IO.Boolean.Input("pivot_to_center_bottom", default=False, optional=True), + IO.Boolean.Input("pivot_to_center_bottom", default=False, optional=True, advanced=True), IO.Float.Input( "scale_factor", default=1.0, min=0.0, optional=True, + advanced=True, ), - IO.Boolean.Input("with_animation", default=False, optional=True), - IO.Boolean.Input("pack_uv", default=False, optional=True), - IO.Boolean.Input("bake", default=False, optional=True), - IO.String.Input("part_names", default="", optional=True), # comma-separated list + IO.Boolean.Input("with_animation", default=False, optional=True, advanced=True), + IO.Boolean.Input("pack_uv", default=False, optional=True, advanced=True), + IO.Boolean.Input("bake", default=False, optional=True, advanced=True), + IO.String.Input("part_names", default="", optional=True, advanced=True), # comma-separated list IO.Combo.Input( "fbx_preset", options=["blender", "mixamo", "3dsmax"], default="blender", optional=True, + advanced=True, ), - IO.Boolean.Input("export_vertex_colors", default=False, optional=True), + IO.Boolean.Input("export_vertex_colors", default=False, optional=True, advanced=True), IO.Combo.Input( "export_orientation", options=["align_image", "default"], default="default", optional=True, + advanced=True, ), - IO.Boolean.Input("animate_in_place", default=False, optional=True), + IO.Boolean.Input("animate_in_place", default=False, optional=True, advanced=True), ], outputs=[], hidden=[ diff --git a/comfy_api_nodes/nodes_veo2.py b/comfy_api_nodes/nodes_veo2.py index 2a202fc3b..13fc1cc36 100644 --- a/comfy_api_nodes/nodes_veo2.py +++ b/comfy_api_nodes/nodes_veo2.py @@ -81,6 +81,7 @@ class VeoVideoGenerationNode(IO.ComfyNode): default=True, tooltip="Whether to enhance the prompt with AI assistance", optional=True, + advanced=True, ), IO.Combo.Input( "person_generation", @@ -88,6 +89,7 @@ class VeoVideoGenerationNode(IO.ComfyNode): default="ALLOW", tooltip="Whether to allow generating people in the video", optional=True, + advanced=True, ), IO.Int.Input( "seed", @@ -299,6 +301,7 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode): default=True, tooltip="This parameter is deprecated and ignored.", optional=True, + advanced=True, ), IO.Combo.Input( "person_generation", @@ -306,6 +309,7 @@ class Veo3VideoGenerationNode(VeoVideoGenerationNode): default="ALLOW", tooltip="Whether to allow generating people in the video", optional=True, + advanced=True, ), IO.Int.Input( "seed", diff --git a/comfy_api_nodes/nodes_vidu.py b/comfy_api_nodes/nodes_vidu.py index 80de14dfe..f04407eb5 100644 --- a/comfy_api_nodes/nodes_vidu.py +++ b/comfy_api_nodes/nodes_vidu.py @@ -54,6 +54,7 @@ async def execute_task( response_model=TaskStatusResponse, status_extractor=lambda r: r.state, progress_extractor=lambda r: r.progress, + price_extractor=lambda r: r.credits * 0.005 if r.credits is not None else None, max_poll_attempts=max_poll_attempts, ) if not response.creations: @@ -111,12 +112,14 @@ class ViduTextToVideoNode(IO.ComfyNode): options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, + advanced=True, ), IO.Combo.Input( "movement_amplitude", options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, + advanced=True, ), ], outputs=[ @@ -207,12 +210,14 @@ class ViduImageToVideoNode(IO.ComfyNode): options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, + advanced=True, ), IO.Combo.Input( "movement_amplitude", options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, + advanced=True, ), ], outputs=[ @@ -313,12 +318,14 @@ class ViduReferenceVideoNode(IO.ComfyNode): options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, + advanced=True, ), IO.Combo.Input( "movement_amplitude", options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, + advanced=True, ), ], outputs=[ @@ -425,12 +432,14 @@ class ViduStartEndToVideoNode(IO.ComfyNode): options=["1080p"], tooltip="Supported values may vary by model & duration", optional=True, + advanced=True, ), IO.Combo.Input( "movement_amplitude", options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame", optional=True, + advanced=True, ), ], outputs=[ @@ -510,11 +519,12 @@ class Vidu2TextToVideoNode(IO.ComfyNode): control_after_generate=True, ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "3:4", "4:3", "1:1"]), - IO.Combo.Input("resolution", options=["720p", "1080p"]), + IO.Combo.Input("resolution", options=["720p", "1080p"], advanced=True), IO.Boolean.Input( "background_music", default=False, tooltip="Whether to add background music to the generated video.", + advanced=True, ), ], outputs=[ @@ -608,11 +618,13 @@ class Vidu2ImageToVideoNode(IO.ComfyNode): IO.Combo.Input( "resolution", options=["720p", "1080p"], + advanced=True, ), IO.Combo.Input( "movement_amplitude", options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame.", + advanced=True, ), ], outputs=[ @@ -726,6 +738,7 @@ class Vidu2ReferenceVideoNode(IO.ComfyNode): "audio", default=False, tooltip="When enabled video will contain generated speech and background music based on the prompt.", + advanced=True, ), IO.Int.Input( "duration", @@ -745,11 +758,12 @@ class Vidu2ReferenceVideoNode(IO.ComfyNode): control_after_generate=True, ), IO.Combo.Input("aspect_ratio", options=["16:9", "9:16", "4:3", "3:4", "1:1"]), - IO.Combo.Input("resolution", options=["720p", "1080p"]), + IO.Combo.Input("resolution", options=["720p", "1080p"], advanced=True), IO.Combo.Input( "movement_amplitude", options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame.", + advanced=True, ), ], outputs=[ @@ -863,11 +877,12 @@ class Vidu2StartEndToVideoNode(IO.ComfyNode): display_mode=IO.NumberDisplay.number, control_after_generate=True, ), - IO.Combo.Input("resolution", options=["720p", "1080p"]), + IO.Combo.Input("resolution", options=["720p", "1080p"], advanced=True), IO.Combo.Input( "movement_amplitude", options=["auto", "small", "medium", "large"], tooltip="The movement amplitude of objects in the frame.", + advanced=True, ), ], outputs=[ @@ -1306,6 +1321,36 @@ class Vidu3TextToVideoNode(IO.ComfyNode): ), ], ), + IO.DynamicCombo.Option( + "viduq3-turbo", + [ + IO.Combo.Input( + "aspect_ratio", + options=["16:9", "9:16", "3:4", "4:3", "1:1"], + tooltip="The aspect ratio of the output video.", + ), + IO.Combo.Input( + "resolution", + options=["720p", "1080p"], + tooltip="Resolution of the output video.", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=16, + step=1, + display_mode=IO.NumberDisplay.slider, + tooltip="Duration of the output video in seconds.", + ), + IO.Boolean.Input( + "audio", + default=False, + tooltip="When enabled, outputs video with sound " + "(including dialogue and sound effects).", + ), + ], + ), ], tooltip="Model to use for video generation.", ), @@ -1334,13 +1379,20 @@ class Vidu3TextToVideoNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["model.duration", "model.resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.duration", "model.resolution"]), expr=""" ( $res := $lookup(widgets, "model.resolution"); - $base := $lookup({"720p": 0.075, "1080p": 0.1}, $res); - $perSec := $lookup({"720p": 0.025, "1080p": 0.05}, $res); - {"type":"usd","usd": $base + $perSec * ($lookup(widgets, "model.duration") - 1)} + $d := $lookup(widgets, "model.duration"); + $contains(widgets.model, "turbo") + ? ( + $rate := $lookup({"720p": 0.06, "1080p": 0.08}, $res); + {"type":"usd","usd": $rate * $d} + ) + : ( + $rate := $lookup({"720p": 0.15, "1080p": 0.16}, $res); + {"type":"usd","usd": $rate * $d} + ) ) """, ), @@ -1409,6 +1461,31 @@ class Vidu3ImageToVideoNode(IO.ComfyNode): ), ], ), + IO.DynamicCombo.Option( + "viduq3-turbo", + [ + IO.Combo.Input( + "resolution", + options=["720p", "1080p"], + tooltip="Resolution of the output video.", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=16, + step=1, + display_mode=IO.NumberDisplay.slider, + tooltip="Duration of the output video in seconds.", + ), + IO.Boolean.Input( + "audio", + default=False, + tooltip="When enabled, outputs video with sound " + "(including dialogue and sound effects).", + ), + ], + ), ], tooltip="Model to use for video generation.", ), @@ -1442,13 +1519,20 @@ class Vidu3ImageToVideoNode(IO.ComfyNode): ], is_api_node=True, price_badge=IO.PriceBadge( - depends_on=IO.PriceBadgeDepends(widgets=["model.duration", "model.resolution"]), + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.duration", "model.resolution"]), expr=""" ( $res := $lookup(widgets, "model.resolution"); - $base := $lookup({"720p": 0.075, "1080p": 0.275, "2k": 0.35}, $res); - $perSec := $lookup({"720p": 0.05, "1080p": 0.075, "2k": 0.075}, $res); - {"type":"usd","usd": $base + $perSec * ($lookup(widgets, "model.duration") - 1)} + $d := $lookup(widgets, "model.duration"); + $contains(widgets.model, "turbo") + ? ( + $rate := $lookup({"720p": 0.06, "1080p": 0.08}, $res); + {"type":"usd","usd": $rate * $d} + ) + : ( + $rate := $lookup({"720p": 0.15, "1080p": 0.16, "2k": 0.2}, $res); + {"type":"usd","usd": $rate * $d} + ) ) """, ), @@ -1481,6 +1565,145 @@ class Vidu3ImageToVideoNode(IO.ComfyNode): return IO.NodeOutput(await download_url_to_video_output(results[0].url)) +class Vidu3StartEndToVideoNode(IO.ComfyNode): + + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="Vidu3StartEndToVideoNode", + display_name="Vidu Q3 Start/End Frame-to-Video Generation", + category="api node/video/Vidu", + description="Generate a video from a start frame, an end frame, and a prompt.", + inputs=[ + IO.DynamicCombo.Input( + "model", + options=[ + IO.DynamicCombo.Option( + "viduq3-pro", + [ + IO.Combo.Input( + "resolution", + options=["720p", "1080p"], + tooltip="Resolution of the output video.", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=16, + step=1, + display_mode=IO.NumberDisplay.slider, + tooltip="Duration of the output video in seconds.", + ), + IO.Boolean.Input( + "audio", + default=False, + tooltip="When enabled, outputs video with sound " + "(including dialogue and sound effects).", + ), + ], + ), + IO.DynamicCombo.Option( + "viduq3-turbo", + [ + IO.Combo.Input( + "resolution", + options=["720p", "1080p"], + tooltip="Resolution of the output video.", + ), + IO.Int.Input( + "duration", + default=5, + min=1, + max=16, + step=1, + display_mode=IO.NumberDisplay.slider, + tooltip="Duration of the output video in seconds.", + ), + IO.Boolean.Input( + "audio", + default=False, + tooltip="When enabled, outputs video with sound " + "(including dialogue and sound effects).", + ), + ], + ), + ], + tooltip="Model to use for video generation.", + ), + IO.Image.Input("first_frame"), + IO.Image.Input("end_frame"), + IO.String.Input( + "prompt", + multiline=True, + tooltip="Prompt description (max 2000 characters).", + ), + IO.Int.Input( + "seed", + default=1, + min=0, + max=2147483647, + step=1, + display_mode=IO.NumberDisplay.number, + control_after_generate=True, + ), + ], + outputs=[ + IO.Video.Output(), + ], + hidden=[ + IO.Hidden.auth_token_comfy_org, + IO.Hidden.api_key_comfy_org, + IO.Hidden.unique_id, + ], + is_api_node=True, + price_badge=IO.PriceBadge( + depends_on=IO.PriceBadgeDepends(widgets=["model", "model.duration", "model.resolution"]), + expr=""" + ( + $res := $lookup(widgets, "model.resolution"); + $d := $lookup(widgets, "model.duration"); + $contains(widgets.model, "turbo") + ? ( + $rate := $lookup({"720p": 0.06, "1080p": 0.08}, $res); + {"type":"usd","usd": $rate * $d} + ) + : ( + $rate := $lookup({"720p": 0.15, "1080p": 0.16}, $res); + {"type":"usd","usd": $rate * $d} + ) + ) + """, + ), + ) + + @classmethod + async def execute( + cls, + model: dict, + first_frame: Input.Image, + end_frame: Input.Image, + prompt: str, + seed: int, + ) -> IO.NodeOutput: + validate_string(prompt, max_length=2000) + validate_images_aspect_ratio_closeness(first_frame, end_frame, min_rel=0.8, max_rel=1.25, strict=False) + payload = TaskCreationRequest( + model=model["model"], + prompt=prompt, + duration=model["duration"], + seed=seed, + resolution=model["resolution"], + audio=model["audio"], + images=[ + (await upload_images_to_comfyapi(cls, frame, max_images=1, mime_type="image/png"))[0] + for frame in (first_frame, end_frame) + ], + ) + results = await execute_task(cls, VIDU_START_END_VIDEO, payload) + return IO.NodeOutput(await download_url_to_video_output(results[0].url)) + + class ViduExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -1497,6 +1720,7 @@ class ViduExtension(ComfyExtension): ViduMultiFrameVideoNode, Vidu3TextToVideoNode, Vidu3ImageToVideoNode, + Vidu3StartEndToVideoNode, ] diff --git a/comfy_api_nodes/nodes_wan.py b/comfy_api_nodes/nodes_wan.py index a1355d4f1..e2afe7f9c 100644 --- a/comfy_api_nodes/nodes_wan.py +++ b/comfy_api_nodes/nodes_wan.py @@ -227,12 +227,14 @@ class WanTextToImageApi(IO.ComfyNode): default=True, tooltip="Whether to enhance the prompt with AI assistance.", optional=True, + advanced=True, ), IO.Boolean.Input( "watermark", default=False, tooltip="Whether to add an AI-generated watermark to the result.", optional=True, + advanced=True, ), ], outputs=[ @@ -355,6 +357,7 @@ class WanImageToImageApi(IO.ComfyNode): default=False, tooltip="Whether to add an AI-generated watermark to the result.", optional=True, + advanced=True, ), ], outputs=[ @@ -495,18 +498,21 @@ class WanTextToVideoApi(IO.ComfyNode): default=False, optional=True, tooltip="If no audio input is provided, generate audio automatically.", + advanced=True, ), IO.Boolean.Input( "prompt_extend", default=True, tooltip="Whether to enhance the prompt with AI assistance.", optional=True, + advanced=True, ), IO.Boolean.Input( "watermark", default=False, tooltip="Whether to add an AI-generated watermark to the result.", optional=True, + advanced=True, ), IO.Combo.Input( "shot_type", @@ -515,6 +521,7 @@ class WanTextToVideoApi(IO.ComfyNode): "single continuous shot or multiple shots with cuts. " "This parameter takes effect only when prompt_extend is True.", optional=True, + advanced=True, ), ], outputs=[ @@ -667,18 +674,21 @@ class WanImageToVideoApi(IO.ComfyNode): default=False, optional=True, tooltip="If no audio input is provided, generate audio automatically.", + advanced=True, ), IO.Boolean.Input( "prompt_extend", default=True, tooltip="Whether to enhance the prompt with AI assistance.", optional=True, + advanced=True, ), IO.Boolean.Input( "watermark", default=False, tooltip="Whether to add an AI-generated watermark to the result.", optional=True, + advanced=True, ), IO.Combo.Input( "shot_type", @@ -687,6 +697,7 @@ class WanImageToVideoApi(IO.ComfyNode): "single continuous shot or multiple shots with cuts. " "This parameter takes effect only when prompt_extend is True.", optional=True, + advanced=True, ), ], outputs=[ @@ -839,11 +850,13 @@ class WanReferenceVideoApi(IO.ComfyNode): options=["single", "multi"], tooltip="Specifies the shot type for the generated video, that is, whether the video is a " "single continuous shot or multiple shots with cuts.", + advanced=True, ), IO.Boolean.Input( "watermark", default=False, tooltip="Whether to add an AI-generated watermark to the result.", + advanced=True, ), ], outputs=[ diff --git a/comfy_api_nodes/util/__init__.py b/comfy_api_nodes/util/__init__.py index c3c9ff4bf..0cb9a47c7 100644 --- a/comfy_api_nodes/util/__init__.py +++ b/comfy_api_nodes/util/__init__.py @@ -9,6 +9,8 @@ from .client import ( from .conversions import ( audio_bytes_to_audio_input, audio_input_to_mp3, + audio_ndarray_to_bytesio, + audio_tensor_to_contiguous_ndarray, audio_to_base64_string, bytesio_to_image_tensor, convert_mask_to_image, @@ -28,10 +30,12 @@ from .conversions import ( from .download_helpers import ( download_url_as_bytesio, download_url_to_bytesio, + download_url_to_file_3d, download_url_to_image_tensor, download_url_to_video_output, ) from .upload_helpers import ( + upload_3d_model_to_comfyapi, upload_audio_to_comfyapi, upload_file_to_comfyapi, upload_image_to_comfyapi, @@ -61,6 +65,7 @@ __all__ = [ "sync_op", "sync_op_raw", # Upload helpers + "upload_3d_model_to_comfyapi", "upload_audio_to_comfyapi", "upload_file_to_comfyapi", "upload_image_to_comfyapi", @@ -69,11 +74,14 @@ __all__ = [ # Download helpers "download_url_as_bytesio", "download_url_to_bytesio", + "download_url_to_file_3d", "download_url_to_image_tensor", "download_url_to_video_output", # Conversions "audio_bytes_to_audio_input", "audio_input_to_mp3", + "audio_ndarray_to_bytesio", + "audio_tensor_to_contiguous_ndarray", "audio_to_base64_string", "bytesio_to_image_tensor", "convert_mask_to_image", diff --git a/comfy_api_nodes/util/client.py b/comfy_api_nodes/util/client.py index 8a1259506..94886af7b 100644 --- a/comfy_api_nodes/util/client.py +++ b/comfy_api_nodes/util/client.py @@ -57,6 +57,7 @@ class _RequestConfig: files: dict[str, Any] | list[tuple[str, Any]] | None multipart_parser: Callable | None max_retries: int + max_retries_on_rate_limit: int retry_delay: float retry_backoff: float wait_label: str = "Waiting" @@ -65,6 +66,7 @@ class _RequestConfig: final_label_on_success: str | None = "Completed" progress_origin_ts: float | None = None price_extractor: Callable[[dict[str, Any]], float | None] | None = None + is_rate_limited: Callable[[int, Any], bool] | None = None @dataclass @@ -78,7 +80,7 @@ class _PollUIState: active_since: float | None = None # start time of current active interval (None if queued) -_RETRY_STATUS = {408, 429, 500, 502, 503, 504} +_RETRY_STATUS = {408, 500, 502, 503, 504} # status 429 is handled separately COMPLETED_STATUSES = ["succeeded", "succeed", "success", "completed", "finished", "done", "complete"] FAILED_STATUSES = ["cancelled", "canceled", "canceling", "fail", "failed", "error"] QUEUED_STATUSES = ["created", "queued", "queueing", "submitted", "initializing"] @@ -103,6 +105,8 @@ async def sync_op( final_label_on_success: str | None = "Completed", progress_origin_ts: float | None = None, monitor_progress: bool = True, + max_retries_on_rate_limit: int = 16, + is_rate_limited: Callable[[int, Any], bool] | None = None, ) -> M: raw = await sync_op_raw( cls, @@ -122,6 +126,8 @@ async def sync_op( final_label_on_success=final_label_on_success, progress_origin_ts=progress_origin_ts, monitor_progress=monitor_progress, + max_retries_on_rate_limit=max_retries_on_rate_limit, + is_rate_limited=is_rate_limited, ) if not isinstance(raw, dict): raise Exception("Expected JSON response to validate into a Pydantic model, got non-JSON (binary or text).") @@ -143,9 +149,9 @@ async def poll_op( poll_interval: float = 5.0, max_poll_attempts: int = 160, timeout_per_poll: float = 120.0, - max_retries_per_poll: int = 3, + max_retries_per_poll: int = 10, retry_delay_per_poll: float = 1.0, - retry_backoff_per_poll: float = 2.0, + retry_backoff_per_poll: float = 1.4, estimated_duration: int | None = None, cancel_endpoint: ApiEndpoint | None = None, cancel_timeout: float = 10.0, @@ -194,6 +200,8 @@ async def sync_op_raw( final_label_on_success: str | None = "Completed", progress_origin_ts: float | None = None, monitor_progress: bool = True, + max_retries_on_rate_limit: int = 16, + is_rate_limited: Callable[[int, Any], bool] | None = None, ) -> dict[str, Any] | bytes: """ Make a single network request. @@ -222,6 +230,8 @@ async def sync_op_raw( final_label_on_success=final_label_on_success, progress_origin_ts=progress_origin_ts, price_extractor=price_extractor, + max_retries_on_rate_limit=max_retries_on_rate_limit, + is_rate_limited=is_rate_limited, ) return await _request_base(cfg, expect_binary=as_binary) @@ -240,9 +250,9 @@ async def poll_op_raw( poll_interval: float = 5.0, max_poll_attempts: int = 160, timeout_per_poll: float = 120.0, - max_retries_per_poll: int = 3, + max_retries_per_poll: int = 10, retry_delay_per_poll: float = 1.0, - retry_backoff_per_poll: float = 2.0, + retry_backoff_per_poll: float = 1.4, estimated_duration: int | None = None, cancel_endpoint: ApiEndpoint | None = None, cancel_timeout: float = 10.0, @@ -506,7 +516,7 @@ def _friendly_http_message(status: int, body: Any) -> str: if status == 409: return "There is a problem with your account. Please contact support@comfy.org." if status == 429: - return "Rate Limit Exceeded: Please try again later." + return "Rate Limit Exceeded: The server returned 429 after all retry attempts. Please wait and try again." try: if isinstance(body, dict): err = body.get("error") @@ -586,6 +596,8 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): start_time = cfg.progress_origin_ts if cfg.progress_origin_ts is not None else time.monotonic() attempt = 0 delay = cfg.retry_delay + rate_limit_attempts = 0 + rate_limit_delay = cfg.retry_delay operation_succeeded: bool = False final_elapsed_seconds: int | None = None extracted_price: float | None = None @@ -653,17 +665,14 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): payload_headers["Content-Type"] = "application/json" payload_kw["json"] = cfg.data or {} - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method=method, - request_url=url, - request_headers=dict(payload_headers) if payload_headers else None, - request_params=dict(params) if params else None, - request_data=request_body_log, - ) - except Exception as _log_e: - logging.debug("[DEBUG] request logging failed: %s", _log_e) + request_logger.log_request_response( + operation_id=operation_id, + request_method=method, + request_url=url, + request_headers=dict(payload_headers) if payload_headers else None, + request_params=dict(params) if params else None, + request_data=request_body_log, + ) req_coro = sess.request(method, url, params=params, **payload_kw) req_task = asyncio.create_task(req_coro) @@ -688,41 +697,33 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): body = await resp.json() except (ContentTypeError, json.JSONDecodeError): body = await resp.text() - if resp.status in _RETRY_STATUS and attempt <= cfg.max_retries: + should_retry = False + wait_time = 0.0 + retry_label = "" + is_rl = resp.status == 429 or ( + cfg.is_rate_limited is not None and cfg.is_rate_limited(resp.status, body) + ) + if is_rl and rate_limit_attempts < cfg.max_retries_on_rate_limit: + rate_limit_attempts += 1 + wait_time = min(rate_limit_delay, 30.0) + rate_limit_delay *= cfg.retry_backoff + retry_label = f"rate-limit retry {rate_limit_attempts} of {cfg.max_retries_on_rate_limit}" + should_retry = True + elif resp.status in _RETRY_STATUS and (attempt - rate_limit_attempts) <= cfg.max_retries: + wait_time = delay + delay *= cfg.retry_backoff + retry_label = f"retry {attempt - rate_limit_attempts} of {cfg.max_retries}" + should_retry = True + + if should_retry: logging.warning( - "HTTP %s %s -> %s. Retrying in %.2fs (retry %d of %d).", + "HTTP %s %s -> %s. Waiting %.2fs (%s).", method, url, resp.status, - delay, - attempt, - cfg.max_retries, + wait_time, + retry_label, ) - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method=method, - request_url=url, - response_status_code=resp.status, - response_headers=dict(resp.headers), - response_content=body, - error_message=_friendly_http_message(resp.status, body), - ) - except Exception as _log_e: - logging.debug("[DEBUG] response logging failed: %s", _log_e) - - await sleep_with_interrupt( - delay, - cfg.node_cls, - cfg.wait_label if cfg.monitor_progress else None, - start_time if cfg.monitor_progress else None, - cfg.estimated_total, - display_callback=_display_time_progress if cfg.monitor_progress else None, - ) - delay *= cfg.retry_backoff - continue - msg = _friendly_http_message(resp.status, body) - try: request_logger.log_request_response( operation_id=operation_id, request_method=method, @@ -730,10 +731,27 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): response_status_code=resp.status, response_headers=dict(resp.headers), response_content=body, - error_message=msg, + error_message=f"HTTP {resp.status} ({retry_label}, will retry in {wait_time:.1f}s)", ) - except Exception as _log_e: - logging.debug("[DEBUG] response logging failed: %s", _log_e) + await sleep_with_interrupt( + wait_time, + cfg.node_cls, + cfg.wait_label if cfg.monitor_progress else None, + start_time if cfg.monitor_progress else None, + cfg.estimated_total, + display_callback=_display_time_progress if cfg.monitor_progress else None, + ) + continue + msg = _friendly_http_message(resp.status, body) + request_logger.log_request_response( + operation_id=operation_id, + request_method=method, + request_url=url, + response_status_code=resp.status, + response_headers=dict(resp.headers), + response_content=body, + error_message=msg, + ) raise Exception(msg) if expect_binary: @@ -753,17 +771,14 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): bytes_payload = bytes(buff) operation_succeeded = True final_elapsed_seconds = int(time.monotonic() - start_time) - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method=method, - request_url=url, - response_status_code=resp.status, - response_headers=dict(resp.headers), - response_content=bytes_payload, - ) - except Exception as _log_e: - logging.debug("[DEBUG] response logging failed: %s", _log_e) + request_logger.log_request_response( + operation_id=operation_id, + request_method=method, + request_url=url, + response_status_code=resp.status, + response_headers=dict(resp.headers), + response_content=bytes_payload, + ) return bytes_payload else: try: @@ -780,45 +795,39 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): extracted_price = cfg.price_extractor(payload) if cfg.price_extractor else None operation_succeeded = True final_elapsed_seconds = int(time.monotonic() - start_time) - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method=method, - request_url=url, - response_status_code=resp.status, - response_headers=dict(resp.headers), - response_content=response_content_to_log, - ) - except Exception as _log_e: - logging.debug("[DEBUG] response logging failed: %s", _log_e) + request_logger.log_request_response( + operation_id=operation_id, + request_method=method, + request_url=url, + response_status_code=resp.status, + response_headers=dict(resp.headers), + response_content=response_content_to_log, + ) return payload except ProcessingInterrupted: logging.debug("Polling was interrupted by user") raise except (ClientError, OSError) as e: - if attempt <= cfg.max_retries: + if (attempt - rate_limit_attempts) <= cfg.max_retries: logging.warning( "Connection error calling %s %s. Retrying in %.2fs (%d/%d): %s", method, url, delay, - attempt, + attempt - rate_limit_attempts, cfg.max_retries, str(e), ) - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method=method, - request_url=url, - request_headers=dict(payload_headers) if payload_headers else None, - request_params=dict(params) if params else None, - request_data=request_body_log, - error_message=f"{type(e).__name__}: {str(e)} (will retry)", - ) - except Exception as _log_e: - logging.debug("[DEBUG] request error logging failed: %s", _log_e) + request_logger.log_request_response( + operation_id=operation_id, + request_method=method, + request_url=url, + request_headers=dict(payload_headers) if payload_headers else None, + request_params=dict(params) if params else None, + request_data=request_body_log, + error_message=f"{type(e).__name__}: {str(e)} (will retry)", + ) await sleep_with_interrupt( delay, cfg.node_cls, @@ -831,23 +840,6 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): continue diag = await _diagnose_connectivity() if not diag["internet_accessible"]: - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method=method, - request_url=url, - request_headers=dict(payload_headers) if payload_headers else None, - request_params=dict(params) if params else None, - request_data=request_body_log, - error_message=f"LocalNetworkError: {str(e)}", - ) - except Exception as _log_e: - logging.debug("[DEBUG] final error logging failed: %s", _log_e) - raise LocalNetworkError( - "Unable to connect to the API server due to local network issues. " - "Please check your internet connection and try again." - ) from e - try: request_logger.log_request_response( operation_id=operation_id, request_method=method, @@ -855,10 +847,21 @@ async def _request_base(cfg: _RequestConfig, expect_binary: bool): request_headers=dict(payload_headers) if payload_headers else None, request_params=dict(params) if params else None, request_data=request_body_log, - error_message=f"ApiServerError: {str(e)}", + error_message=f"LocalNetworkError: {str(e)}", ) - except Exception as _log_e: - logging.debug("[DEBUG] final error logging failed: %s", _log_e) + raise LocalNetworkError( + "Unable to connect to the API server due to local network issues. " + "Please check your internet connection and try again." + ) from e + request_logger.log_request_response( + operation_id=operation_id, + request_method=method, + request_url=url, + request_headers=dict(payload_headers) if payload_headers else None, + request_params=dict(params) if params else None, + request_data=request_body_log, + error_message=f"ApiServerError: {str(e)}", + ) raise ApiServerError( f"The API server at {default_base_url()} is currently unreachable. " f"The service may be experiencing issues." diff --git a/comfy_api_nodes/util/conversions.py b/comfy_api_nodes/util/conversions.py index 3e37e8a8c..82b6d22a5 100644 --- a/comfy_api_nodes/util/conversions.py +++ b/comfy_api_nodes/util/conversions.py @@ -57,7 +57,7 @@ def tensor_to_bytesio( image: torch.Tensor, *, total_pixels: int | None = 2048 * 2048, - mime_type: str = "image/png", + mime_type: str | None = "image/png", ) -> BytesIO: """Converts a torch.Tensor image to a named BytesIO object. diff --git a/comfy_api_nodes/util/download_helpers.py b/comfy_api_nodes/util/download_helpers.py index 4668d14a9..aa588d038 100644 --- a/comfy_api_nodes/util/download_helpers.py +++ b/comfy_api_nodes/util/download_helpers.py @@ -11,7 +11,8 @@ import torch from aiohttp.client_exceptions import ClientError, ContentTypeError from comfy_api.latest import IO as COMFY_IO -from comfy_api.latest import InputImpl +from comfy_api.latest import InputImpl, Types +from folder_paths import get_output_directory from . import request_logger from ._helpers import ( @@ -166,27 +167,25 @@ async def download_url_to_bytesio( with contextlib.suppress(Exception): dest.seek(0) - with contextlib.suppress(Exception): - request_logger.log_request_response( - operation_id=op_id, - request_method="GET", - request_url=url, - response_status_code=resp.status, - response_headers=dict(resp.headers), - response_content=f"[streamed {written} bytes to dest]", - ) + request_logger.log_request_response( + operation_id=op_id, + request_method="GET", + request_url=url, + response_status_code=resp.status, + response_headers=dict(resp.headers), + response_content=f"[streamed {written} bytes to dest]", + ) return except asyncio.CancelledError: raise ProcessingInterrupted("Task cancelled") from None except (ClientError, OSError) as e: if attempt <= max_retries: - with contextlib.suppress(Exception): - request_logger.log_request_response( - operation_id=op_id, - request_method="GET", - request_url=url, - error_message=f"{type(e).__name__}: {str(e)} (will retry)", - ) + request_logger.log_request_response( + operation_id=op_id, + request_method="GET", + request_url=url, + error_message=f"{type(e).__name__}: {str(e)} (will retry)", + ) await sleep_with_interrupt(delay, cls, None, None, None) delay *= retry_backoff continue @@ -261,3 +260,38 @@ def _generate_operation_id(method: str, url: str, attempt: int) -> str: except Exception: slug = "download" return f"{method}_{slug}_try{attempt}_{uuid.uuid4().hex[:8]}" + + +async def download_url_to_file_3d( + url: str, + file_format: str, + *, + task_id: str | None = None, + timeout: float | None = None, + max_retries: int = 5, + cls: type[COMFY_IO.ComfyNode] = None, +) -> Types.File3D: + """Downloads a 3D model file from a URL into memory as BytesIO. + + If task_id is provided, also writes the file to disk in the output directory + for backward compatibility with the old save-to-disk behavior. + """ + file_format = file_format.lstrip(".").lower() + data = BytesIO() + await download_url_to_bytesio( + url, + data, + timeout=timeout, + max_retries=max_retries, + cls=cls, + ) + + if task_id is not None: + # This is only for backward compatability with current behavior when every 3D node is output node + # All new API nodes should not use "task_id" and instead users should use "SaveGLB" node to save results + output_dir = Path(get_output_directory()) + output_path = output_dir / f"{task_id}.{file_format}" + output_path.write_bytes(data.getvalue()) + data.seek(0) + + return Types.File3D(source=data, file_format=file_format) diff --git a/comfy_api_nodes/util/request_logger.py b/comfy_api_nodes/util/request_logger.py index e0cb4428d..fe0543d9b 100644 --- a/comfy_api_nodes/util/request_logger.py +++ b/comfy_api_nodes/util/request_logger.py @@ -8,7 +8,6 @@ from typing import Any import folder_paths -# Get the logger instance logger = logging.getLogger(__name__) @@ -91,38 +90,41 @@ def log_request_response( Filenames are sanitized and length-limited for cross-platform safety. If we still fail to write, we fall back to appending into api.log. """ - log_dir = get_log_directory() - filepath = _build_log_filepath(log_dir, operation_id, request_url) - - log_content: list[str] = [] - log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}") - log_content.append(f"Operation ID: {operation_id}") - log_content.append("-" * 30 + " REQUEST " + "-" * 30) - log_content.append(f"Method: {request_method}") - log_content.append(f"URL: {request_url}") - if request_headers: - log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}") - if request_params: - log_content.append(f"Params:\n{_format_data_for_logging(request_params)}") - if request_data is not None: - log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}") - - log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30) - if response_status_code is not None: - log_content.append(f"Status Code: {response_status_code}") - if response_headers: - log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}") - if response_content is not None: - log_content.append(f"Content:\n{_format_data_for_logging(response_content)}") - if error_message: - log_content.append(f"Error:\n{error_message}") - try: - with open(filepath, "w", encoding="utf-8") as f: - f.write("\n".join(log_content)) - logger.debug("API log saved to: %s", filepath) - except Exception as e: - logger.error("Error writing API log to %s: %s", filepath, str(e)) + log_dir = get_log_directory() + filepath = _build_log_filepath(log_dir, operation_id, request_url) + + log_content: list[str] = [] + log_content.append(f"Timestamp: {datetime.datetime.now().isoformat()}") + log_content.append(f"Operation ID: {operation_id}") + log_content.append("-" * 30 + " REQUEST " + "-" * 30) + log_content.append(f"Method: {request_method}") + log_content.append(f"URL: {request_url}") + if request_headers: + log_content.append(f"Headers:\n{_format_data_for_logging(request_headers)}") + if request_params: + log_content.append(f"Params:\n{_format_data_for_logging(request_params)}") + if request_data is not None: + log_content.append(f"Data/Body:\n{_format_data_for_logging(request_data)}") + + log_content.append("\n" + "-" * 30 + " RESPONSE " + "-" * 30) + if response_status_code is not None: + log_content.append(f"Status Code: {response_status_code}") + if response_headers: + log_content.append(f"Headers:\n{_format_data_for_logging(response_headers)}") + if response_content is not None: + log_content.append(f"Content:\n{_format_data_for_logging(response_content)}") + if error_message: + log_content.append(f"Error:\n{error_message}") + + try: + with open(filepath, "w", encoding="utf-8") as f: + f.write("\n".join(log_content)) + logger.debug("API log saved to: %s", filepath) + except Exception as e: + logger.error("Error writing API log to %s: %s", filepath, str(e)) + except Exception as _log_e: + logging.debug("[DEBUG] log_request_response failed: %s", _log_e) if __name__ == '__main__': diff --git a/comfy_api_nodes/util/upload_helpers.py b/comfy_api_nodes/util/upload_helpers.py index 3153f2b98..6d1d107a1 100644 --- a/comfy_api_nodes/util/upload_helpers.py +++ b/comfy_api_nodes/util/upload_helpers.py @@ -94,7 +94,7 @@ async def upload_image_to_comfyapi( *, mime_type: str | None = None, wait_label: str | None = "Uploading", - total_pixels: int = 2048 * 2048, + total_pixels: int | None = 2048 * 2048, ) -> str: """Uploads a single image to ComfyUI API and returns its download URL.""" return ( @@ -164,6 +164,27 @@ async def upload_video_to_comfyapi( return await upload_file_to_comfyapi(cls, video_bytes_io, filename, upload_mime_type, wait_label) +_3D_MIME_TYPES = { + "glb": "model/gltf-binary", + "obj": "model/obj", + "fbx": "application/octet-stream", +} + + +async def upload_3d_model_to_comfyapi( + cls: type[IO.ComfyNode], + model_3d: Types.File3D, + file_format: str, +) -> str: + """Uploads a 3D model file to ComfyUI API and returns its download URL.""" + return await upload_file_to_comfyapi( + cls, + model_3d.get_data(), + f"{uuid.uuid4()}.{file_format}", + _3D_MIME_TYPES.get(file_format, "application/octet-stream"), + ) + + async def upload_file_to_comfyapi( cls: type[IO.ComfyNode], file_bytes_io: BytesIO, @@ -255,17 +276,14 @@ async def upload_file( monitor_task = asyncio.create_task(_monitor()) sess: aiohttp.ClientSession | None = None try: - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method="PUT", - request_url=upload_url, - request_headers=headers or None, - request_params=None, - request_data=f"[File data {len(data)} bytes]", - ) - except Exception as e: - logging.debug("[DEBUG] upload request logging failed: %s", e) + request_logger.log_request_response( + operation_id=operation_id, + request_method="PUT", + request_url=upload_url, + request_headers=headers or None, + request_params=None, + request_data=f"[File data {len(data)} bytes]", + ) sess = aiohttp.ClientSession(timeout=timeout) req = sess.put(upload_url, data=data, headers=headers, skip_auto_headers=skip_auto_headers) @@ -311,31 +329,27 @@ async def upload_file( delay *= retry_backoff continue raise Exception(f"Failed to upload (HTTP {resp.status}).") - try: - request_logger.log_request_response( - operation_id=operation_id, - request_method="PUT", - request_url=upload_url, - response_status_code=resp.status, - response_headers=dict(resp.headers), - response_content="File uploaded successfully.", - ) - except Exception as e: - logging.debug("[DEBUG] upload response logging failed: %s", e) + request_logger.log_request_response( + operation_id=operation_id, + request_method="PUT", + request_url=upload_url, + response_status_code=resp.status, + response_headers=dict(resp.headers), + response_content="File uploaded successfully.", + ) return except asyncio.CancelledError: raise ProcessingInterrupted("Task cancelled") from None except (aiohttp.ClientError, OSError) as e: if attempt <= max_retries: - with contextlib.suppress(Exception): - request_logger.log_request_response( - operation_id=operation_id, - request_method="PUT", - request_url=upload_url, - request_headers=headers or None, - request_data=f"[File data {len(data)} bytes]", - error_message=f"{type(e).__name__}: {str(e)} (will retry)", - ) + request_logger.log_request_response( + operation_id=operation_id, + request_method="PUT", + request_url=upload_url, + request_headers=headers or None, + request_data=f"[File data {len(data)} bytes]", + error_message=f"{type(e).__name__}: {str(e)} (will retry)", + ) await sleep_with_interrupt( delay, cls, diff --git a/comfy_execution/jobs.py b/comfy_execution/jobs.py index bf091a448..370014fb6 100644 --- a/comfy_execution/jobs.py +++ b/comfy_execution/jobs.py @@ -20,10 +20,60 @@ class JobStatus: # Media types that can be previewed in the frontend -PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio'}) +PREVIEWABLE_MEDIA_TYPES = frozenset({'images', 'video', 'audio', '3d'}) # 3D file extensions for preview fallback (no dedicated media_type exists) -THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb'}) +THREE_D_EXTENSIONS = frozenset({'.obj', '.fbx', '.gltf', '.glb', '.usdz'}) + + +def has_3d_extension(filename: str) -> bool: + lower = filename.lower() + return any(lower.endswith(ext) for ext in THREE_D_EXTENSIONS) + + +def normalize_output_item(item): + """Normalize a single output list item for the jobs API. + + Returns the normalized item, or None to exclude it. + String items with 3D extensions become {filename, type, subfolder} dicts. + """ + if item is None: + return None + if isinstance(item, str): + if has_3d_extension(item): + return {'filename': item, 'type': 'output', 'subfolder': '', 'mediaType': '3d'} + return None + if isinstance(item, dict): + return item + return None + + +def normalize_outputs(outputs: dict) -> dict: + """Normalize raw node outputs for the jobs API. + + Transforms string 3D filenames into file output dicts and removes + None items. All other items (non-3D strings, dicts, etc.) are + preserved as-is. + """ + normalized = {} + for node_id, node_outputs in outputs.items(): + if not isinstance(node_outputs, dict): + normalized[node_id] = node_outputs + continue + normalized_node = {} + for media_type, items in node_outputs.items(): + if media_type == 'animated' or not isinstance(items, list): + normalized_node[media_type] = items + continue + normalized_items = [] + for item in items: + if item is None: + continue + norm = normalize_output_item(item) + normalized_items.append(norm if norm is not None else item) + normalized_node[media_type] = normalized_items + normalized[node_id] = normalized_node + return normalized def _extract_job_metadata(extra_data: dict) -> tuple[Optional[int], Optional[str]]: @@ -45,9 +95,9 @@ def is_previewable(media_type: str, item: dict) -> bool: Maintains backwards compatibility with existing logic. Priority: - 1. media_type is 'images', 'video', or 'audio' + 1. media_type is 'images', 'video', 'audio', or '3d' 2. format field starts with 'video/' or 'audio/' - 3. filename has a 3D extension (.obj, .fbx, .gltf, .glb) + 3. filename has a 3D extension (.obj, .fbx, .gltf, .glb, .usdz) """ if media_type in PREVIEWABLE_MEDIA_TYPES: return True @@ -139,7 +189,7 @@ def normalize_history_item(prompt_id: str, history_item: dict, include_outputs: }) if include_outputs: - job['outputs'] = outputs + job['outputs'] = normalize_outputs(outputs) job['execution_status'] = status_info job['workflow'] = { 'prompt': prompt, @@ -171,18 +221,23 @@ def get_outputs_summary(outputs: dict) -> tuple[int, Optional[dict]]: continue for item in items: - count += 1 - - if not isinstance(item, dict): + normalized = normalize_output_item(item) + if normalized is None: continue - if preview_output is None and is_previewable(media_type, item): + count += 1 + + if preview_output is not None: + continue + + if isinstance(normalized, dict) and is_previewable(media_type, normalized): enriched = { - **item, + **normalized, 'nodeId': node_id, - 'mediaType': media_type } - if item.get('type') == 'output': + if 'mediaType' not in normalized: + enriched['mediaType'] = media_type + if normalized.get('type') == 'output': preview_output = enriched elif fallback_preview is None: fallback_preview = enriched diff --git a/comfy_extras/nodes_ace.py b/comfy_extras/nodes_ace.py index 1409233c9..9cf84ab4d 100644 --- a/comfy_extras/nodes_ace.py +++ b/comfy_extras/nodes_ace.py @@ -28,12 +28,45 @@ class TextEncodeAceStepAudio(io.ComfyNode): conditioning = node_helpers.conditioning_set_values(conditioning, {"lyrics_strength": lyrics_strength}) return io.NodeOutput(conditioning) +class TextEncodeAceStepAudio15(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="TextEncodeAceStepAudio1.5", + category="conditioning", + inputs=[ + io.Clip.Input("clip"), + io.String.Input("tags", multiline=True, dynamic_prompts=True), + io.String.Input("lyrics", multiline=True, dynamic_prompts=True), + io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True), + io.Int.Input("bpm", default=120, min=10, max=300), + io.Float.Input("duration", default=120.0, min=0.0, max=2000.0, step=0.1), + io.Combo.Input("timesignature", options=['2', '3', '4', '6']), + io.Combo.Input("language", options=["en", "ja", "zh", "es", "de", "fr", "pt", "ru", "it", "nl", "pl", "tr", "vi", "cs", "fa", "id", "ko", "uk", "hu", "ar", "sv", "ro", "el"]), + io.Combo.Input("keyscale", options=[f"{root} {quality}" for quality in ["major", "minor"] for root in ["C", "C#", "Db", "D", "D#", "Eb", "E", "F", "F#", "Gb", "G", "G#", "Ab", "A", "A#", "Bb", "B"]]), + io.Boolean.Input("generate_audio_codes", default=True, tooltip="Enable the LLM that generates audio codes. This can be slow but will increase the quality of the generated audio. Turn this off if you are giving the model an audio reference.", advanced=True), + io.Float.Input("cfg_scale", default=2.0, min=0.0, max=100.0, step=0.1, advanced=True), + io.Float.Input("temperature", default=0.85, min=0.0, max=2.0, step=0.01, advanced=True), + io.Float.Input("top_p", default=0.9, min=0.0, max=2000.0, step=0.01, advanced=True), + io.Int.Input("top_k", default=0, min=0, max=100, advanced=True), + io.Float.Input("min_p", default=0.000, min=0.0, max=1.0, step=0.001, advanced=True), + ], + outputs=[io.Conditioning.Output()], + ) + + @classmethod + def execute(cls, clip, tags, lyrics, seed, bpm, duration, timesignature, language, keyscale, generate_audio_codes, cfg_scale, temperature, top_p, top_k, min_p) -> io.NodeOutput: + tokens = clip.tokenize(tags, lyrics=lyrics, bpm=bpm, duration=duration, timesignature=int(timesignature), language=language, keyscale=keyscale, seed=seed, generate_audio_codes=generate_audio_codes, cfg_scale=cfg_scale, temperature=temperature, top_p=top_p, top_k=top_k, min_p=min_p) + conditioning = clip.encode_from_tokens_scheduled(tokens) + return io.NodeOutput(conditioning) + class EmptyAceStepLatentAudio(io.ComfyNode): @classmethod def define_schema(cls): return io.Schema( node_id="EmptyAceStepLatentAudio", + display_name="Empty Ace Step 1.0 Latent Audio", category="latent/audio", inputs=[ io.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.1), @@ -51,12 +84,61 @@ class EmptyAceStepLatentAudio(io.ComfyNode): return io.NodeOutput({"samples": latent, "type": "audio"}) +class EmptyAceStep15LatentAudio(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="EmptyAceStep1.5LatentAudio", + display_name="Empty Ace Step 1.5 Latent Audio", + category="latent/audio", + inputs=[ + io.Float.Input("seconds", default=120.0, min=1.0, max=1000.0, step=0.01), + io.Int.Input( + "batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch." + ), + ], + outputs=[io.Latent.Output()], + ) + + @classmethod + def execute(cls, seconds, batch_size) -> io.NodeOutput: + length = round((seconds * 48000 / 1920)) + latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device()) + return io.NodeOutput({"samples": latent, "type": "audio"}) + +class ReferenceAudio(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="ReferenceTimbreAudio", + display_name="Reference Audio", + category="advanced/conditioning/audio", + is_experimental=True, + description="This node sets the reference audio for ace step 1.5", + inputs=[ + io.Conditioning.Input("conditioning"), + io.Latent.Input("latent", optional=True), + ], + outputs=[ + io.Conditioning.Output(), + ] + ) + + @classmethod + def execute(cls, conditioning, latent=None) -> io.NodeOutput: + if latent is not None: + conditioning = node_helpers.conditioning_set_values(conditioning, {"reference_audio_timbre_latents": [latent["samples"]]}, append=True) + return io.NodeOutput(conditioning) + class AceExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[io.ComfyNode]]: return [ TextEncodeAceStepAudio, EmptyAceStepLatentAudio, + TextEncodeAceStepAudio15, + EmptyAceStep15LatentAudio, + ReferenceAudio, ] async def comfy_entrypoint() -> AceExtension: diff --git a/comfy_extras/nodes_advanced_samplers.py b/comfy_extras/nodes_advanced_samplers.py index 5532ffe6a..7f716cd76 100644 --- a/comfy_extras/nodes_advanced_samplers.py +++ b/comfy_extras/nodes_advanced_samplers.py @@ -47,8 +47,8 @@ class SamplerLCMUpscale(io.ComfyNode): node_id="SamplerLCMUpscale", category="sampling/custom_sampling/samplers", inputs=[ - io.Float.Input("scale_ratio", default=1.0, min=0.1, max=20.0, step=0.01), - io.Int.Input("scale_steps", default=-1, min=-1, max=1000, step=1), + io.Float.Input("scale_ratio", default=1.0, min=0.1, max=20.0, step=0.01, advanced=True), + io.Int.Input("scale_steps", default=-1, min=-1, max=1000, step=1, advanced=True), io.Combo.Input("upscale_method", options=cls.UPSCALE_METHODS), ], outputs=[io.Sampler.Output()], @@ -94,7 +94,7 @@ class SamplerEulerCFGpp(io.ComfyNode): display_name="SamplerEulerCFG++", category="_for_testing", # "sampling/custom_sampling/samplers" inputs=[ - io.Combo.Input("version", options=["regular", "alternative"]), + io.Combo.Input("version", options=["regular", "alternative"], advanced=True), ], outputs=[io.Sampler.Output()], is_experimental=True, diff --git a/comfy_extras/nodes_apg.py b/comfy_extras/nodes_apg.py index b9df2dcc9..fd561d360 100644 --- a/comfy_extras/nodes_apg.py +++ b/comfy_extras/nodes_apg.py @@ -26,6 +26,7 @@ class APG(io.ComfyNode): max=10.0, step=0.01, tooltip="Controls the scale of the parallel guidance vector. Default CFG behavior at a setting of 1.", + advanced=True, ), io.Float.Input( "norm_threshold", @@ -34,6 +35,7 @@ class APG(io.ComfyNode): max=50.0, step=0.1, tooltip="Normalize guidance vector to this value, normalization disable at a setting of 0.", + advanced=True, ), io.Float.Input( "momentum", @@ -42,6 +44,7 @@ class APG(io.ComfyNode): max=1.0, step=0.01, tooltip="Controls a running average of guidance during diffusion, disabled at a setting of 0.", + advanced=True, ), ], outputs=[io.Model.Output()], diff --git a/comfy_extras/nodes_attention_multiply.py b/comfy_extras/nodes_attention_multiply.py index 67c4e2ed0..060a5c9be 100644 --- a/comfy_extras/nodes_attention_multiply.py +++ b/comfy_extras/nodes_attention_multiply.py @@ -28,10 +28,10 @@ class UNetSelfAttentionMultiply(io.ComfyNode): category="_for_testing/attention_experiments", inputs=[ io.Model.Input("model"), - io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("k", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("v", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("out", default=1.0, min=0.0, max=10.0, step=0.01), + io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("k", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("v", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("out", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), ], outputs=[io.Model.Output()], is_experimental=True, @@ -51,10 +51,10 @@ class UNetCrossAttentionMultiply(io.ComfyNode): category="_for_testing/attention_experiments", inputs=[ io.Model.Input("model"), - io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("k", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("v", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("out", default=1.0, min=0.0, max=10.0, step=0.01), + io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("k", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("v", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("out", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), ], outputs=[io.Model.Output()], is_experimental=True, @@ -75,10 +75,10 @@ class CLIPAttentionMultiply(io.ComfyNode): category="_for_testing/attention_experiments", inputs=[ io.Clip.Input("clip"), - io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("k", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("v", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("out", default=1.0, min=0.0, max=10.0, step=0.01), + io.Float.Input("q", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("k", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("v", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("out", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), ], outputs=[io.Clip.Output()], is_experimental=True, @@ -109,10 +109,10 @@ class UNetTemporalAttentionMultiply(io.ComfyNode): category="_for_testing/attention_experiments", inputs=[ io.Model.Input("model"), - io.Float.Input("self_structural", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("self_temporal", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("cross_structural", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("cross_temporal", default=1.0, min=0.0, max=10.0, step=0.01), + io.Float.Input("self_structural", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("self_temporal", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("cross_structural", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), + io.Float.Input("cross_temporal", default=1.0, min=0.0, max=10.0, step=0.01, advanced=True), ], outputs=[io.Model.Output()], is_experimental=True, diff --git a/comfy_extras/nodes_audio.py b/comfy_extras/nodes_audio.py index 271b75fbd..43df0512f 100644 --- a/comfy_extras/nodes_audio.py +++ b/comfy_extras/nodes_audio.py @@ -22,7 +22,7 @@ class EmptyLatentAudio(IO.ComfyNode): inputs=[ IO.Float.Input("seconds", default=47.6, min=1.0, max=1000.0, step=0.1), IO.Int.Input( - "batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch." + "batch_size", default=1, min=1, max=4096, tooltip="The number of latent images in the batch.", ), ], outputs=[IO.Latent.Output()], @@ -82,17 +82,31 @@ class VAEEncodeAudio(IO.ComfyNode): @classmethod def execute(cls, vae, audio) -> IO.NodeOutput: sample_rate = audio["sample_rate"] - if 44100 != sample_rate: - waveform = torchaudio.functional.resample(audio["waveform"], sample_rate, 44100) + vae_sample_rate = getattr(vae, "audio_sample_rate", 44100) + if vae_sample_rate != sample_rate: + waveform = torchaudio.functional.resample(audio["waveform"], sample_rate, vae_sample_rate) else: waveform = audio["waveform"] t = vae.encode(waveform.movedim(1, -1)) - return IO.NodeOutput({"samples":t}) + return IO.NodeOutput({"samples": t}) encode = execute # TODO: remove +def vae_decode_audio(vae, samples, tile=None, overlap=None): + if tile is not None: + audio = vae.decode_tiled(samples["samples"], tile_y=tile, overlap=overlap).movedim(-1, 1) + else: + audio = vae.decode(samples["samples"]).movedim(-1, 1) + + std = torch.std(audio, dim=[1, 2], keepdim=True) * 5.0 + std[std < 1.0] = 1.0 + audio /= std + vae_sample_rate = getattr(vae, "audio_sample_rate", 44100) + return {"waveform": audio, "sample_rate": vae_sample_rate if "sample_rate" not in samples else samples["sample_rate"]} + + class VAEDecodeAudio(IO.ComfyNode): @classmethod def define_schema(cls): @@ -110,15 +124,33 @@ class VAEDecodeAudio(IO.ComfyNode): @classmethod def execute(cls, vae, samples) -> IO.NodeOutput: - audio = vae.decode(samples["samples"]).movedim(-1, 1) - std = torch.std(audio, dim=[1,2], keepdim=True) * 5.0 - std[std < 1.0] = 1.0 - audio /= std - return IO.NodeOutput({"waveform": audio, "sample_rate": 44100 if "sample_rate" not in samples else samples["sample_rate"]}) + return IO.NodeOutput(vae_decode_audio(vae, samples)) decode = execute # TODO: remove +class VAEDecodeAudioTiled(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="VAEDecodeAudioTiled", + search_aliases=["latent to audio"], + display_name="VAE Decode Audio (Tiled)", + category="latent/audio", + inputs=[ + IO.Latent.Input("samples"), + IO.Vae.Input("vae"), + IO.Int.Input("tile_size", default=512, min=32, max=8192, step=8), + IO.Int.Input("overlap", default=64, min=0, max=1024, step=8), + ], + outputs=[IO.Audio.Output()], + ) + + @classmethod + def execute(cls, vae, samples, tile_size, overlap) -> IO.NodeOutput: + return IO.NodeOutput(vae_decode_audio(vae, samples, tile_size, overlap)) + + class SaveAudio(IO.ComfyNode): @classmethod def define_schema(cls): @@ -127,6 +159,7 @@ class SaveAudio(IO.ComfyNode): search_aliases=["export flac"], display_name="Save Audio (FLAC)", category="audio", + essentials_category="Audio", inputs=[ IO.Audio.Input("audio"), IO.String.Input("filename_prefix", default="audio/ComfyUI"), @@ -268,6 +301,7 @@ class LoadAudio(IO.ComfyNode): search_aliases=["import audio", "open audio", "audio file"], display_name="Load Audio", category="audio", + essentials_category="Audio", inputs=[ IO.Combo.Input("audio", upload=IO.UploadType.audio, options=sorted(files)), ], @@ -645,6 +679,7 @@ class EmptyAudio(IO.ComfyNode): tooltip="Sample rate of the empty audio clip.", min=1, max=192000, + advanced=True, ), IO.Int.Input( "channels", @@ -652,6 +687,7 @@ class EmptyAudio(IO.ComfyNode): min=1, max=2, tooltip="Number of audio channels (1 for mono, 2 for stereo).", + advanced=True, ), ], outputs=[IO.Audio.Output()], @@ -666,6 +702,67 @@ class EmptyAudio(IO.ComfyNode): create_empty_audio = execute # TODO: remove +class AudioEqualizer3Band(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="AudioEqualizer3Band", + search_aliases=["eq", "bass boost", "treble boost", "equalizer"], + display_name="Audio Equalizer (3-Band)", + category="audio", + is_experimental=True, + inputs=[ + IO.Audio.Input("audio"), + IO.Float.Input("low_gain_dB", default=0.0, min=-24.0, max=24.0, step=0.1, tooltip="Gain for Low frequencies (Bass)"), + IO.Int.Input("low_freq", default=100, min=20, max=500, tooltip="Cutoff frequency for Low shelf"), + IO.Float.Input("mid_gain_dB", default=0.0, min=-24.0, max=24.0, step=0.1, tooltip="Gain for Mid frequencies"), + IO.Int.Input("mid_freq", default=1000, min=200, max=4000, tooltip="Center frequency for Mids"), + IO.Float.Input("mid_q", default=0.707, min=0.1, max=10.0, step=0.1, tooltip="Q factor (bandwidth) for Mids"), + IO.Float.Input("high_gain_dB", default=0.0, min=-24.0, max=24.0, step=0.1, tooltip="Gain for High frequencies (Treble)"), + IO.Int.Input("high_freq", default=5000, min=1000, max=15000, tooltip="Cutoff frequency for High shelf"), + ], + outputs=[IO.Audio.Output()], + ) + + @classmethod + def execute(cls, audio, low_gain_dB, low_freq, mid_gain_dB, mid_freq, mid_q, high_gain_dB, high_freq) -> IO.NodeOutput: + waveform = audio["waveform"] + sample_rate = audio["sample_rate"] + eq_waveform = waveform.clone() + + # 1. Apply Low Shelf (Bass) + if low_gain_dB != 0: + eq_waveform = torchaudio.functional.bass_biquad( + eq_waveform, + sample_rate, + gain=low_gain_dB, + central_freq=float(low_freq), + Q=0.707 + ) + + # 2. Apply Peaking EQ (Mids) + if mid_gain_dB != 0: + eq_waveform = torchaudio.functional.equalizer_biquad( + eq_waveform, + sample_rate, + center_freq=float(mid_freq), + gain=mid_gain_dB, + Q=mid_q + ) + + # 3. Apply High Shelf (Treble) + if high_gain_dB != 0: + eq_waveform = torchaudio.functional.treble_biquad( + eq_waveform, + sample_rate, + gain=high_gain_dB, + central_freq=float(high_freq), + Q=0.707 + ) + + return IO.NodeOutput({"waveform": eq_waveform, "sample_rate": sample_rate}) + + class AudioExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: @@ -673,6 +770,7 @@ class AudioExtension(ComfyExtension): EmptyLatentAudio, VAEEncodeAudio, VAEDecodeAudio, + VAEDecodeAudioTiled, SaveAudio, SaveAudioMP3, SaveAudioOpus, @@ -687,6 +785,7 @@ class AudioExtension(ComfyExtension): AudioMerge, AudioAdjustVolume, EmptyAudio, + AudioEqualizer3Band, ] async def comfy_entrypoint() -> AudioExtension: diff --git a/comfy_extras/nodes_camera_trajectory.py b/comfy_extras/nodes_camera_trajectory.py index eb7ef363c..e7efa29ba 100644 --- a/comfy_extras/nodes_camera_trajectory.py +++ b/comfy_extras/nodes_camera_trajectory.py @@ -174,10 +174,10 @@ class WanCameraEmbedding(io.ComfyNode): io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("length", default=81, min=1, max=nodes.MAX_RESOLUTION, step=4), io.Float.Input("speed", default=1.0, min=0, max=10.0, step=0.1, optional=True), - io.Float.Input("fx", default=0.5, min=0, max=1, step=0.000000001, optional=True), - io.Float.Input("fy", default=0.5, min=0, max=1, step=0.000000001, optional=True), - io.Float.Input("cx", default=0.5, min=0, max=1, step=0.01, optional=True), - io.Float.Input("cy", default=0.5, min=0, max=1, step=0.01, optional=True), + io.Float.Input("fx", default=0.5, min=0, max=1, step=0.000000001, optional=True, advanced=True), + io.Float.Input("fy", default=0.5, min=0, max=1, step=0.000000001, optional=True, advanced=True), + io.Float.Input("cx", default=0.5, min=0, max=1, step=0.01, optional=True, advanced=True), + io.Float.Input("cy", default=0.5, min=0, max=1, step=0.01, optional=True, advanced=True), ], outputs=[ io.WanCameraEmbedding.Output(display_name="camera_embedding"), diff --git a/comfy_extras/nodes_canny.py b/comfy_extras/nodes_canny.py index 6e0fadca5..5e7c4eabb 100644 --- a/comfy_extras/nodes_canny.py +++ b/comfy_extras/nodes_canny.py @@ -10,8 +10,10 @@ class Canny(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="Canny", + display_name="Canny", search_aliases=["edge detection", "outline", "contour detection", "line art"], category="image/preprocessors", + essentials_category="Image Tools", inputs=[ io.Image.Input("image"), io.Float.Input("low_threshold", default=0.4, min=0.01, max=0.99, step=0.01), diff --git a/comfy_extras/nodes_chroma_radiance.py b/comfy_extras/nodes_chroma_radiance.py index 381989818..509436062 100644 --- a/comfy_extras/nodes_chroma_radiance.py +++ b/comfy_extras/nodes_chroma_radiance.py @@ -48,6 +48,7 @@ class ChromaRadianceOptions(io.ComfyNode): min=0.0, max=1.0, tooltip="First sigma that these options will be in effect.", + advanced=True, ), io.Float.Input( id="end_sigma", @@ -55,12 +56,14 @@ class ChromaRadianceOptions(io.ComfyNode): min=0.0, max=1.0, tooltip="Last sigma that these options will be in effect.", + advanced=True, ), io.Int.Input( id="nerf_tile_size", default=-1, min=-1, tooltip="Allows overriding the default NeRF tile size. -1 means use the default (32). 0 means use non-tiling mode (may require a lot of VRAM).", + advanced=True, ), ], outputs=[io.Model.Output()], diff --git a/comfy_extras/nodes_clip_sdxl.py b/comfy_extras/nodes_clip_sdxl.py index 520ff0e3c..7a001af6f 100644 --- a/comfy_extras/nodes_clip_sdxl.py +++ b/comfy_extras/nodes_clip_sdxl.py @@ -35,8 +35,8 @@ class CLIPTextEncodeSDXL(io.ComfyNode): io.Clip.Input("clip"), io.Int.Input("width", default=1024, min=0, max=nodes.MAX_RESOLUTION), io.Int.Input("height", default=1024, min=0, max=nodes.MAX_RESOLUTION), - io.Int.Input("crop_w", default=0, min=0, max=nodes.MAX_RESOLUTION), - io.Int.Input("crop_h", default=0, min=0, max=nodes.MAX_RESOLUTION), + io.Int.Input("crop_w", default=0, min=0, max=nodes.MAX_RESOLUTION, advanced=True), + io.Int.Input("crop_h", default=0, min=0, max=nodes.MAX_RESOLUTION, advanced=True), io.Int.Input("target_width", default=1024, min=0, max=nodes.MAX_RESOLUTION), io.Int.Input("target_height", default=1024, min=0, max=nodes.MAX_RESOLUTION), io.String.Input("text_g", multiline=True, dynamic_prompts=True), diff --git a/comfy_extras/nodes_cond.py b/comfy_extras/nodes_cond.py index 8b06e3de9..86426a780 100644 --- a/comfy_extras/nodes_cond.py +++ b/comfy_extras/nodes_cond.py @@ -38,8 +38,8 @@ class T5TokenizerOptions(io.ComfyNode): category="_for_testing/conditioning", inputs=[ io.Clip.Input("clip"), - io.Int.Input("min_padding", default=0, min=0, max=10000, step=1), - io.Int.Input("min_length", default=0, min=0, max=10000, step=1), + io.Int.Input("min_padding", default=0, min=0, max=10000, step=1, advanced=True), + io.Int.Input("min_length", default=0, min=0, max=10000, step=1, advanced=True), ], outputs=[io.Clip.Output()], is_experimental=True, diff --git a/comfy_extras/nodes_context_windows.py b/comfy_extras/nodes_context_windows.py index 3799a9004..93a5204e1 100644 --- a/comfy_extras/nodes_context_windows.py +++ b/comfy_extras/nodes_context_windows.py @@ -14,15 +14,15 @@ class ContextWindowsManualNode(io.ComfyNode): description="Manually set context windows.", inputs=[ io.Model.Input("model", tooltip="The model to apply context windows to during sampling."), - io.Int.Input("context_length", min=1, default=16, tooltip="The length of the context window."), - io.Int.Input("context_overlap", min=0, default=4, tooltip="The overlap of the context window."), + io.Int.Input("context_length", min=1, default=16, tooltip="The length of the context window.", advanced=True), + io.Int.Input("context_overlap", min=0, default=4, tooltip="The overlap of the context window.", advanced=True), io.Combo.Input("context_schedule", options=[ comfy.context_windows.ContextSchedules.STATIC_STANDARD, comfy.context_windows.ContextSchedules.UNIFORM_STANDARD, comfy.context_windows.ContextSchedules.UNIFORM_LOOPED, comfy.context_windows.ContextSchedules.BATCHED, ], tooltip="The stride of the context window."), - io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules."), + io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules.", advanced=True), io.Boolean.Input("closed_loop", default=False, tooltip="Whether to close the context window loop; only applicable to looped schedules."), io.Combo.Input("fuse_method", options=comfy.context_windows.ContextFuseMethods.LIST_STATIC, default=comfy.context_windows.ContextFuseMethods.PYRAMID, tooltip="The method to use to fuse the context windows."), io.Int.Input("dim", min=0, max=5, default=0, tooltip="The dimension to apply the context windows to."), @@ -67,15 +67,15 @@ class WanContextWindowsManualNode(ContextWindowsManualNode): schema.description = "Manually set context windows for WAN-like models (dim=2)." schema.inputs = [ io.Model.Input("model", tooltip="The model to apply context windows to during sampling."), - io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=4, default=81, tooltip="The length of the context window."), - io.Int.Input("context_overlap", min=0, default=30, tooltip="The overlap of the context window."), + io.Int.Input("context_length", min=1, max=nodes.MAX_RESOLUTION, step=4, default=81, tooltip="The length of the context window.", advanced=True), + io.Int.Input("context_overlap", min=0, default=30, tooltip="The overlap of the context window.", advanced=True), io.Combo.Input("context_schedule", options=[ comfy.context_windows.ContextSchedules.STATIC_STANDARD, comfy.context_windows.ContextSchedules.UNIFORM_STANDARD, comfy.context_windows.ContextSchedules.UNIFORM_LOOPED, comfy.context_windows.ContextSchedules.BATCHED, ], tooltip="The stride of the context window."), - io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules."), + io.Int.Input("context_stride", min=1, default=1, tooltip="The stride of the context window; only applicable to uniform schedules.", advanced=True), io.Boolean.Input("closed_loop", default=False, tooltip="Whether to close the context window loop; only applicable to looped schedules."), io.Combo.Input("fuse_method", options=comfy.context_windows.ContextFuseMethods.LIST_STATIC, default=comfy.context_windows.ContextFuseMethods.PYRAMID, tooltip="The method to use to fuse the context windows."), io.Boolean.Input("freenoise", default=False, tooltip="Whether to apply FreeNoise noise shuffling, improves window blending."), diff --git a/comfy_extras/nodes_controlnet.py b/comfy_extras/nodes_controlnet.py index 0c1d7f0d4..847cb0bdf 100644 --- a/comfy_extras/nodes_controlnet.py +++ b/comfy_extras/nodes_controlnet.py @@ -48,8 +48,8 @@ class ControlNetInpaintingAliMamaApply(io.ComfyNode): io.Image.Input("image"), io.Mask.Input("mask"), io.Float.Input("strength", default=1.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001), - io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001), + io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001, advanced=True), + io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001, advanced=True), ], outputs=[ io.Conditioning.Output(display_name="positive"), diff --git a/comfy_extras/nodes_custom_sampler.py b/comfy_extras/nodes_custom_sampler.py index 8afd13acf..1e957c09b 100644 --- a/comfy_extras/nodes_custom_sampler.py +++ b/comfy_extras/nodes_custom_sampler.py @@ -50,9 +50,9 @@ class KarrasScheduler(io.ComfyNode): category="sampling/custom_sampling/schedulers", inputs=[ io.Int.Input("steps", default=20, min=1, max=10000), - io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("rho", default=7.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("rho", default=7.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), ], outputs=[io.Sigmas.Output()] ) @@ -72,8 +72,8 @@ class ExponentialScheduler(io.ComfyNode): category="sampling/custom_sampling/schedulers", inputs=[ io.Int.Input("steps", default=20, min=1, max=10000), - io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), ], outputs=[io.Sigmas.Output()] ) @@ -93,9 +93,9 @@ class PolyexponentialScheduler(io.ComfyNode): category="sampling/custom_sampling/schedulers", inputs=[ io.Int.Input("steps", default=20, min=1, max=10000), - io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("rho", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("rho", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), ], outputs=[io.Sigmas.Output()] ) @@ -115,10 +115,10 @@ class LaplaceScheduler(io.ComfyNode): category="sampling/custom_sampling/schedulers", inputs=[ io.Int.Input("steps", default=20, min=1, max=10000), - io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("mu", default=0.0, min=-10.0, max=10.0, step=0.1, round=False), - io.Float.Input("beta", default=0.5, min=0.0, max=10.0, step=0.1, round=False), + io.Float.Input("sigma_max", default=14.614642, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("sigma_min", default=0.0291675, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("mu", default=0.0, min=-10.0, max=10.0, step=0.1, round=False, advanced=True), + io.Float.Input("beta", default=0.5, min=0.0, max=10.0, step=0.1, round=False, advanced=True), ], outputs=[io.Sigmas.Output()] ) @@ -164,8 +164,8 @@ class BetaSamplingScheduler(io.ComfyNode): inputs=[ io.Model.Input("model"), io.Int.Input("steps", default=20, min=1, max=10000), - io.Float.Input("alpha", default=0.6, min=0.0, max=50.0, step=0.01, round=False), - io.Float.Input("beta", default=0.6, min=0.0, max=50.0, step=0.01, round=False), + io.Float.Input("alpha", default=0.6, min=0.0, max=50.0, step=0.01, round=False, advanced=True), + io.Float.Input("beta", default=0.6, min=0.0, max=50.0, step=0.01, round=False, advanced=True), ], outputs=[io.Sigmas.Output()] ) @@ -185,9 +185,9 @@ class VPScheduler(io.ComfyNode): category="sampling/custom_sampling/schedulers", inputs=[ io.Int.Input("steps", default=20, min=1, max=10000), - io.Float.Input("beta_d", default=19.9, min=0.0, max=5000.0, step=0.01, round=False), #TODO: fix default values - io.Float.Input("beta_min", default=0.1, min=0.0, max=5000.0, step=0.01, round=False), - io.Float.Input("eps_s", default=0.001, min=0.0, max=1.0, step=0.0001, round=False), + io.Float.Input("beta_d", default=19.9, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), #TODO: fix default values + io.Float.Input("beta_min", default=0.1, min=0.0, max=5000.0, step=0.01, round=False, advanced=True), + io.Float.Input("eps_s", default=0.001, min=0.0, max=1.0, step=0.0001, round=False, advanced=True), ], outputs=[io.Sigmas.Output()] ) @@ -398,9 +398,9 @@ class SamplerDPMPP_3M_SDE(io.ComfyNode): node_id="SamplerDPMPP_3M_SDE", category="sampling/custom_sampling/samplers", inputs=[ - io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Combo.Input("noise_device", options=['gpu', 'cpu']), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Combo.Input("noise_device", options=['gpu', 'cpu'], advanced=True), ], outputs=[io.Sampler.Output()] ) @@ -424,9 +424,9 @@ class SamplerDPMPP_2M_SDE(io.ComfyNode): category="sampling/custom_sampling/samplers", inputs=[ io.Combo.Input("solver_type", options=['midpoint', 'heun']), - io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Combo.Input("noise_device", options=['gpu', 'cpu']), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Combo.Input("noise_device", options=['gpu', 'cpu'], advanced=True), ], outputs=[io.Sampler.Output()] ) @@ -450,10 +450,10 @@ class SamplerDPMPP_SDE(io.ComfyNode): node_id="SamplerDPMPP_SDE", category="sampling/custom_sampling/samplers", inputs=[ - io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("r", default=0.5, min=0.0, max=100.0, step=0.01, round=False), - io.Combo.Input("noise_device", options=['gpu', 'cpu']), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("r", default=0.5, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Combo.Input("noise_device", options=['gpu', 'cpu'], advanced=True), ], outputs=[io.Sampler.Output()] ) @@ -496,8 +496,8 @@ class SamplerEulerAncestral(io.ComfyNode): node_id="SamplerEulerAncestral", category="sampling/custom_sampling/samplers", inputs=[ - io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), ], outputs=[io.Sampler.Output()] ) @@ -538,7 +538,7 @@ class SamplerLMS(io.ComfyNode): return io.Schema( node_id="SamplerLMS", category="sampling/custom_sampling/samplers", - inputs=[io.Int.Input("order", default=4, min=1, max=100)], + inputs=[io.Int.Input("order", default=4, min=1, max=100, advanced=True)], outputs=[io.Sampler.Output()] ) @@ -556,16 +556,16 @@ class SamplerDPMAdaptative(io.ComfyNode): node_id="SamplerDPMAdaptative", category="sampling/custom_sampling/samplers", inputs=[ - io.Int.Input("order", default=3, min=2, max=3), - io.Float.Input("rtol", default=0.05, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("atol", default=0.0078, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("h_init", default=0.05, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("pcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("icoeff", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("dcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("accept_safety", default=0.81, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("eta", default=0.0, min=0.0, max=100.0, step=0.01, round=False), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Int.Input("order", default=3, min=2, max=3, advanced=True), + io.Float.Input("rtol", default=0.05, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("atol", default=0.0078, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("h_init", default=0.05, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("pcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("icoeff", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("dcoeff", default=0.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("accept_safety", default=0.81, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("eta", default=0.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), ], outputs=[io.Sampler.Output()] ) @@ -588,9 +588,9 @@ class SamplerER_SDE(io.ComfyNode): category="sampling/custom_sampling/samplers", inputs=[ io.Combo.Input("solver_type", options=["ER-SDE", "Reverse-time SDE", "ODE"]), - io.Int.Input("max_stage", default=3, min=1, max=3), - io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength of reverse-time SDE.\nWhen eta=0, it reduces to deterministic ODE. This setting doesn't apply to ER-SDE solver type."), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), + io.Int.Input("max_stage", default=3, min=1, max=3, advanced=True), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength of reverse-time SDE.\nWhen eta=0, it reduces to deterministic ODE. This setting doesn't apply to ER-SDE solver type.", advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), ], outputs=[io.Sampler.Output()] ) @@ -622,17 +622,18 @@ class SamplerSASolver(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SamplerSASolver", + search_aliases=["sde"], category="sampling/custom_sampling/samplers", inputs=[ io.Model.Input("model"), - io.Float.Input("eta", default=1.0, min=0.0, max=10.0, step=0.01, round=False), - io.Float.Input("sde_start_percent", default=0.2, min=0.0, max=1.0, step=0.001), - io.Float.Input("sde_end_percent", default=0.8, min=0.0, max=1.0, step=0.001), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False), - io.Int.Input("predictor_order", default=3, min=1, max=6), - io.Int.Input("corrector_order", default=4, min=0, max=6), - io.Boolean.Input("use_pece"), - io.Boolean.Input("simple_order_2"), + io.Float.Input("eta", default=1.0, min=0.0, max=10.0, step=0.01, round=False, advanced=True), + io.Float.Input("sde_start_percent", default=0.2, min=0.0, max=1.0, step=0.001, advanced=True), + io.Float.Input("sde_end_percent", default=0.8, min=0.0, max=1.0, step=0.001, advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, advanced=True), + io.Int.Input("predictor_order", default=3, min=1, max=6, advanced=True), + io.Int.Input("corrector_order", default=4, min=0, max=6, advanced=True), + io.Boolean.Input("use_pece", advanced=True), + io.Boolean.Input("simple_order_2", advanced=True), ], outputs=[io.Sampler.Output()] ) @@ -666,12 +667,13 @@ class SamplerSEEDS2(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="SamplerSEEDS2", + search_aliases=["sde", "exp heun"], category="sampling/custom_sampling/samplers", inputs=[ io.Combo.Input("solver_type", options=["phi_1", "phi_2"]), - io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength"), - io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="SDE noise multiplier"), - io.Float.Input("r", default=0.5, min=0.01, max=1.0, step=0.01, round=False, tooltip="Relative step size for the intermediate stage (c2 node)"), + io.Float.Input("eta", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="Stochastic strength", advanced=True), + io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="SDE noise multiplier", advanced=True), + io.Float.Input("r", default=0.5, min=0.01, max=1.0, step=0.01, round=False, tooltip="Relative step size for the intermediate stage (c2 node)", advanced=True), ], outputs=[io.Sampler.Output()], description=( @@ -728,7 +730,7 @@ class SamplerCustom(io.ComfyNode): category="sampling/custom_sampling", inputs=[ io.Model.Input("model"), - io.Boolean.Input("add_noise", default=True), + io.Boolean.Input("add_noise", default=True, advanced=True), io.Int.Input("noise_seed", default=0, min=0, max=0xffffffffffffffff, control_after_generate=True), io.Float.Input("cfg", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01), io.Conditioning.Input("positive"), diff --git a/comfy_extras/nodes_dataset.py b/comfy_extras/nodes_dataset.py index fb9409ac3..98ed25d7e 100644 --- a/comfy_extras/nodes_dataset.py +++ b/comfy_extras/nodes_dataset.py @@ -222,6 +222,7 @@ class SaveImageDataSetToFolderNode(io.ComfyNode): "filename_prefix", default="image", tooltip="Prefix for saved image filenames.", + advanced=True, ), ], outputs=[], @@ -262,6 +263,7 @@ class SaveImageTextDataSetToFolderNode(io.ComfyNode): "filename_prefix", default="image", tooltip="Prefix for saved image filenames.", + advanced=True, ), ], outputs=[], @@ -741,6 +743,7 @@ class NormalizeImagesNode(ImageProcessingNode): min=0.0, max=1.0, tooltip="Mean value for normalization.", + advanced=True, ), io.Float.Input( "std", @@ -748,6 +751,7 @@ class NormalizeImagesNode(ImageProcessingNode): min=0.001, max=1.0, tooltip="Standard deviation for normalization.", + advanced=True, ), ] @@ -961,6 +965,7 @@ class ImageDeduplicationNode(ImageProcessingNode): min=0.0, max=1.0, tooltip="Similarity threshold (0-1). Higher means more similar. Images above this threshold are considered duplicates.", + advanced=True, ), ] @@ -1039,6 +1044,7 @@ class ImageGridNode(ImageProcessingNode): min=32, max=2048, tooltip="Width of each cell in the grid.", + advanced=True, ), io.Int.Input( "cell_height", @@ -1046,9 +1052,10 @@ class ImageGridNode(ImageProcessingNode): min=32, max=2048, tooltip="Height of each cell in the grid.", + advanced=True, ), io.Int.Input( - "padding", default=4, min=0, max=50, tooltip="Padding between images." + "padding", default=4, min=0, max=50, tooltip="Padding between images.", advanced=True ), ] @@ -1339,6 +1346,7 @@ class SaveTrainingDataset(io.ComfyNode): min=1, max=100000, tooltip="Number of samples per shard file.", + advanced=True, ), ], outputs=[], diff --git a/comfy_extras/nodes_easycache.py b/comfy_extras/nodes_easycache.py index 90d730df6..923c2bb05 100644 --- a/comfy_extras/nodes_easycache.py +++ b/comfy_extras/nodes_easycache.py @@ -9,6 +9,14 @@ if TYPE_CHECKING: from uuid import UUID +def _extract_tensor(data, output_channels): + """Extract tensor from data, handling both single tensors and lists.""" + if isinstance(data, list): + # LTX2 AV tensors: [video, audio] + return data[0][:, :output_channels], data[1][:, :output_channels] + return data[:, :output_channels], None + + def easycache_forward_wrapper(executor, *args, **kwargs): # get values from args transformer_options: dict[str] = args[-1] @@ -17,7 +25,7 @@ def easycache_forward_wrapper(executor, *args, **kwargs): if not transformer_options: transformer_options = args[-2] easycache: EasyCacheHolder = transformer_options["easycache"] - x: torch.Tensor = args[0][:, :easycache.output_channels] + x, ax = _extract_tensor(args[0], easycache.output_channels) sigmas = transformer_options["sigmas"] uuids = transformer_options["uuids"] if sigmas is not None and easycache.is_past_end_timestep(sigmas): @@ -35,7 +43,11 @@ def easycache_forward_wrapper(executor, *args, **kwargs): if easycache.skip_current_step and can_apply_cache_diff: if easycache.verbose: logging.info(f"EasyCache [verbose] - was marked to skip this step by {easycache.first_cond_uuid}. Present uuids: {uuids}") - return easycache.apply_cache_diff(x, uuids) + result = easycache.apply_cache_diff(x, uuids) + if ax is not None: + result_audio = easycache.apply_cache_diff(ax, uuids, is_audio=True) + return [result, result_audio] + return result if easycache.initial_step: easycache.first_cond_uuid = uuids[0] has_first_cond_uuid = easycache.has_first_cond_uuid(uuids) @@ -51,13 +63,18 @@ def easycache_forward_wrapper(executor, *args, **kwargs): logging.info(f"EasyCache [verbose] - skipping step; cumulative_change_rate: {easycache.cumulative_change_rate}, reuse_threshold: {easycache.reuse_threshold}") # other conds should also skip this step, and instead use their cached values easycache.skip_current_step = True - return easycache.apply_cache_diff(x, uuids) + result = easycache.apply_cache_diff(x, uuids) + if ax is not None: + result_audio = easycache.apply_cache_diff(ax, uuids, is_audio=True) + return [result, result_audio] + return result else: if easycache.verbose: logging.info(f"EasyCache [verbose] - NOT skipping step; cumulative_change_rate: {easycache.cumulative_change_rate}, reuse_threshold: {easycache.reuse_threshold}") easycache.cumulative_change_rate = 0.0 - output: torch.Tensor = executor(*args, **kwargs) + full_output: torch.Tensor = executor(*args, **kwargs) + output, audio_output = _extract_tensor(full_output, easycache.output_channels) if has_first_cond_uuid and easycache.has_output_prev_norm(): output_change = (easycache.subsample(output, uuids, clone=False) - easycache.output_prev_subsampled).flatten().abs().mean() if easycache.verbose: @@ -74,13 +91,15 @@ def easycache_forward_wrapper(executor, *args, **kwargs): logging.info(f"EasyCache [verbose] - output_change_rate: {output_change_rate}") # TODO: allow cache_diff to be offloaded easycache.update_cache_diff(output, next_x_prev, uuids) + if audio_output is not None: + easycache.update_cache_diff(audio_output, ax, uuids, is_audio=True) if has_first_cond_uuid: easycache.x_prev_subsampled = easycache.subsample(next_x_prev, uuids) easycache.output_prev_subsampled = easycache.subsample(output, uuids) easycache.output_prev_norm = output.flatten().abs().mean() if easycache.verbose: logging.info(f"EasyCache [verbose] - x_prev_subsampled: {easycache.x_prev_subsampled.shape}") - return output + return full_output def lazycache_predict_noise_wrapper(executor, *args, **kwargs): # get values from args @@ -89,8 +108,8 @@ def lazycache_predict_noise_wrapper(executor, *args, **kwargs): easycache: LazyCacheHolder = model_options["transformer_options"]["easycache"] if easycache.is_past_end_timestep(timestep): return executor(*args, **kwargs) - # prepare next x_prev x: torch.Tensor = args[0][:, :easycache.output_channels] + # prepare next x_prev next_x_prev = x input_change = None do_easycache = easycache.should_do_easycache(timestep) @@ -197,6 +216,7 @@ class EasyCacheHolder: self.output_prev_subsampled: torch.Tensor = None self.output_prev_norm: torch.Tensor = None self.uuid_cache_diffs: dict[UUID, torch.Tensor] = {} + self.uuid_cache_diffs_audio: dict[UUID, torch.Tensor] = {} self.output_change_rates = [] self.approx_output_change_rates = [] self.total_steps_skipped = 0 @@ -245,20 +265,21 @@ class EasyCacheHolder: def can_apply_cache_diff(self, uuids: list[UUID]) -> bool: return all(uuid in self.uuid_cache_diffs for uuid in uuids) - def apply_cache_diff(self, x: torch.Tensor, uuids: list[UUID]): - if self.first_cond_uuid in uuids: + def apply_cache_diff(self, x: torch.Tensor, uuids: list[UUID], is_audio: bool = False): + if self.first_cond_uuid in uuids and not is_audio: self.total_steps_skipped += 1 + cache_diffs = self.uuid_cache_diffs_audio if is_audio else self.uuid_cache_diffs batch_offset = x.shape[0] // len(uuids) for i, uuid in enumerate(uuids): # slice out only what is relevant to this cond batch_slice = [slice(i*batch_offset,(i+1)*batch_offset)] # if cached dims don't match x dims, cut off excess and hope for the best (cosmos world2video) - if x.shape[1:] != self.uuid_cache_diffs[uuid].shape[1:]: + if x.shape[1:] != cache_diffs[uuid].shape[1:]: if not self.allow_mismatch: raise ValueError(f"Cached dims {self.uuid_cache_diffs[uuid].shape} don't match x dims {x.shape} - this is no good") slicing = [] skip_this_dim = True - for dim_u, dim_x in zip(self.uuid_cache_diffs[uuid].shape, x.shape): + for dim_u, dim_x in zip(cache_diffs[uuid].shape, x.shape): if skip_this_dim: skip_this_dim = False continue @@ -270,10 +291,11 @@ class EasyCacheHolder: else: slicing.append(slice(None)) batch_slice = batch_slice + slicing - x[tuple(batch_slice)] += self.uuid_cache_diffs[uuid].to(x.device) + x[tuple(batch_slice)] += cache_diffs[uuid].to(x.device) return x - def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor, uuids: list[UUID]): + def update_cache_diff(self, output: torch.Tensor, x: torch.Tensor, uuids: list[UUID], is_audio: bool = False): + cache_diffs = self.uuid_cache_diffs_audio if is_audio else self.uuid_cache_diffs # if output dims don't match x dims, cut off excess and hope for the best (cosmos world2video) if output.shape[1:] != x.shape[1:]: if not self.allow_mismatch: @@ -293,7 +315,7 @@ class EasyCacheHolder: diff = output - x batch_offset = diff.shape[0] // len(uuids) for i, uuid in enumerate(uuids): - self.uuid_cache_diffs[uuid] = diff[i*batch_offset:(i+1)*batch_offset, ...] + cache_diffs[uuid] = diff[i*batch_offset:(i+1)*batch_offset, ...] def has_first_cond_uuid(self, uuids: list[UUID]) -> bool: return self.first_cond_uuid in uuids @@ -324,6 +346,8 @@ class EasyCacheHolder: self.output_prev_norm = None del self.uuid_cache_diffs self.uuid_cache_diffs = {} + del self.uuid_cache_diffs_audio + self.uuid_cache_diffs_audio = {} self.total_steps_skipped = 0 self.state_metadata = None return self @@ -343,10 +367,10 @@ class EasyCacheNode(io.ComfyNode): is_experimental=True, inputs=[ io.Model.Input("model", tooltip="The model to add EasyCache to."), - io.Float.Input("reuse_threshold", min=0.0, default=0.2, max=3.0, step=0.01, tooltip="The threshold for reusing cached steps."), - io.Float.Input("start_percent", min=0.0, default=0.15, max=1.0, step=0.01, tooltip="The relative sampling step to begin use of EasyCache."), - io.Float.Input("end_percent", min=0.0, default=0.95, max=1.0, step=0.01, tooltip="The relative sampling step to end use of EasyCache."), - io.Boolean.Input("verbose", default=False, tooltip="Whether to log verbose information."), + io.Float.Input("reuse_threshold", min=0.0, default=0.2, max=3.0, step=0.01, tooltip="The threshold for reusing cached steps.", advanced=True), + io.Float.Input("start_percent", min=0.0, default=0.15, max=1.0, step=0.01, tooltip="The relative sampling step to begin use of EasyCache.", advanced=True), + io.Float.Input("end_percent", min=0.0, default=0.95, max=1.0, step=0.01, tooltip="The relative sampling step to end use of EasyCache.", advanced=True), + io.Boolean.Input("verbose", default=False, tooltip="Whether to log verbose information.", advanced=True), ], outputs=[ io.Model.Output(tooltip="The model with EasyCache."), @@ -476,10 +500,10 @@ class LazyCacheNode(io.ComfyNode): is_experimental=True, inputs=[ io.Model.Input("model", tooltip="The model to add LazyCache to."), - io.Float.Input("reuse_threshold", min=0.0, default=0.2, max=3.0, step=0.01, tooltip="The threshold for reusing cached steps."), - io.Float.Input("start_percent", min=0.0, default=0.15, max=1.0, step=0.01, tooltip="The relative sampling step to begin use of LazyCache."), - io.Float.Input("end_percent", min=0.0, default=0.95, max=1.0, step=0.01, tooltip="The relative sampling step to end use of LazyCache."), - io.Boolean.Input("verbose", default=False, tooltip="Whether to log verbose information."), + io.Float.Input("reuse_threshold", min=0.0, default=0.2, max=3.0, step=0.01, tooltip="The threshold for reusing cached steps.", advanced=True), + io.Float.Input("start_percent", min=0.0, default=0.15, max=1.0, step=0.01, tooltip="The relative sampling step to begin use of LazyCache.", advanced=True), + io.Float.Input("end_percent", min=0.0, default=0.95, max=1.0, step=0.01, tooltip="The relative sampling step to end use of LazyCache.", advanced=True), + io.Boolean.Input("verbose", default=False, tooltip="Whether to log verbose information.", advanced=True), ], outputs=[ io.Model.Output(tooltip="The model with LazyCache."), diff --git a/comfy_extras/nodes_eps.py b/comfy_extras/nodes_eps.py index 4d8061741..0fb3871c8 100644 --- a/comfy_extras/nodes_eps.py +++ b/comfy_extras/nodes_eps.py @@ -28,6 +28,7 @@ class EpsilonScaling(io.ComfyNode): max=1.5, step=0.001, display_mode=io.NumberDisplay.number, + advanced=True, ), ], outputs=[ @@ -97,6 +98,7 @@ class TemporalScoreRescaling(io.ComfyNode): max=100.0, step=0.001, display_mode=io.NumberDisplay.number, + advanced=True, ), io.Float.Input( "tsr_sigma", @@ -109,6 +111,7 @@ class TemporalScoreRescaling(io.ComfyNode): max=100.0, step=0.001, display_mode=io.NumberDisplay.number, + advanced=True, ), ], outputs=[ diff --git a/comfy_extras/nodes_flux.py b/comfy_extras/nodes_flux.py index 12c8ed3e6..fe9552022 100644 --- a/comfy_extras/nodes_flux.py +++ b/comfy_extras/nodes_flux.py @@ -161,6 +161,7 @@ class FluxKontextMultiReferenceLatentMethod(io.ComfyNode): io.Combo.Input( "reference_latents_method", options=["offset", "index", "uxo/uno", "index_timestep_zero"], + advanced=True, ), ], outputs=[ diff --git a/comfy_extras/nodes_freelunch.py b/comfy_extras/nodes_freelunch.py index 3429b731e..248efdef3 100644 --- a/comfy_extras/nodes_freelunch.py +++ b/comfy_extras/nodes_freelunch.py @@ -32,10 +32,10 @@ class FreeU(IO.ComfyNode): category="model_patches/unet", inputs=[ IO.Model.Input("model"), - IO.Float.Input("b1", default=1.1, min=0.0, max=10.0, step=0.01), - IO.Float.Input("b2", default=1.2, min=0.0, max=10.0, step=0.01), - IO.Float.Input("s1", default=0.9, min=0.0, max=10.0, step=0.01), - IO.Float.Input("s2", default=0.2, min=0.0, max=10.0, step=0.01), + IO.Float.Input("b1", default=1.1, min=0.0, max=10.0, step=0.01, advanced=True), + IO.Float.Input("b2", default=1.2, min=0.0, max=10.0, step=0.01, advanced=True), + IO.Float.Input("s1", default=0.9, min=0.0, max=10.0, step=0.01, advanced=True), + IO.Float.Input("s2", default=0.2, min=0.0, max=10.0, step=0.01, advanced=True), ], outputs=[ IO.Model.Output(), @@ -79,10 +79,10 @@ class FreeU_V2(IO.ComfyNode): category="model_patches/unet", inputs=[ IO.Model.Input("model"), - IO.Float.Input("b1", default=1.3, min=0.0, max=10.0, step=0.01), - IO.Float.Input("b2", default=1.4, min=0.0, max=10.0, step=0.01), - IO.Float.Input("s1", default=0.9, min=0.0, max=10.0, step=0.01), - IO.Float.Input("s2", default=0.2, min=0.0, max=10.0, step=0.01), + IO.Float.Input("b1", default=1.3, min=0.0, max=10.0, step=0.01, advanced=True), + IO.Float.Input("b2", default=1.4, min=0.0, max=10.0, step=0.01, advanced=True), + IO.Float.Input("s1", default=0.9, min=0.0, max=10.0, step=0.01, advanced=True), + IO.Float.Input("s2", default=0.2, min=0.0, max=10.0, step=0.01, advanced=True), ], outputs=[ IO.Model.Output(), diff --git a/comfy_extras/nodes_fresca.py b/comfy_extras/nodes_fresca.py index 3d590af4b..eab4f303f 100644 --- a/comfy_extras/nodes_fresca.py +++ b/comfy_extras/nodes_fresca.py @@ -65,11 +65,11 @@ class FreSca(io.ComfyNode): inputs=[ io.Model.Input("model"), io.Float.Input("scale_low", default=1.0, min=0, max=10, step=0.01, - tooltip="Scaling factor for low-frequency components"), + tooltip="Scaling factor for low-frequency components", advanced=True), io.Float.Input("scale_high", default=1.25, min=0, max=10, step=0.01, - tooltip="Scaling factor for high-frequency components"), + tooltip="Scaling factor for high-frequency components", advanced=True), io.Int.Input("freq_cutoff", default=20, min=1, max=10000, step=1, - tooltip="Number of frequency indices around center to consider as low-frequency"), + tooltip="Number of frequency indices around center to consider as low-frequency", advanced=True), ], outputs=[ io.Model.Output(), diff --git a/comfy_extras/nodes_gits.py b/comfy_extras/nodes_gits.py index 25367560a..d48483862 100644 --- a/comfy_extras/nodes_gits.py +++ b/comfy_extras/nodes_gits.py @@ -342,7 +342,7 @@ class GITSScheduler(io.ComfyNode): node_id="GITSScheduler", category="sampling/custom_sampling/schedulers", inputs=[ - io.Float.Input("coeff", default=1.20, min=0.80, max=1.50, step=0.05), + io.Float.Input("coeff", default=1.20, min=0.80, max=1.50, step=0.05, advanced=True), io.Int.Input("steps", default=10, min=2, max=1000), io.Float.Input("denoise", default=1.0, min=0.0, max=1.0, step=0.01), ], diff --git a/comfy_extras/nodes_glsl.py b/comfy_extras/nodes_glsl.py new file mode 100644 index 000000000..2a59a9285 --- /dev/null +++ b/comfy_extras/nodes_glsl.py @@ -0,0 +1,896 @@ +import os +import sys +import re +import logging +import ctypes.util +import importlib.util +from typing import TypedDict + +import numpy as np +import torch + +import nodes +from comfy_api.latest import ComfyExtension, io, ui +from typing_extensions import override +from utils.install_util import get_missing_requirements_message + +logger = logging.getLogger(__name__) + + +def _check_opengl_availability(): + """Early check for OpenGL availability. Raises RuntimeError if unlikely to work.""" + logger.debug("_check_opengl_availability: starting") + missing = [] + + # Check Python packages (using find_spec to avoid importing) + logger.debug("_check_opengl_availability: checking for glfw package") + if importlib.util.find_spec("glfw") is None: + missing.append("glfw") + + logger.debug("_check_opengl_availability: checking for OpenGL package") + if importlib.util.find_spec("OpenGL") is None: + missing.append("PyOpenGL") + + if missing: + raise RuntimeError( + f"OpenGL dependencies not available.\n{get_missing_requirements_message()}\n" + ) + + # On Linux without display, check if headless backends are available + logger.debug(f"_check_opengl_availability: platform={sys.platform}") + if sys.platform.startswith("linux"): + has_display = os.environ.get("DISPLAY") or os.environ.get("WAYLAND_DISPLAY") + logger.debug(f"_check_opengl_availability: has_display={bool(has_display)}") + if not has_display: + # Check for EGL or OSMesa libraries + logger.debug("_check_opengl_availability: checking for EGL library") + has_egl = ctypes.util.find_library("EGL") + logger.debug("_check_opengl_availability: checking for OSMesa library") + has_osmesa = ctypes.util.find_library("OSMesa") + + # Error disabled for CI as it fails this check + # if not has_egl and not has_osmesa: + # raise RuntimeError( + # "GLSL Shader node: No display and no headless backend (EGL/OSMesa) found.\n" + # "See error below for installation instructions." + # ) + logger.debug(f"Headless mode: EGL={'yes' if has_egl else 'no'}, OSMesa={'yes' if has_osmesa else 'no'}") + + logger.debug("_check_opengl_availability: completed") + + +# Run early check at import time +logger.debug("nodes_glsl: running _check_opengl_availability at import time") +_check_opengl_availability() + +# OpenGL modules - initialized lazily when context is created +gl = None +glfw = None +EGL = None + + +def _import_opengl(): + """Import OpenGL module. Called after context is created.""" + global gl + if gl is None: + logger.debug("_import_opengl: importing OpenGL.GL") + import OpenGL.GL as _gl + gl = _gl + logger.debug("_import_opengl: import completed") + return gl + + +class SizeModeInput(TypedDict): + size_mode: str + width: int + height: int + + +MAX_IMAGES = 5 # u_image0-4 +MAX_UNIFORMS = 5 # u_float0-4, u_int0-4 +MAX_OUTPUTS = 4 # fragColor0-3 (MRT) + +# Vertex shader using gl_VertexID trick - no VBO needed. +# Draws a single triangle that covers the entire screen: +# +# (-1,3) +# /| +# / | <- visible area is the unit square from (-1,-1) to (1,1) +# / | parts outside get clipped away +# (-1,-1)---(3,-1) +# +# v_texCoord is computed from clip space: * 0.5 + 0.5 maps (-1,1) -> (0,1) +VERTEX_SHADER = """#version 330 core +out vec2 v_texCoord; +void main() { + vec2 verts[3] = vec2[](vec2(-1, -1), vec2(3, -1), vec2(-1, 3)); + v_texCoord = verts[gl_VertexID] * 0.5 + 0.5; + gl_Position = vec4(verts[gl_VertexID], 0, 1); +} +""" + +DEFAULT_FRAGMENT_SHADER = """#version 300 es +precision highp float; + +uniform sampler2D u_image0; +uniform vec2 u_resolution; + +in vec2 v_texCoord; +layout(location = 0) out vec4 fragColor0; + +void main() { + fragColor0 = texture(u_image0, v_texCoord); +} +""" + + +def _convert_es_to_desktop(source: str) -> str: + """Convert GLSL ES (WebGL) shader source to desktop GLSL 330 core.""" + # Remove any existing #version directive + source = re.sub(r"#version\s+\d+(\s+es)?\s*\n?", "", source, flags=re.IGNORECASE) + # Remove precision qualifiers (not needed in desktop GLSL) + source = re.sub(r"precision\s+(lowp|mediump|highp)\s+\w+\s*;\s*\n?", "", source) + # Prepend desktop GLSL version + return "#version 330 core\n" + source + + +def _detect_output_count(source: str) -> int: + """Detect how many fragColor outputs are used in the shader. + + Returns the count of outputs needed (1 to MAX_OUTPUTS). + """ + matches = re.findall(r"fragColor(\d+)", source) + if not matches: + return 1 # Default to 1 output if none found + max_index = max(int(m) for m in matches) + return min(max_index + 1, MAX_OUTPUTS) + + +def _detect_pass_count(source: str) -> int: + """Detect multi-pass rendering from #pragma passes N directive. + + Returns the number of passes (1 if not specified). + """ + match = re.search(r'#pragma\s+passes\s+(\d+)', source) + if match: + return max(1, int(match.group(1))) + return 1 + + +def _init_glfw(): + """Initialize GLFW. Returns (window, glfw_module). Raises RuntimeError on failure.""" + logger.debug("_init_glfw: starting") + # On macOS, glfw.init() must be called from main thread or it hangs forever + if sys.platform == "darwin": + logger.debug("_init_glfw: skipping on macOS") + raise RuntimeError("GLFW backend not supported on macOS") + + logger.debug("_init_glfw: importing glfw module") + import glfw as _glfw + + logger.debug("_init_glfw: calling glfw.init()") + if not _glfw.init(): + raise RuntimeError("glfw.init() failed") + + try: + logger.debug("_init_glfw: setting window hints") + _glfw.window_hint(_glfw.VISIBLE, _glfw.FALSE) + _glfw.window_hint(_glfw.CONTEXT_VERSION_MAJOR, 3) + _glfw.window_hint(_glfw.CONTEXT_VERSION_MINOR, 3) + _glfw.window_hint(_glfw.OPENGL_PROFILE, _glfw.OPENGL_CORE_PROFILE) + + logger.debug("_init_glfw: calling create_window()") + window = _glfw.create_window(64, 64, "ComfyUI GLSL", None, None) + if not window: + raise RuntimeError("glfw.create_window() failed") + + logger.debug("_init_glfw: calling make_context_current()") + _glfw.make_context_current(window) + logger.debug("_init_glfw: completed successfully") + return window, _glfw + except Exception: + logger.debug("_init_glfw: failed, terminating glfw") + _glfw.terminate() + raise + + +def _init_egl(): + """Initialize EGL for headless rendering. Returns (display, context, surface, EGL_module). Raises RuntimeError on failure.""" + logger.debug("_init_egl: starting") + from OpenGL import EGL as _EGL + from OpenGL.EGL import ( + eglGetDisplay, eglInitialize, eglChooseConfig, eglCreateContext, + eglMakeCurrent, eglCreatePbufferSurface, eglBindAPI, + eglTerminate, eglDestroyContext, eglDestroySurface, + EGL_DEFAULT_DISPLAY, EGL_NO_CONTEXT, EGL_NONE, + EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT, + EGL_RED_SIZE, EGL_GREEN_SIZE, EGL_BLUE_SIZE, EGL_ALPHA_SIZE, EGL_DEPTH_SIZE, + EGL_WIDTH, EGL_HEIGHT, EGL_OPENGL_API, + ) + logger.debug("_init_egl: imports completed") + + display = None + context = None + surface = None + + try: + logger.debug("_init_egl: calling eglGetDisplay()") + display = eglGetDisplay(EGL_DEFAULT_DISPLAY) + if display == _EGL.EGL_NO_DISPLAY: + raise RuntimeError("eglGetDisplay() failed") + + logger.debug("_init_egl: calling eglInitialize()") + major, minor = _EGL.EGLint(), _EGL.EGLint() + if not eglInitialize(display, major, minor): + display = None # Not initialized, don't terminate + raise RuntimeError("eglInitialize() failed") + logger.debug(f"_init_egl: EGL version {major.value}.{minor.value}") + + config_attribs = [ + EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, + EGL_RENDERABLE_TYPE, EGL_OPENGL_BIT, + EGL_RED_SIZE, 8, EGL_GREEN_SIZE, 8, EGL_BLUE_SIZE, 8, EGL_ALPHA_SIZE, 8, + EGL_DEPTH_SIZE, 0, EGL_NONE + ] + configs = (_EGL.EGLConfig * 1)() + num_configs = _EGL.EGLint() + if not eglChooseConfig(display, config_attribs, configs, 1, num_configs) or num_configs.value == 0: + raise RuntimeError("eglChooseConfig() failed") + config = configs[0] + logger.debug(f"_init_egl: config chosen, num_configs={num_configs.value}") + + if not eglBindAPI(EGL_OPENGL_API): + raise RuntimeError("eglBindAPI() failed") + + logger.debug("_init_egl: calling eglCreateContext()") + context_attribs = [ + _EGL.EGL_CONTEXT_MAJOR_VERSION, 3, + _EGL.EGL_CONTEXT_MINOR_VERSION, 3, + _EGL.EGL_CONTEXT_OPENGL_PROFILE_MASK, _EGL.EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT, + EGL_NONE + ] + context = eglCreateContext(display, config, EGL_NO_CONTEXT, context_attribs) + if context == EGL_NO_CONTEXT: + raise RuntimeError("eglCreateContext() failed") + + logger.debug("_init_egl: calling eglCreatePbufferSurface()") + pbuffer_attribs = [EGL_WIDTH, 64, EGL_HEIGHT, 64, EGL_NONE] + surface = eglCreatePbufferSurface(display, config, pbuffer_attribs) + if surface == _EGL.EGL_NO_SURFACE: + raise RuntimeError("eglCreatePbufferSurface() failed") + + logger.debug("_init_egl: calling eglMakeCurrent()") + if not eglMakeCurrent(display, surface, surface, context): + raise RuntimeError("eglMakeCurrent() failed") + + logger.debug("_init_egl: completed successfully") + return display, context, surface, _EGL + + except Exception: + logger.debug("_init_egl: failed, cleaning up") + # Clean up any resources on failure + if surface is not None: + eglDestroySurface(display, surface) + if context is not None: + eglDestroyContext(display, context) + if display is not None: + eglTerminate(display) + raise + + +def _init_osmesa(): + """Initialize OSMesa for software rendering. Returns (context, buffer). Raises RuntimeError on failure.""" + import ctypes + + logger.debug("_init_osmesa: starting") + os.environ["PYOPENGL_PLATFORM"] = "osmesa" + + logger.debug("_init_osmesa: importing OpenGL.osmesa") + from OpenGL import GL as _gl + from OpenGL.osmesa import ( + OSMesaCreateContextExt, OSMesaMakeCurrent, OSMesaDestroyContext, + OSMESA_RGBA, + ) + logger.debug("_init_osmesa: imports completed") + + ctx = OSMesaCreateContextExt(OSMESA_RGBA, 24, 0, 0, None) + if not ctx: + raise RuntimeError("OSMesaCreateContextExt() failed") + + width, height = 64, 64 + buffer = (ctypes.c_ubyte * (width * height * 4))() + + logger.debug("_init_osmesa: calling OSMesaMakeCurrent()") + if not OSMesaMakeCurrent(ctx, buffer, _gl.GL_UNSIGNED_BYTE, width, height): + OSMesaDestroyContext(ctx) + raise RuntimeError("OSMesaMakeCurrent() failed") + + logger.debug("_init_osmesa: completed successfully") + return ctx, buffer + + +class GLContext: + """Manages OpenGL context and resources for shader execution. + + Tries backends in order: GLFW (desktop) → EGL (headless GPU) → OSMesa (software). + """ + + _instance = None + _initialized = False + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + if GLContext._initialized: + logger.debug("GLContext.__init__: already initialized, skipping") + return + + logger.debug("GLContext.__init__: starting initialization") + + global glfw, EGL + + import time + start = time.perf_counter() + + self._backend = None + self._window = None + self._egl_display = None + self._egl_context = None + self._egl_surface = None + self._osmesa_ctx = None + self._osmesa_buffer = None + self._vao = None + + # Try backends in order: GLFW → EGL → OSMesa + errors = [] + + logger.debug("GLContext.__init__: trying GLFW backend") + try: + self._window, glfw = _init_glfw() + self._backend = "glfw" + logger.debug("GLContext.__init__: GLFW backend succeeded") + except Exception as e: + logger.debug(f"GLContext.__init__: GLFW backend failed: {e}") + errors.append(("GLFW", e)) + + if self._backend is None: + logger.debug("GLContext.__init__: trying EGL backend") + try: + self._egl_display, self._egl_context, self._egl_surface, EGL = _init_egl() + self._backend = "egl" + logger.debug("GLContext.__init__: EGL backend succeeded") + except Exception as e: + logger.debug(f"GLContext.__init__: EGL backend failed: {e}") + errors.append(("EGL", e)) + + if self._backend is None: + logger.debug("GLContext.__init__: trying OSMesa backend") + try: + self._osmesa_ctx, self._osmesa_buffer = _init_osmesa() + self._backend = "osmesa" + logger.debug("GLContext.__init__: OSMesa backend succeeded") + except Exception as e: + logger.debug(f"GLContext.__init__: OSMesa backend failed: {e}") + errors.append(("OSMesa", e)) + + if self._backend is None: + if sys.platform == "win32": + platform_help = ( + "Windows: Ensure GPU drivers are installed and display is available.\n" + " CPU-only/headless mode is not supported on Windows." + ) + elif sys.platform == "darwin": + platform_help = ( + "macOS: GLFW is not supported.\n" + " Install OSMesa via Homebrew: brew install mesa\n" + " Then: pip install PyOpenGL PyOpenGL-accelerate" + ) + else: + platform_help = ( + "Linux: Install one of these backends:\n" + " Desktop: sudo apt install libgl1-mesa-glx libglfw3\n" + " Headless with GPU: sudo apt install libegl1-mesa libgl1-mesa-dri\n" + " Headless (CPU): sudo apt install libosmesa6" + ) + + error_details = "\n".join(f" {name}: {err}" for name, err in errors) + raise RuntimeError( + f"Failed to create OpenGL context.\n\n" + f"Backend errors:\n{error_details}\n\n" + f"{platform_help}" + ) + + # Now import OpenGL.GL (after context is current) + logger.debug("GLContext.__init__: importing OpenGL.GL") + _import_opengl() + + # Create VAO (required for core profile, but OSMesa may use compat profile) + logger.debug("GLContext.__init__: creating VAO") + try: + vao = gl.glGenVertexArrays(1) + gl.glBindVertexArray(vao) + self._vao = vao # Only store after successful bind + logger.debug("GLContext.__init__: VAO created successfully") + except Exception as e: + logger.debug(f"GLContext.__init__: VAO creation failed (may be expected for OSMesa): {e}") + # OSMesa with older Mesa may not support VAOs + # Clean up if we created but couldn't bind + if vao: + try: + gl.glDeleteVertexArrays(1, [vao]) + except Exception: + pass + + elapsed = (time.perf_counter() - start) * 1000 + + # Log device info + renderer = gl.glGetString(gl.GL_RENDERER) + vendor = gl.glGetString(gl.GL_VENDOR) + version = gl.glGetString(gl.GL_VERSION) + renderer = renderer.decode() if renderer else "Unknown" + vendor = vendor.decode() if vendor else "Unknown" + version = version.decode() if version else "Unknown" + + GLContext._initialized = True + logger.info(f"GLSL context initialized in {elapsed:.1f}ms ({self._backend}) - {renderer} ({vendor}), GL {version}") + + def make_current(self): + if self._backend == "glfw": + glfw.make_context_current(self._window) + elif self._backend == "egl": + from OpenGL.EGL import eglMakeCurrent + eglMakeCurrent(self._egl_display, self._egl_surface, self._egl_surface, self._egl_context) + elif self._backend == "osmesa": + from OpenGL.osmesa import OSMesaMakeCurrent + OSMesaMakeCurrent(self._osmesa_ctx, self._osmesa_buffer, gl.GL_UNSIGNED_BYTE, 64, 64) + + if self._vao is not None: + gl.glBindVertexArray(self._vao) + + +def _compile_shader(source: str, shader_type: int) -> int: + """Compile a shader and return its ID.""" + shader = gl.glCreateShader(shader_type) + gl.glShaderSource(shader, source) + gl.glCompileShader(shader) + + if gl.glGetShaderiv(shader, gl.GL_COMPILE_STATUS) != gl.GL_TRUE: + error = gl.glGetShaderInfoLog(shader).decode() + gl.glDeleteShader(shader) + raise RuntimeError(f"Shader compilation failed:\n{error}") + + return shader + + +def _create_program(vertex_source: str, fragment_source: str) -> int: + """Create and link a shader program.""" + vertex_shader = _compile_shader(vertex_source, gl.GL_VERTEX_SHADER) + try: + fragment_shader = _compile_shader(fragment_source, gl.GL_FRAGMENT_SHADER) + except RuntimeError: + gl.glDeleteShader(vertex_shader) + raise + + program = gl.glCreateProgram() + gl.glAttachShader(program, vertex_shader) + gl.glAttachShader(program, fragment_shader) + gl.glLinkProgram(program) + + gl.glDeleteShader(vertex_shader) + gl.glDeleteShader(fragment_shader) + + if gl.glGetProgramiv(program, gl.GL_LINK_STATUS) != gl.GL_TRUE: + error = gl.glGetProgramInfoLog(program).decode() + gl.glDeleteProgram(program) + raise RuntimeError(f"Program linking failed:\n{error}") + + return program + + +def _render_shader_batch( + fragment_code: str, + width: int, + height: int, + image_batches: list[list[np.ndarray]], + floats: list[float], + ints: list[int], +) -> list[list[np.ndarray]]: + """ + Render a fragment shader for multiple batches efficiently. + + Compiles shader once, reuses framebuffer/textures across batches. + Supports multi-pass rendering via #pragma passes N directive. + + Args: + fragment_code: User's fragment shader code + width: Output width + height: Output height + image_batches: List of batches, each batch is a list of input images (H, W, C) float32 [0,1] + floats: List of float uniforms + ints: List of int uniforms + + Returns: + List of batch outputs, each is a list of output images (H, W, 4) float32 [0,1] + """ + import time + start_time = time.perf_counter() + + if not image_batches: + return [] + + ctx = GLContext() + ctx.make_current() + + # Convert from GLSL ES to desktop GLSL 330 + fragment_source = _convert_es_to_desktop(fragment_code) + + # Detect how many outputs the shader actually uses + num_outputs = _detect_output_count(fragment_code) + + # Detect multi-pass rendering + num_passes = _detect_pass_count(fragment_code) + + # Track resources for cleanup + program = None + fbo = None + output_textures = [] + input_textures = [] + ping_pong_textures = [] + ping_pong_fbos = [] + + num_inputs = len(image_batches[0]) + + try: + # Compile shaders (once for all batches) + try: + program = _create_program(VERTEX_SHADER, fragment_source) + except RuntimeError: + logger.error(f"Fragment shader:\n{fragment_source}") + raise + + gl.glUseProgram(program) + + # Create framebuffer with only the needed color attachments + fbo = gl.glGenFramebuffers(1) + gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo) + + draw_buffers = [] + for i in range(num_outputs): + tex = gl.glGenTextures(1) + output_textures.append(tex) + gl.glBindTexture(gl.GL_TEXTURE_2D, tex) + gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR) + gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0 + i, gl.GL_TEXTURE_2D, tex, 0) + draw_buffers.append(gl.GL_COLOR_ATTACHMENT0 + i) + + gl.glDrawBuffers(num_outputs, draw_buffers) + + if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE: + raise RuntimeError("Framebuffer is not complete") + + # Create ping-pong resources for multi-pass rendering + if num_passes > 1: + for _ in range(2): + pp_tex = gl.glGenTextures(1) + ping_pong_textures.append(pp_tex) + gl.glBindTexture(gl.GL_TEXTURE_2D, pp_tex) + gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, width, height, 0, gl.GL_RGBA, gl.GL_FLOAT, None) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE) + + pp_fbo = gl.glGenFramebuffers(1) + ping_pong_fbos.append(pp_fbo) + gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, pp_fbo) + gl.glFramebufferTexture2D(gl.GL_FRAMEBUFFER, gl.GL_COLOR_ATTACHMENT0, gl.GL_TEXTURE_2D, pp_tex, 0) + gl.glDrawBuffers(1, [gl.GL_COLOR_ATTACHMENT0]) + + if gl.glCheckFramebufferStatus(gl.GL_FRAMEBUFFER) != gl.GL_FRAMEBUFFER_COMPLETE: + raise RuntimeError("Ping-pong framebuffer is not complete") + + # Create input textures (reused for all batches) + for i in range(num_inputs): + tex = gl.glGenTextures(1) + input_textures.append(tex) + gl.glActiveTexture(gl.GL_TEXTURE0 + i) + gl.glBindTexture(gl.GL_TEXTURE_2D, tex) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE) + gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE) + + loc = gl.glGetUniformLocation(program, f"u_image{i}") + if loc >= 0: + gl.glUniform1i(loc, i) + + # Set static uniforms (once for all batches) + loc = gl.glGetUniformLocation(program, "u_resolution") + if loc >= 0: + gl.glUniform2f(loc, float(width), float(height)) + + for i, v in enumerate(floats): + loc = gl.glGetUniformLocation(program, f"u_float{i}") + if loc >= 0: + gl.glUniform1f(loc, v) + + for i, v in enumerate(ints): + loc = gl.glGetUniformLocation(program, f"u_int{i}") + if loc >= 0: + gl.glUniform1i(loc, v) + + # Get u_pass uniform location for multi-pass + pass_loc = gl.glGetUniformLocation(program, "u_pass") + + gl.glViewport(0, 0, width, height) + gl.glDisable(gl.GL_BLEND) # Ensure no alpha blending - write output directly + + # Process each batch + all_batch_outputs = [] + for images in image_batches: + # Update input textures with this batch's images + for i, img in enumerate(images): + gl.glActiveTexture(gl.GL_TEXTURE0 + i) + gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[i]) + + # Flip vertically for GL coordinates, ensure RGBA + h, w, c = img.shape + if c == 3: + img_upload = np.empty((h, w, 4), dtype=np.float32) + img_upload[:, :, :3] = img[::-1, :, :] + img_upload[:, :, 3] = 1.0 + else: + img_upload = np.ascontiguousarray(img[::-1, :, :]) + + gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA32F, w, h, 0, gl.GL_RGBA, gl.GL_FLOAT, img_upload) + + if num_passes == 1: + # Single pass - render directly to output FBO + gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo) + if pass_loc >= 0: + gl.glUniform1i(pass_loc, 0) + gl.glClearColor(0, 0, 0, 0) + gl.glClear(gl.GL_COLOR_BUFFER_BIT) + gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3) + else: + # Multi-pass rendering with ping-pong + for p in range(num_passes): + is_last_pass = (p == num_passes - 1) + + # Set pass uniform + if pass_loc >= 0: + gl.glUniform1i(pass_loc, p) + + if is_last_pass: + # Last pass renders to the main output FBO + gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, fbo) + else: + # Intermediate passes render to ping-pong FBO + target_fbo = ping_pong_fbos[p % 2] + gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, target_fbo) + + # Set input texture for this pass + gl.glActiveTexture(gl.GL_TEXTURE0) + if p == 0: + # First pass reads from original input + gl.glBindTexture(gl.GL_TEXTURE_2D, input_textures[0]) + else: + # Subsequent passes read from previous pass output + source_tex = ping_pong_textures[(p - 1) % 2] + gl.glBindTexture(gl.GL_TEXTURE_2D, source_tex) + + gl.glClearColor(0, 0, 0, 0) + gl.glClear(gl.GL_COLOR_BUFFER_BIT) + gl.glDrawArrays(gl.GL_TRIANGLES, 0, 3) + + # Read back outputs for this batch + # (glGetTexImage is synchronous, implicitly waits for rendering) + batch_outputs = [] + for tex in output_textures: + gl.glBindTexture(gl.GL_TEXTURE_2D, tex) + data = gl.glGetTexImage(gl.GL_TEXTURE_2D, 0, gl.GL_RGBA, gl.GL_FLOAT) + img = np.frombuffer(data, dtype=np.float32).reshape(height, width, 4) + batch_outputs.append(img[::-1, :, :].copy()) + + # Pad with black images for unused outputs + black_img = np.zeros((height, width, 4), dtype=np.float32) + for _ in range(num_outputs, MAX_OUTPUTS): + batch_outputs.append(black_img) + + all_batch_outputs.append(batch_outputs) + + elapsed = (time.perf_counter() - start_time) * 1000 + num_batches = len(image_batches) + pass_info = f", {num_passes} passes" if num_passes > 1 else "" + logger.info(f"GLSL shader executed in {elapsed:.1f}ms ({num_batches} batch{'es' if num_batches != 1 else ''}, {width}x{height}{pass_info})") + + return all_batch_outputs + + finally: + # Unbind before deleting + gl.glBindFramebuffer(gl.GL_FRAMEBUFFER, 0) + gl.glUseProgram(0) + + for tex in input_textures: + gl.glDeleteTextures(int(tex)) + for tex in output_textures: + gl.glDeleteTextures(int(tex)) + for tex in ping_pong_textures: + gl.glDeleteTextures(int(tex)) + if fbo is not None: + gl.glDeleteFramebuffers(1, [fbo]) + for pp_fbo in ping_pong_fbos: + gl.glDeleteFramebuffers(1, [pp_fbo]) + if program is not None: + gl.glDeleteProgram(program) + +class GLSLShader(io.ComfyNode): + + @classmethod + def define_schema(cls) -> io.Schema: + image_template = io.Autogrow.TemplatePrefix( + io.Image.Input("image"), + prefix="image", + min=1, + max=MAX_IMAGES, + ) + + float_template = io.Autogrow.TemplatePrefix( + io.Float.Input("float", default=0.0), + prefix="u_float", + min=0, + max=MAX_UNIFORMS, + ) + + int_template = io.Autogrow.TemplatePrefix( + io.Int.Input("int", default=0), + prefix="u_int", + min=0, + max=MAX_UNIFORMS, + ) + + return io.Schema( + node_id="GLSLShader", + display_name="GLSL Shader", + category="image/shader", + description=( + "Apply GLSL ES fragment shaders to images. " + "u_resolution (vec2) is always available." + ), + inputs=[ + io.String.Input( + "fragment_shader", + default=DEFAULT_FRAGMENT_SHADER, + multiline=True, + tooltip="GLSL fragment shader source code (GLSL ES 3.00 / WebGL 2.0 compatible)", + ), + io.DynamicCombo.Input( + "size_mode", + options=[ + io.DynamicCombo.Option("from_input", []), + io.DynamicCombo.Option( + "custom", + [ + io.Int.Input( + "width", + default=512, + min=1, + max=nodes.MAX_RESOLUTION, + ), + io.Int.Input( + "height", + default=512, + min=1, + max=nodes.MAX_RESOLUTION, + ), + ], + ), + ], + tooltip="Output size: 'from_input' uses first input image dimensions, 'custom' allows manual size", + ), + io.Autogrow.Input("images", template=image_template, tooltip=f"Images are available as u_image0-{MAX_IMAGES-1} (sampler2D) in the shader code"), + io.Autogrow.Input("floats", template=float_template, tooltip=f"Floats are available as u_float0-{MAX_UNIFORMS-1} in the shader code"), + io.Autogrow.Input("ints", template=int_template, tooltip=f"Ints are available as u_int0-{MAX_UNIFORMS-1} in the shader code"), + ], + outputs=[ + io.Image.Output(display_name="IMAGE0", tooltip="Available via layout(location = 0) out vec4 fragColor0 in the shader code"), + io.Image.Output(display_name="IMAGE1", tooltip="Available via layout(location = 1) out vec4 fragColor1 in the shader code"), + io.Image.Output(display_name="IMAGE2", tooltip="Available via layout(location = 2) out vec4 fragColor2 in the shader code"), + io.Image.Output(display_name="IMAGE3", tooltip="Available via layout(location = 3) out vec4 fragColor3 in the shader code"), + ], + ) + + @classmethod + def execute( + cls, + fragment_shader: str, + size_mode: SizeModeInput, + images: io.Autogrow.Type, + floats: io.Autogrow.Type = None, + ints: io.Autogrow.Type = None, + **kwargs, + ) -> io.NodeOutput: + image_list = [v for v in images.values() if v is not None] + float_list = ( + [v if v is not None else 0.0 for v in floats.values()] if floats else [] + ) + int_list = [v if v is not None else 0 for v in ints.values()] if ints else [] + + if not image_list: + raise ValueError("At least one input image is required") + + # Determine output dimensions + if size_mode["size_mode"] == "custom": + out_width = size_mode["width"] + out_height = size_mode["height"] + else: + out_height, out_width = image_list[0].shape[1:3] + + batch_size = image_list[0].shape[0] + + # Prepare batches + image_batches = [] + for batch_idx in range(batch_size): + batch_images = [img_tensor[batch_idx].cpu().numpy().astype(np.float32) for img_tensor in image_list] + image_batches.append(batch_images) + + all_batch_outputs = _render_shader_batch( + fragment_shader, + out_width, + out_height, + image_batches, + float_list, + int_list, + ) + + # Collect outputs into tensors + all_outputs = [[] for _ in range(MAX_OUTPUTS)] + for batch_outputs in all_batch_outputs: + for i, out_img in enumerate(batch_outputs): + all_outputs[i].append(torch.from_numpy(out_img)) + + output_tensors = [torch.stack(all_outputs[i], dim=0) for i in range(MAX_OUTPUTS)] + return io.NodeOutput( + *output_tensors, + ui=cls._build_ui_output(image_list, output_tensors[0]), + ) + + @classmethod + def _build_ui_output( + cls, image_list: list[torch.Tensor], output_batch: torch.Tensor + ) -> dict[str, list]: + """Build UI output with input and output images for client-side shader execution.""" + input_images_ui = [] + for img in image_list: + input_images_ui.extend(ui.ImageSaveHelper.save_images( + img, + filename_prefix="GLSLShader_input", + folder_type=io.FolderType.temp, + cls=None, + compress_level=1, + )) + + output_images_ui = ui.ImageSaveHelper.save_images( + output_batch, + filename_prefix="GLSLShader_output", + folder_type=io.FolderType.temp, + cls=None, + compress_level=1, + ) + + return {"input_images": input_images_ui, "images": output_images_ui} + + +class GLSLExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [GLSLShader] + + +async def comfy_entrypoint() -> GLSLExtension: + return GLSLExtension() diff --git a/comfy_extras/nodes_hooks.py b/comfy_extras/nodes_hooks.py index 58e511ef5..be7d600cd 100644 --- a/comfy_extras/nodes_hooks.py +++ b/comfy_extras/nodes_hooks.py @@ -233,8 +233,8 @@ class SetClipHooks: return { "required": { "clip": ("CLIP",), - "apply_to_conds": ("BOOLEAN", {"default": True}), - "schedule_clip": ("BOOLEAN", {"default": False}) + "apply_to_conds": ("BOOLEAN", {"default": True, "advanced": True}), + "schedule_clip": ("BOOLEAN", {"default": False, "advanced": True}) }, "optional": { "hooks": ("HOOKS",) @@ -512,7 +512,7 @@ class CreateHookKeyframesInterpolated: "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001}), "keyframes_count": ("INT", {"default": 5, "min": 2, "max": 100, "step": 1}), - "print_keyframes": ("BOOLEAN", {"default": False}), + "print_keyframes": ("BOOLEAN", {"default": False, "advanced": True}), }, "optional": { "prev_hook_kf": ("HOOK_KEYFRAMES",), @@ -557,7 +557,7 @@ class CreateHookKeyframesFromFloats: "floats_strength": ("FLOATS", {"default": -1, "min": -1, "step": 0.001, "forceInput": True}), "start_percent": ("FLOAT", {"default": 0.0, "min": 0.0, "max": 1.0, "step": 0.001}), "end_percent": ("FLOAT", {"default": 1.0, "min": 0.0, "max": 1.0, "step": 0.001}), - "print_keyframes": ("BOOLEAN", {"default": False}), + "print_keyframes": ("BOOLEAN", {"default": False, "advanced": True}), }, "optional": { "prev_hook_kf": ("HOOK_KEYFRAMES",), diff --git a/comfy_extras/nodes_hunyuan.py b/comfy_extras/nodes_hunyuan.py index 774da75a3..4ea93a499 100644 --- a/comfy_extras/nodes_hunyuan.py +++ b/comfy_extras/nodes_hunyuan.py @@ -138,7 +138,7 @@ class HunyuanVideo15SuperResolution(io.ComfyNode): io.Image.Input("start_image", optional=True), io.ClipVisionOutput.Input("clip_vision_output", optional=True), io.Latent.Input("latent"), - io.Float.Input("noise_augmentation", default=0.70, min=0.0, max=1.0, step=0.01), + io.Float.Input("noise_augmentation", default=0.70, min=0.0, max=1.0, step=0.01, advanced=True), ], outputs=[ @@ -285,6 +285,7 @@ class TextEncodeHunyuanVideo_ImageToVideo(io.ComfyNode): min=1, max=512, tooltip="How much the image influences things vs the text prompt. Higher number means more influence from the text prompt.", + advanced=True, ), ], outputs=[ @@ -313,7 +314,7 @@ class HunyuanImageToVideo(io.ComfyNode): io.Int.Input("height", default=480, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("length", default=53, min=1, max=nodes.MAX_RESOLUTION, step=4), io.Int.Input("batch_size", default=1, min=1, max=4096), - io.Combo.Input("guidance_type", options=["v1 (concat)", "v2 (replace)", "custom"]), + io.Combo.Input("guidance_type", options=["v1 (concat)", "v2 (replace)", "custom"], advanced=True), io.Image.Input("start_image", optional=True), ], outputs=[ @@ -384,7 +385,7 @@ class HunyuanRefinerLatent(io.ComfyNode): io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), io.Latent.Input("latent"), - io.Float.Input("noise_augmentation", default=0.10, min=0.0, max=1.0, step=0.01), + io.Float.Input("noise_augmentation", default=0.10, min=0.0, max=1.0, step=0.01, advanced=True), ], outputs=[ diff --git a/comfy_extras/nodes_hunyuan3d.py b/comfy_extras/nodes_hunyuan3d.py index 5bb5df48e..df0c3e4b1 100644 --- a/comfy_extras/nodes_hunyuan3d.py +++ b/comfy_extras/nodes_hunyuan3d.py @@ -106,8 +106,8 @@ class VAEDecodeHunyuan3D(IO.ComfyNode): inputs=[ IO.Latent.Input("samples"), IO.Vae.Input("vae"), - IO.Int.Input("num_chunks", default=8000, min=1000, max=500000), - IO.Int.Input("octree_resolution", default=256, min=16, max=512), + IO.Int.Input("num_chunks", default=8000, min=1000, max=500000, advanced=True), + IO.Int.Input("octree_resolution", default=256, min=16, max=512, advanced=True), ], outputs=[ IO.Voxel.Output(), @@ -456,7 +456,7 @@ class VoxelToMesh(IO.ComfyNode): category="3d", inputs=[ IO.Voxel.Input("voxel"), - IO.Combo.Input("algorithm", options=["surface net", "basic"]), + IO.Combo.Input("algorithm", options=["surface net", "basic"], advanced=True), IO.Float.Input("threshold", default=0.6, min=-1.0, max=1.0, step=0.01), ], outputs=[ @@ -618,18 +618,32 @@ class SaveGLB(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="SaveGLB", + display_name="Save 3D Model", search_aliases=["export 3d model", "save mesh"], category="3d", + essentials_category="Basics", is_output_node=True, inputs=[ - IO.Mesh.Input("mesh"), + IO.MultiType.Input( + IO.Mesh.Input("mesh"), + types=[ + IO.File3DGLB, + IO.File3DGLTF, + IO.File3DOBJ, + IO.File3DFBX, + IO.File3DSTL, + IO.File3DUSDZ, + IO.File3DAny, + ], + tooltip="Mesh or 3D file to save", + ), IO.String.Input("filename_prefix", default="mesh/ComfyUI"), ], hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo] ) @classmethod - def execute(cls, mesh, filename_prefix) -> IO.NodeOutput: + def execute(cls, mesh: Types.MESH | Types.File3D, filename_prefix: str) -> IO.NodeOutput: full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(filename_prefix, folder_paths.get_output_directory()) results = [] @@ -641,15 +655,27 @@ class SaveGLB(IO.ComfyNode): for x in cls.hidden.extra_pnginfo: metadata[x] = json.dumps(cls.hidden.extra_pnginfo[x]) - for i in range(mesh.vertices.shape[0]): - f = f"{filename}_{counter:05}_.glb" - save_glb(mesh.vertices[i], mesh.faces[i], os.path.join(full_output_folder, f), metadata) + if isinstance(mesh, Types.File3D): + # Handle File3D input - save BytesIO data to output folder + ext = mesh.format or "glb" + f = f"{filename}_{counter:05}_.{ext}" + mesh.save_to(os.path.join(full_output_folder, f)) results.append({ "filename": f, "subfolder": subfolder, "type": "output" }) - counter += 1 + else: + # Handle Mesh input - save vertices and faces as GLB + for i in range(mesh.vertices.shape[0]): + f = f"{filename}_{counter:05}_.glb" + save_glb(mesh.vertices[i], mesh.faces[i], os.path.join(full_output_folder, f), metadata) + results.append({ + "filename": f, + "subfolder": subfolder, + "type": "output" + }) + counter += 1 return IO.NodeOutput(ui={"3d": results}) diff --git a/comfy_extras/nodes_hypertile.py b/comfy_extras/nodes_hypertile.py index 0ad5e6773..354d96db1 100644 --- a/comfy_extras/nodes_hypertile.py +++ b/comfy_extras/nodes_hypertile.py @@ -30,10 +30,10 @@ class HyperTile(io.ComfyNode): category="model_patches/unet", inputs=[ io.Model.Input("model"), - io.Int.Input("tile_size", default=256, min=1, max=2048), - io.Int.Input("swap_size", default=2, min=1, max=128), - io.Int.Input("max_depth", default=0, min=0, max=10), - io.Boolean.Input("scale_depth", default=False), + io.Int.Input("tile_size", default=256, min=1, max=2048, advanced=True), + io.Int.Input("swap_size", default=2, min=1, max=128, advanced=True), + io.Int.Input("max_depth", default=0, min=0, max=10, advanced=True), + io.Boolean.Input("scale_depth", default=False, advanced=True), ], outputs=[ io.Model.Output(), diff --git a/comfy_extras/nodes_images.py b/comfy_extras/nodes_images.py index cb4fb24a1..727d7d09d 100644 --- a/comfy_extras/nodes_images.py +++ b/comfy_extras/nodes_images.py @@ -6,6 +6,7 @@ import folder_paths import json import os import re +import math import torch import comfy.utils @@ -23,8 +24,10 @@ class ImageCrop(IO.ComfyNode): return IO.Schema( node_id="ImageCrop", search_aliases=["trim"], - display_name="Image Crop", + display_name="Image Crop (Deprecated)", category="image/transform", + is_deprecated=True, + essentials_category="Image Tools", inputs=[ IO.Image.Input("image"), IO.Int.Input("width", default=512, min=1, max=nodes.MAX_RESOLUTION, step=1), @@ -47,6 +50,57 @@ class ImageCrop(IO.ComfyNode): crop = execute # TODO: remove +class ImageCropV2(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ImageCropV2", + search_aliases=["trim"], + display_name="Image Crop", + category="image/transform", + inputs=[ + IO.Image.Input("image"), + IO.BoundingBox.Input("crop_region", component="ImageCrop"), + ], + outputs=[IO.Image.Output()], + ) + + @classmethod + def execute(cls, image, crop_region) -> IO.NodeOutput: + x = crop_region.get("x", 0) + y = crop_region.get("y", 0) + width = crop_region.get("width", 512) + height = crop_region.get("height", 512) + + x = min(x, image.shape[2] - 1) + y = min(y, image.shape[1] - 1) + to_x = width + x + to_y = height + y + img = image[:,y:to_y, x:to_x, :] + return IO.NodeOutput(img, ui=UI.PreviewImage(img)) + + +class BoundingBox(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="PrimitiveBoundingBox", + display_name="Bounding Box", + category="utils/primitive", + inputs=[ + IO.Int.Input("x", default=0, min=0, max=MAX_RESOLUTION), + IO.Int.Input("y", default=0, min=0, max=MAX_RESOLUTION), + IO.Int.Input("width", default=512, min=1, max=MAX_RESOLUTION), + IO.Int.Input("height", default=512, min=1, max=MAX_RESOLUTION), + ], + outputs=[IO.BoundingBox.Output()], + ) + + @classmethod + def execute(cls, x, y, width, height) -> IO.NodeOutput: + return IO.NodeOutput({"x": x, "y": y, "width": width, "height": height}) + + class RepeatImageBatch(IO.ComfyNode): @classmethod def define_schema(cls): @@ -175,7 +229,7 @@ class SaveAnimatedPNG(IO.ComfyNode): IO.Image.Input("images"), IO.String.Input("filename_prefix", default="ComfyUI"), IO.Float.Input("fps", default=6.0, min=0.01, max=1000.0, step=0.01), - IO.Int.Input("compress_level", default=4, min=0, max=9), + IO.Int.Input("compress_level", default=4, min=0, max=9, advanced=True), ], hidden=[IO.Hidden.prompt, IO.Hidden.extra_pnginfo], is_output_node=True, @@ -212,8 +266,8 @@ class ImageStitch(IO.ComfyNode): IO.Image.Input("image1"), IO.Combo.Input("direction", options=["right", "down", "left", "up"], default="right"), IO.Boolean.Input("match_image_size", default=True), - IO.Int.Input("spacing_width", default=0, min=0, max=1024, step=2), - IO.Combo.Input("spacing_color", options=["white", "black", "red", "green", "blue"], default="white"), + IO.Int.Input("spacing_width", default=0, min=0, max=1024, step=2, advanced=True), + IO.Combo.Input("spacing_color", options=["white", "black", "red", "green", "blue"], default="white", advanced=True), IO.Image.Input("image2", optional=True), ], outputs=[IO.Image.Output()], @@ -383,8 +437,8 @@ class ResizeAndPadImage(IO.ComfyNode): IO.Image.Input("image"), IO.Int.Input("target_width", default=512, min=1, max=nodes.MAX_RESOLUTION, step=1), IO.Int.Input("target_height", default=512, min=1, max=nodes.MAX_RESOLUTION, step=1), - IO.Combo.Input("padding_color", options=["white", "black"]), - IO.Combo.Input("interpolation", options=["area", "bicubic", "nearest-exact", "bilinear", "lanczos"]), + IO.Combo.Input("padding_color", options=["white", "black"], advanced=True), + IO.Combo.Input("interpolation", options=["area", "bicubic", "nearest-exact", "bilinear", "lanczos"], advanced=True), ], outputs=[IO.Image.Output()], ) @@ -535,8 +589,10 @@ class ImageRotate(IO.ComfyNode): def define_schema(cls): return IO.Schema( node_id="ImageRotate", + display_name="Image Rotate", search_aliases=["turn", "flip orientation"], category="image/transform", + essentials_category="Image Tools", inputs=[ IO.Image.Input("image"), IO.Combo.Input("rotation", options=["none", "90 degrees", "180 degrees", "270 degrees"]), @@ -627,11 +683,179 @@ class ImageScaleToMaxDimension(IO.ComfyNode): upscale = execute # TODO: remove +class SplitImageToTileList(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="SplitImageToTileList", + category="image/batch", + search_aliases=["split image", "tile image", "slice image"], + display_name="Split Image into List of Tiles", + description="Splits an image into a batched list of tiles with a specified overlap.", + inputs=[ + IO.Image.Input("image"), + IO.Int.Input("tile_width", default=1024, min=64, max=MAX_RESOLUTION), + IO.Int.Input("tile_height", default=1024, min=64, max=MAX_RESOLUTION), + IO.Int.Input("overlap", default=128, min=0, max=4096), + ], + outputs=[ + IO.Image.Output(is_output_list=True), + ], + ) + + @staticmethod + def get_grid_coords(width, height, tile_width, tile_height, overlap): + coords = [] + stride_x = max(1, tile_width - overlap) + stride_y = max(1, tile_height - overlap) + + y = 0 + while y < height: + x = 0 + y_end = min(y + tile_height, height) + y_start = max(0, y_end - tile_height) + + while x < width: + x_end = min(x + tile_width, width) + x_start = max(0, x_end - tile_width) + + coords.append((x_start, y_start, x_end, y_end)) + + if x_end >= width: + break + x += stride_x + + if y_end >= height: + break + y += stride_y + + return coords + + @classmethod + def execute(cls, image, tile_width, tile_height, overlap): + b, h, w, c = image.shape + coords = cls.get_grid_coords(w, h, tile_width, tile_height, overlap) + + output_list = [] + for (x_start, y_start, x_end, y_end) in coords: + tile = image[:, y_start:y_end, x_start:x_end, :] + output_list.append(tile) + + return IO.NodeOutput(output_list) + + +class ImageMergeTileList(IO.ComfyNode): + @classmethod + def define_schema(cls): + return IO.Schema( + node_id="ImageMergeTileList", + display_name="Merge List of Tiles to Image", + category="image/batch", + search_aliases=["split image", "tile image", "slice image"], + is_input_list=True, + inputs=[ + IO.Image.Input("image_list"), + IO.Int.Input("final_width", default=1024, min=64, max=32768), + IO.Int.Input("final_height", default=1024, min=64, max=32768), + IO.Int.Input("overlap", default=128, min=0, max=4096), + ], + outputs=[ + IO.Image.Output(is_output_list=False), + ], + ) + + @staticmethod + def get_grid_coords(width, height, tile_width, tile_height, overlap): + coords = [] + stride_x = max(1, tile_width - overlap) + stride_y = max(1, tile_height - overlap) + + y = 0 + while y < height: + x = 0 + y_end = min(y + tile_height, height) + y_start = max(0, y_end - tile_height) + + while x < width: + x_end = min(x + tile_width, width) + x_start = max(0, x_end - tile_width) + + coords.append((x_start, y_start, x_end, y_end)) + + if x_end >= width: + break + x += stride_x + + if y_end >= height: + break + y += stride_y + + return coords + + @classmethod + def execute(cls, image_list, final_width, final_height, overlap): + w = final_width[0] + h = final_height[0] + ovlp = overlap[0] + feather_str = 1.0 + + first_tile = image_list[0] + b, t_h, t_w, c = first_tile.shape + device = first_tile.device + dtype = first_tile.dtype + + coords = cls.get_grid_coords(w, h, t_w, t_h, ovlp) + + canvas = torch.zeros((b, h, w, c), device=device, dtype=dtype) + weights = torch.zeros((b, h, w, 1), device=device, dtype=dtype) + + if ovlp > 0: + y_w = torch.sin(math.pi * torch.linspace(0, 1, t_h, device=device, dtype=dtype)) + x_w = torch.sin(math.pi * torch.linspace(0, 1, t_w, device=device, dtype=dtype)) + y_w = torch.clamp(y_w, min=1e-5) + x_w = torch.clamp(x_w, min=1e-5) + + sine_mask = (y_w.unsqueeze(1) * x_w.unsqueeze(0)).unsqueeze(0).unsqueeze(-1) + flat_mask = torch.ones_like(sine_mask) + + weight_mask = torch.lerp(flat_mask, sine_mask, feather_str) + else: + weight_mask = torch.ones((1, t_h, t_w, 1), device=device, dtype=dtype) + + for i, (x_start, y_start, x_end, y_end) in enumerate(coords): + if i >= len(image_list): + break + + tile = image_list[i] + + region_h = y_end - y_start + region_w = x_end - x_start + + real_h = min(region_h, tile.shape[1]) + real_w = min(region_w, tile.shape[2]) + + y_end_actual = y_start + real_h + x_end_actual = x_start + real_w + + tile_crop = tile[:, :real_h, :real_w, :] + mask_crop = weight_mask[:, :real_h, :real_w, :] + + canvas[:, y_start:y_end_actual, x_start:x_end_actual, :] += tile_crop * mask_crop + weights[:, y_start:y_end_actual, x_start:x_end_actual, :] += mask_crop + + weights[weights == 0] = 1.0 + merged_image = canvas / weights + + return IO.NodeOutput(merged_image) + + class ImagesExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[IO.ComfyNode]]: return [ ImageCrop, + ImageCropV2, + BoundingBox, RepeatImageBatch, ImageFromBatch, ImageAddNoise, @@ -644,6 +868,8 @@ class ImagesExtension(ComfyExtension): ImageRotate, ImageFlip, ImageScaleToMaxDimension, + SplitImageToTileList, + ImageMergeTileList, ] diff --git a/comfy_extras/nodes_latent.py b/comfy_extras/nodes_latent.py index 6aecf1561..8bb368dec 100644 --- a/comfy_extras/nodes_latent.py +++ b/comfy_extras/nodes_latent.py @@ -391,8 +391,9 @@ class LatentOperationTonemapReinhard(io.ComfyNode): latent_vector_magnitude = (torch.linalg.vector_norm(latent, dim=(1)) + 0.0000000001)[:,None] normalized_latent = latent / latent_vector_magnitude - mean = torch.mean(latent_vector_magnitude, dim=(1,2,3), keepdim=True) - std = torch.std(latent_vector_magnitude, dim=(1,2,3), keepdim=True) + dims = list(range(1, latent_vector_magnitude.ndim)) + mean = torch.mean(latent_vector_magnitude, dim=dims, keepdim=True) + std = torch.std(latent_vector_magnitude, dim=dims, keepdim=True) top = (std * 5 + mean) * multiplier @@ -412,9 +413,9 @@ class LatentOperationSharpen(io.ComfyNode): category="latent/advanced/operations", is_experimental=True, inputs=[ - io.Int.Input("sharpen_radius", default=9, min=1, max=31, step=1), - io.Float.Input("sigma", default=1.0, min=0.1, max=10.0, step=0.1), - io.Float.Input("alpha", default=0.1, min=0.0, max=5.0, step=0.01), + io.Int.Input("sharpen_radius", default=9, min=1, max=31, step=1, advanced=True), + io.Float.Input("sigma", default=1.0, min=0.1, max=10.0, step=0.1, advanced=True), + io.Float.Input("alpha", default=0.1, min=0.0, max=5.0, step=0.01, advanced=True), ], outputs=[ io.LatentOperation.Output(), diff --git a/comfy_extras/nodes_load_3d.py b/comfy_extras/nodes_load_3d.py index 4b8d950ae..9112bdd0a 100644 --- a/comfy_extras/nodes_load_3d.py +++ b/comfy_extras/nodes_load_3d.py @@ -1,9 +1,10 @@ import nodes import folder_paths import os +import uuid from typing_extensions import override -from comfy_api.latest import IO, ComfyExtension, InputImpl, UI +from comfy_api.latest import IO, UI, ComfyExtension, InputImpl, Types from pathlib import Path @@ -30,6 +31,7 @@ class Load3D(IO.ComfyNode): node_id="Load3D", display_name="Load 3D & Animation", category="3d", + essentials_category="Basics", is_experimental=True, inputs=[ IO.Combo.Input("model_file", options=sorted(files), upload=IO.UploadType.model), @@ -44,6 +46,7 @@ class Load3D(IO.ComfyNode): IO.Image.Output(display_name="normal"), IO.Load3DCamera.Output(display_name="camera_info"), IO.Video.Output(display_name="recording_video"), + IO.File3DAny.Output(display_name="model_3d"), ], ) @@ -65,7 +68,8 @@ class Load3D(IO.ComfyNode): video = InputImpl.VideoFromFile(recording_video_path) - return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video) + file_3d = Types.File3D(folder_paths.get_annotated_filepath(model_file)) + return IO.NodeOutput(output_image, output_mask, model_file, normal_image, image['camera_info'], video, file_3d) process = execute # TODO: remove @@ -81,18 +85,35 @@ class Preview3D(IO.ComfyNode): is_experimental=True, is_output_node=True, inputs=[ - IO.String.Input("model_file", default="", multiline=False), - IO.Load3DCamera.Input("camera_info", optional=True), - IO.Image.Input("bg_image", optional=True), + IO.MultiType.Input( + IO.String.Input("model_file", default="", multiline=False), + types=[ + IO.File3DGLB, + IO.File3DGLTF, + IO.File3DFBX, + IO.File3DOBJ, + IO.File3DSTL, + IO.File3DUSDZ, + IO.File3DAny, + ], + tooltip="3D model file or path string", + ), + IO.Load3DCamera.Input("camera_info", optional=True, advanced=True), + IO.Image.Input("bg_image", optional=True, advanced=True), ], outputs=[], ) @classmethod - def execute(cls, model_file, **kwargs) -> IO.NodeOutput: + def execute(cls, model_file: str | Types.File3D, **kwargs) -> IO.NodeOutput: + if isinstance(model_file, Types.File3D): + filename = f"preview3d_{uuid.uuid4().hex}.{model_file.format}" + model_file.save_to(os.path.join(folder_paths.get_output_directory(), filename)) + else: + filename = model_file camera_info = kwargs.get("camera_info", None) bg_image = kwargs.get("bg_image", None) - return IO.NodeOutput(ui=UI.PreviewUI3D(model_file, camera_info, bg_image=bg_image)) + return IO.NodeOutput(ui=UI.PreviewUI3D(filename, camera_info, bg_image=bg_image)) process = execute # TODO: remove diff --git a/comfy_extras/nodes_lora_extract.py b/comfy_extras/nodes_lora_extract.py index fb89e03f4..975f90f45 100644 --- a/comfy_extras/nodes_lora_extract.py +++ b/comfy_extras/nodes_lora_extract.py @@ -7,6 +7,7 @@ import logging from enum import Enum from typing_extensions import override from comfy_api.latest import ComfyExtension, io +from tqdm.auto import trange CLAMP_QUANTILE = 0.99 @@ -49,12 +50,22 @@ LORA_TYPES = {"standard": LORAType.STANDARD, "full_diff": LORAType.FULL_DIFF} def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora_type, bias_diff=False): - comfy.model_management.load_models_gpu([model_diff], force_patch_weights=True) + comfy.model_management.load_models_gpu([model_diff]) sd = model_diff.model_state_dict(filter_prefix=prefix_model) - for k in sd: - if k.endswith(".weight"): + sd_keys = list(sd.keys()) + for index in trange(len(sd_keys), unit="weight"): + k = sd_keys[index] + op_keys = sd_keys[index].rsplit('.', 1) + if len(op_keys) < 2 or op_keys[1] not in ["weight", "bias"] or (op_keys[1] == "bias" and not bias_diff): + continue + op = comfy.utils.get_attr(model_diff.model, op_keys[0]) + if hasattr(op, "comfy_cast_weights") and not getattr(op, "comfy_patched_weights", False): + weight_diff = model_diff.patch_weight_to_device(k, model_diff.load_device, return_weight=True) + else: weight_diff = sd[k] + + if op_keys[1] == "weight": if lora_type == LORAType.STANDARD: if weight_diff.ndim < 2: if bias_diff: @@ -69,8 +80,8 @@ def calc_lora_model(model_diff, rank, prefix_model, prefix_lora, output_sd, lora elif lora_type == LORAType.FULL_DIFF: output_sd["{}{}.diff".format(prefix_lora, k[len(prefix_model):-7])] = weight_diff.contiguous().half().cpu() - elif bias_diff and k.endswith(".bias"): - output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = sd[k].contiguous().half().cpu() + elif bias_diff and op_keys[1] == "bias": + output_sd["{}{}.diff_b".format(prefix_lora, k[len(prefix_model):-5])] = weight_diff.contiguous().half().cpu() return output_sd class LoraSave(io.ComfyNode): @@ -83,9 +94,9 @@ class LoraSave(io.ComfyNode): category="_for_testing", inputs=[ io.String.Input("filename_prefix", default="loras/ComfyUI_extracted_lora"), - io.Int.Input("rank", default=8, min=1, max=4096, step=1), - io.Combo.Input("lora_type", options=tuple(LORA_TYPES.keys())), - io.Boolean.Input("bias_diff", default=True), + io.Int.Input("rank", default=8, min=1, max=4096, step=1, advanced=True), + io.Combo.Input("lora_type", options=tuple(LORA_TYPES.keys()), advanced=True), + io.Boolean.Input("bias_diff", default=True, advanced=True), io.Model.Input( "model_diff", tooltip="The ModelSubtract output to be converted to a lora.", diff --git a/comfy_extras/nodes_lt.py b/comfy_extras/nodes_lt.py index 2aec62f61..32fe921ff 100644 --- a/comfy_extras/nodes_lt.py +++ b/comfy_extras/nodes_lt.py @@ -134,6 +134,36 @@ class LTXVImgToVideoInplace(io.ComfyNode): generate = execute # TODO: remove +def _append_guide_attention_entry(positive, negative, pre_filter_count, latent_shape, strength=1.0): + """Append a guide_attention_entry to both positive and negative conditioning. + + Each entry tracks one guide reference for per-reference attention control. + Entries are derived independently from each conditioning to avoid cross-contamination. + """ + new_entry = { + "pre_filter_count": pre_filter_count, + "strength": strength, + "pixel_mask": None, + "latent_shape": latent_shape, + } + results = [] + for cond in (positive, negative): + # Read existing entries from this specific conditioning + existing = [] + for t in cond: + found = t[1].get("guide_attention_entries", None) + if found is not None: + existing = found + break + # Shallow copy and append (no deepcopy needed — entries contain + # only scalars and None for pixel_mask at this call site). + entries = [*existing, new_entry] + results.append(node_helpers.conditioning_set_values( + cond, {"guide_attention_entries": entries} + )) + return results[0], results[1] + + def conditioning_get_any_value(conditioning, key, default=None): for t in conditioning: if key in t[1]: @@ -324,6 +354,13 @@ class LTXVAddGuide(io.ComfyNode): scale_factors, ) + # Track this guide for per-reference attention control. + pre_filter_count = t.shape[2] * t.shape[3] * t.shape[4] + guide_latent_shape = list(t.shape[2:]) # [F, H, W] + positive, negative = _append_guide_attention_entry( + positive, negative, pre_filter_count, guide_latent_shape, strength=strength, + ) + return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask}) generate = execute # TODO: remove @@ -359,8 +396,14 @@ class LTXVCropGuides(io.ComfyNode): latent_image = latent_image[:, :, :-num_keyframes] noise_mask = noise_mask[:, :, :-num_keyframes] - positive = node_helpers.conditioning_set_values(positive, {"keyframe_idxs": None}) - negative = node_helpers.conditioning_set_values(negative, {"keyframe_idxs": None}) + positive = node_helpers.conditioning_set_values(positive, { + "keyframe_idxs": None, + "guide_attention_entries": None, + }) + negative = node_helpers.conditioning_set_values(negative, { + "keyframe_idxs": None, + "guide_attention_entries": None, + }) return io.NodeOutput(positive, negative, {"samples": latent_image, "noise_mask": noise_mask}) @@ -450,6 +493,7 @@ class LTXVScheduler(io.ComfyNode): id="stretch", default=True, tooltip="Stretch the sigmas to be in the range [terminal, 1].", + advanced=True, ), io.Float.Input( id="terminal", @@ -458,6 +502,7 @@ class LTXVScheduler(io.ComfyNode): max=0.99, step=0.01, tooltip="The terminal value of the sigmas after stretching.", + advanced=True, ), io.Latent.Input("latent", optional=True), ], diff --git a/comfy_extras/nodes_lt_audio.py b/comfy_extras/nodes_lt_audio.py index 1966fd1bf..3e4222264 100644 --- a/comfy_extras/nodes_lt_audio.py +++ b/comfy_extras/nodes_lt_audio.py @@ -189,6 +189,7 @@ class LTXAVTextEncoderLoader(io.ComfyNode): io.Combo.Input( "device", options=["default", "cpu"], + advanced=True, ) ], outputs=[io.Clip.Output()], diff --git a/comfy_extras/nodes_lumina2.py b/comfy_extras/nodes_lumina2.py index 2550475ae..b35ab8b7d 100644 --- a/comfy_extras/nodes_lumina2.py +++ b/comfy_extras/nodes_lumina2.py @@ -12,8 +12,8 @@ class RenormCFG(io.ComfyNode): category="advanced/model", inputs=[ io.Model.Input("model"), - io.Float.Input("cfg_trunc", default=100, min=0.0, max=100.0, step=0.01), - io.Float.Input("renorm_cfg", default=1.0, min=0.0, max=100.0, step=0.01), + io.Float.Input("cfg_trunc", default=100, min=0.0, max=100.0, step=0.01, advanced=True), + io.Float.Input("renorm_cfg", default=1.0, min=0.0, max=100.0, step=0.01, advanced=True), ], outputs=[ io.Model.Output(), diff --git a/comfy_extras/nodes_mask.py b/comfy_extras/nodes_mask.py index 98e8fef8f..c44602597 100644 --- a/comfy_extras/nodes_mask.py +++ b/comfy_extras/nodes_mask.py @@ -348,7 +348,7 @@ class GrowMask(IO.ComfyNode): inputs=[ IO.Mask.Input("mask"), IO.Int.Input("expand", default=0, min=-nodes.MAX_RESOLUTION, max=nodes.MAX_RESOLUTION, step=1), - IO.Boolean.Input("tapered_corners", default=True), + IO.Boolean.Input("tapered_corners", default=True, advanced=True), ], outputs=[IO.Mask.Output()], ) diff --git a/comfy_extras/nodes_model_advanced.py b/comfy_extras/nodes_model_advanced.py index f22b333fc..8bf6a1afa 100644 --- a/comfy_extras/nodes_model_advanced.py +++ b/comfy_extras/nodes_model_advanced.py @@ -52,8 +52,8 @@ class ModelSamplingDiscrete: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "sampling": (["eps", "v_prediction", "lcm", "x0", "img_to_img"],), - "zsnr": ("BOOLEAN", {"default": False}), + "sampling": (["eps", "v_prediction", "lcm", "x0", "img_to_img", "img_to_img_flow"],), + "zsnr": ("BOOLEAN", {"default": False, "advanced": True}), }} RETURN_TYPES = ("MODEL",) @@ -76,6 +76,8 @@ class ModelSamplingDiscrete: sampling_type = comfy.model_sampling.X0 elif sampling == "img_to_img": sampling_type = comfy.model_sampling.IMG_TO_IMG + elif sampling == "img_to_img_flow": + sampling_type = comfy.model_sampling.IMG_TO_IMG_FLOW class ModelSamplingAdvanced(sampling_base, sampling_type): pass @@ -153,8 +155,8 @@ class ModelSamplingFlux: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "max_shift": ("FLOAT", {"default": 1.15, "min": 0.0, "max": 100.0, "step":0.01}), - "base_shift": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 100.0, "step":0.01}), + "max_shift": ("FLOAT", {"default": 1.15, "min": 0.0, "max": 100.0, "step":0.01, "advanced": True}), + "base_shift": ("FLOAT", {"default": 0.5, "min": 0.0, "max": 100.0, "step":0.01, "advanced": True}), "width": ("INT", {"default": 1024, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}), "height": ("INT", {"default": 1024, "min": 16, "max": nodes.MAX_RESOLUTION, "step": 8}), }} @@ -190,8 +192,8 @@ class ModelSamplingContinuousEDM: def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), "sampling": (["v_prediction", "edm", "edm_playground_v2.5", "eps", "cosmos_rflow"],), - "sigma_max": ("FLOAT", {"default": 120.0, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), - "sigma_min": ("FLOAT", {"default": 0.002, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), + "sigma_max": ("FLOAT", {"default": 120.0, "min": 0.0, "max": 1000.0, "step":0.001, "round": False, "advanced": True}), + "sigma_min": ("FLOAT", {"default": 0.002, "min": 0.0, "max": 1000.0, "step":0.001, "round": False, "advanced": True}), }} RETURN_TYPES = ("MODEL",) @@ -235,8 +237,8 @@ class ModelSamplingContinuousV: def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), "sampling": (["v_prediction"],), - "sigma_max": ("FLOAT", {"default": 500.0, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), - "sigma_min": ("FLOAT", {"default": 0.03, "min": 0.0, "max": 1000.0, "step":0.001, "round": False}), + "sigma_max": ("FLOAT", {"default": 500.0, "min": 0.0, "max": 1000.0, "step":0.001, "round": False, "advanced": True}), + "sigma_min": ("FLOAT", {"default": 0.03, "min": 0.0, "max": 1000.0, "step":0.001, "round": False, "advanced": True}), }} RETURN_TYPES = ("MODEL",) @@ -303,7 +305,7 @@ class ModelComputeDtype: @classmethod def INPUT_TYPES(s): return {"required": { "model": ("MODEL",), - "dtype": (["default", "fp32", "fp16", "bf16"],), + "dtype": (["default", "fp32", "fp16", "bf16"], {"advanced": True}), }} RETURN_TYPES = ("MODEL",) diff --git a/comfy_extras/nodes_model_downscale.py b/comfy_extras/nodes_model_downscale.py index dec2ae841..24d47a903 100644 --- a/comfy_extras/nodes_model_downscale.py +++ b/comfy_extras/nodes_model_downscale.py @@ -13,11 +13,11 @@ class PatchModelAddDownscale(io.ComfyNode): category="model_patches/unet", inputs=[ io.Model.Input("model"), - io.Int.Input("block_number", default=3, min=1, max=32, step=1), + io.Int.Input("block_number", default=3, min=1, max=32, step=1, advanced=True), io.Float.Input("downscale_factor", default=2.0, min=0.1, max=9.0, step=0.001), - io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001), - io.Float.Input("end_percent", default=0.35, min=0.0, max=1.0, step=0.001), - io.Boolean.Input("downscale_after_skip", default=True), + io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001, advanced=True), + io.Float.Input("end_percent", default=0.35, min=0.0, max=1.0, step=0.001, advanced=True), + io.Boolean.Input("downscale_after_skip", default=True, advanced=True), io.Combo.Input("downscale_method", options=cls.UPSCALE_METHODS), io.Combo.Input("upscale_method", options=cls.UPSCALE_METHODS), ], diff --git a/comfy_extras/nodes_nag.py b/comfy_extras/nodes_nag.py new file mode 100644 index 000000000..b57181848 --- /dev/null +++ b/comfy_extras/nodes_nag.py @@ -0,0 +1,99 @@ +import torch +from comfy_api.latest import ComfyExtension, io +from typing_extensions import override + + +class NAGuidance(io.ComfyNode): + @classmethod + def define_schema(cls) -> io.Schema: + return io.Schema( + node_id="NAGuidance", + display_name="Normalized Attention Guidance", + description="Applies Normalized Attention Guidance to models, enabling negative prompts on distilled/schnell models.", + category="advanced/guidance", + is_experimental=True, + inputs=[ + io.Model.Input("model", tooltip="The model to apply NAG to."), + io.Float.Input("nag_scale", min=0.0, default=5.0, max=50.0, step=0.1, tooltip="The guidance scale factor. Higher values push further from the negative prompt."), + io.Float.Input("nag_alpha", min=0.0, default=0.5, max=1.0, step=0.01, tooltip="Blending factor for the normalized attention. 1.0 is full replacement, 0.0 is no effect."), + io.Float.Input("nag_tau", min=1.0, default=1.5, max=10.0, step=0.01), + # io.Float.Input("start_percent", min=0.0, default=0.0, max=1.0, step=0.01, tooltip="The relative sampling step to begin applying NAG."), + # io.Float.Input("end_percent", min=0.0, default=1.0, max=1.0, step=0.01, tooltip="The relative sampling step to stop applying NAG."), + ], + outputs=[ + io.Model.Output(tooltip="The patched model with NAG enabled."), + ], + ) + + @classmethod + def execute(cls, model: io.Model.Type, nag_scale: float, nag_alpha: float, nag_tau: float) -> io.NodeOutput: + m = model.clone() + + # sigma_start = m.get_model_object("model_sampling").percent_to_sigma(start_percent) + # sigma_end = m.get_model_object("model_sampling").percent_to_sigma(end_percent) + + def nag_attention_output_patch(out, extra_options): + cond_or_uncond = extra_options.get("cond_or_uncond", None) + if cond_or_uncond is None: + return out + + if not (1 in cond_or_uncond and 0 in cond_or_uncond): + return out + + # sigma = extra_options.get("sigmas", None) + # if sigma is not None and len(sigma) > 0: + # sigma = sigma[0].item() + # if sigma > sigma_start or sigma < sigma_end: + # return out + + img_slice = extra_options.get("img_slice", None) + + if img_slice is not None: + orig_out = out + out = out[:, img_slice[0]:img_slice[1]] # only apply on img part + + batch_size = out.shape[0] + half_size = batch_size // len(cond_or_uncond) + + ind_neg = cond_or_uncond.index(1) + ind_pos = cond_or_uncond.index(0) + z_pos = out[half_size * ind_pos:half_size * (ind_pos + 1)] + z_neg = out[half_size * ind_neg:half_size * (ind_neg + 1)] + + guided = z_pos * nag_scale - z_neg * (nag_scale - 1.0) + + eps = 1e-6 + norm_pos = torch.norm(z_pos, p=1, dim=-1, keepdim=True).clamp_min(eps) + norm_guided = torch.norm(guided, p=1, dim=-1, keepdim=True).clamp_min(eps) + + ratio = norm_guided / norm_pos + scale_factor = torch.minimum(ratio, torch.full_like(ratio, nag_tau)) / ratio + + guided_normalized = guided * scale_factor + + z_final = guided_normalized * nag_alpha + z_pos * (1.0 - nag_alpha) + + if img_slice is not None: + orig_out[half_size * ind_neg:half_size * (ind_neg + 1), img_slice[0]:img_slice[1]] = z_final + orig_out[half_size * ind_pos:half_size * (ind_pos + 1), img_slice[0]:img_slice[1]] = z_final + return orig_out + else: + out[half_size * ind_pos:half_size * (ind_pos + 1)] = z_final + return out + + m.set_model_attn1_output_patch(nag_attention_output_patch) + m.disable_model_cfg1_optimization() + + return io.NodeOutput(m) + + +class NagExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + NAGuidance, + ] + + +async def comfy_entrypoint() -> NagExtension: + return NagExtension() diff --git a/comfy_extras/nodes_perpneg.py b/comfy_extras/nodes_perpneg.py index cd068ce9c..ed1467de9 100644 --- a/comfy_extras/nodes_perpneg.py +++ b/comfy_extras/nodes_perpneg.py @@ -29,7 +29,7 @@ class PerpNeg(io.ComfyNode): inputs=[ io.Model.Input("model"), io.Conditioning.Input("empty_conditioning"), - io.Float.Input("neg_scale", default=1.0, min=0.0, max=100.0, step=0.01), + io.Float.Input("neg_scale", default=1.0, min=0.0, max=100.0, step=0.01, advanced=True), ], outputs=[ io.Model.Output(), @@ -134,7 +134,7 @@ class PerpNegGuider(io.ComfyNode): io.Conditioning.Input("negative"), io.Conditioning.Input("empty_conditioning"), io.Float.Input("cfg", default=8.0, min=0.0, max=100.0, step=0.1, round=0.01), - io.Float.Input("neg_scale", default=1.0, min=0.0, max=100.0, step=0.01), + io.Float.Input("neg_scale", default=1.0, min=0.0, max=100.0, step=0.01, advanced=True), ], outputs=[ io.Guider.Output(), diff --git a/comfy_extras/nodes_post_processing.py b/comfy_extras/nodes_post_processing.py index a52a90e2c..4a0f7141a 100644 --- a/comfy_extras/nodes_post_processing.py +++ b/comfy_extras/nodes_post_processing.py @@ -19,6 +19,7 @@ class Blend(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ImageBlend", + display_name="Image Blend", category="image/postprocessing", inputs=[ io.Image.Input("image1"), @@ -76,6 +77,7 @@ class Blur(io.ComfyNode): def define_schema(cls): return io.Schema( node_id="ImageBlur", + display_name="Image Blur", category="image/postprocessing", inputs=[ io.Image.Input("image"), @@ -179,9 +181,9 @@ class Sharpen(io.ComfyNode): category="image/postprocessing", inputs=[ io.Image.Input("image"), - io.Int.Input("sharpen_radius", default=1, min=1, max=31, step=1), - io.Float.Input("sigma", default=1.0, min=0.1, max=10.0, step=0.01), - io.Float.Input("alpha", default=1.0, min=0.0, max=5.0, step=0.01), + io.Int.Input("sharpen_radius", default=1, min=1, max=31, step=1, advanced=True), + io.Float.Input("sigma", default=1.0, min=0.1, max=10.0, step=0.01, advanced=True), + io.Float.Input("alpha", default=1.0, min=0.0, max=5.0, step=0.01, advanced=True), ], outputs=[ io.Image.Output(), @@ -225,7 +227,7 @@ class ImageScaleToTotalPixels(io.ComfyNode): io.Image.Input("image"), io.Combo.Input("upscale_method", options=cls.upscale_methods), io.Float.Input("megapixels", default=1.0, min=0.01, max=16.0, step=0.01), - io.Int.Input("resolution_steps", default=1, min=1, max=256), + io.Int.Input("resolution_steps", default=1, min=1, max=256, advanced=True), ], outputs=[ io.Image.Output(), @@ -565,6 +567,7 @@ class BatchImagesNode(io.ComfyNode): node_id="BatchImagesNode", display_name="Batch Images", category="image", + essentials_category="Image Tools", search_aliases=["batch", "image batch", "batch images", "combine images", "merge images", "stack images"], inputs=[ io.Autogrow.Input("images", template=autogrow_template) @@ -655,6 +658,7 @@ class BatchImagesMasksLatentsNode(io.ComfyNode): batched = batch_masks(values) return io.NodeOutput(batched) + class PostProcessingExtension(ComfyExtension): @override async def get_node_list(self) -> list[type[io.ComfyNode]]: diff --git a/comfy_extras/nodes_primitive.py b/comfy_extras/nodes_primitive.py index 937321800..9c2e98758 100644 --- a/comfy_extras/nodes_primitive.py +++ b/comfy_extras/nodes_primitive.py @@ -29,6 +29,7 @@ class StringMultiline(io.ComfyNode): node_id="PrimitiveStringMultiline", display_name="String (Multiline)", category="utils/primitive", + essentials_category="Basics", inputs=[ io.String.Input("value", multiline=True), ], diff --git a/comfy_extras/nodes_qwen.py b/comfy_extras/nodes_qwen.py index fde8fac9a..6894367be 100644 --- a/comfy_extras/nodes_qwen.py +++ b/comfy_extras/nodes_qwen.py @@ -116,7 +116,7 @@ class EmptyQwenImageLayeredLatentImage(io.ComfyNode): inputs=[ io.Int.Input("width", default=640, min=16, max=nodes.MAX_RESOLUTION, step=16), io.Int.Input("height", default=640, min=16, max=nodes.MAX_RESOLUTION, step=16), - io.Int.Input("layers", default=3, min=0, max=nodes.MAX_RESOLUTION, step=1), + io.Int.Input("layers", default=3, min=0, max=nodes.MAX_RESOLUTION, step=1, advanced=True), io.Int.Input("batch_size", default=1, min=1, max=4096), ], outputs=[ diff --git a/comfy_extras/nodes_replacements.py b/comfy_extras/nodes_replacements.py new file mode 100644 index 000000000..7684e854c --- /dev/null +++ b/comfy_extras/nodes_replacements.py @@ -0,0 +1,103 @@ +from comfy_api.latest import ComfyExtension, io, ComfyAPI + +api = ComfyAPI() + + +async def register_replacements(): + """Register all built-in node replacements.""" + await register_replacements_longeredge() + await register_replacements_batchimages() + await register_replacements_upscaleimage() + await register_replacements_controlnet() + await register_replacements_load3d() + await register_replacements_preview3d() + await register_replacements_svdimg2vid() + await register_replacements_conditioningavg() + +async def register_replacements_longeredge(): + # No dynamic inputs here + await api.node_replacement.register(io.NodeReplace( + new_node_id="ImageScaleToMaxDimension", + old_node_id="ResizeImagesByLongerEdge", + old_widget_ids=["longer_edge"], + input_mapping=[ + {"new_id": "image", "old_id": "images"}, + {"new_id": "largest_size", "old_id": "longer_edge"}, + {"new_id": "upscale_method", "set_value": "lanczos"}, + ], + # just to test the frontend output_mapping code, does nothing really here + output_mapping=[{"new_idx": 0, "old_idx": 0}], + )) + +async def register_replacements_batchimages(): + # BatchImages node uses Autogrow + await api.node_replacement.register(io.NodeReplace( + new_node_id="BatchImagesNode", + old_node_id="ImageBatch", + input_mapping=[ + {"new_id": "images.image0", "old_id": "image1"}, + {"new_id": "images.image1", "old_id": "image2"}, + ], + )) + +async def register_replacements_upscaleimage(): + # ResizeImageMaskNode uses DynamicCombo + await api.node_replacement.register(io.NodeReplace( + new_node_id="ResizeImageMaskNode", + old_node_id="ImageScaleBy", + old_widget_ids=["upscale_method", "scale_by"], + input_mapping=[ + {"new_id": "input", "old_id": "image"}, + {"new_id": "resize_type", "set_value": "scale by multiplier"}, + {"new_id": "resize_type.multiplier", "old_id": "scale_by"}, + {"new_id": "scale_method", "old_id": "upscale_method"}, + ], + )) + +async def register_replacements_controlnet(): + # T2IAdapterLoader → ControlNetLoader + await api.node_replacement.register(io.NodeReplace( + new_node_id="ControlNetLoader", + old_node_id="T2IAdapterLoader", + input_mapping=[ + {"new_id": "control_net_name", "old_id": "t2i_adapter_name"}, + ], + )) + +async def register_replacements_load3d(): + # Load3DAnimation merged into Load3D + await api.node_replacement.register(io.NodeReplace( + new_node_id="Load3D", + old_node_id="Load3DAnimation", + )) + +async def register_replacements_preview3d(): + # Preview3DAnimation merged into Preview3D + await api.node_replacement.register(io.NodeReplace( + new_node_id="Preview3D", + old_node_id="Preview3DAnimation", + )) + +async def register_replacements_svdimg2vid(): + # Typo fix: SDV → SVD + await api.node_replacement.register(io.NodeReplace( + new_node_id="SVD_img2vid_Conditioning", + old_node_id="SDV_img2vid_Conditioning", + )) + +async def register_replacements_conditioningavg(): + # Typo fix: trailing space in node name + await api.node_replacement.register(io.NodeReplace( + new_node_id="ConditioningAverage", + old_node_id="ConditioningAverage ", + )) + +class NodeReplacementsExtension(ComfyExtension): + async def on_load(self) -> None: + await register_replacements() + + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [] + +async def comfy_entrypoint() -> NodeReplacementsExtension: + return NodeReplacementsExtension() diff --git a/comfy_extras/nodes_rope.py b/comfy_extras/nodes_rope.py index d1feb031e..918ddc02b 100644 --- a/comfy_extras/nodes_rope.py +++ b/comfy_extras/nodes_rope.py @@ -12,14 +12,14 @@ class ScaleROPE(io.ComfyNode): is_experimental=True, inputs=[ io.Model.Input("model"), - io.Float.Input("scale_x", default=1.0, min=0.0, max=100.0, step=0.1), - io.Float.Input("shift_x", default=0.0, min=-256.0, max=256.0, step=0.1), + io.Float.Input("scale_x", default=1.0, min=0.0, max=100.0, step=0.1, advanced=True), + io.Float.Input("shift_x", default=0.0, min=-256.0, max=256.0, step=0.1, advanced=True), - io.Float.Input("scale_y", default=1.0, min=0.0, max=100.0, step=0.1), - io.Float.Input("shift_y", default=0.0, min=-256.0, max=256.0, step=0.1), + io.Float.Input("scale_y", default=1.0, min=0.0, max=100.0, step=0.1, advanced=True), + io.Float.Input("shift_y", default=0.0, min=-256.0, max=256.0, step=0.1, advanced=True), - io.Float.Input("scale_t", default=1.0, min=0.0, max=100.0, step=0.1), - io.Float.Input("shift_t", default=0.0, min=-256.0, max=256.0, step=0.1), + io.Float.Input("scale_t", default=1.0, min=0.0, max=100.0, step=0.1, advanced=True), + io.Float.Input("shift_t", default=0.0, min=-256.0, max=256.0, step=0.1, advanced=True), ], diff --git a/comfy_extras/nodes_sag.py b/comfy_extras/nodes_sag.py index 0f47db30b..d9c47851c 100644 --- a/comfy_extras/nodes_sag.py +++ b/comfy_extras/nodes_sag.py @@ -117,7 +117,7 @@ class SelfAttentionGuidance(io.ComfyNode): inputs=[ io.Model.Input("model"), io.Float.Input("scale", default=0.5, min=-2.0, max=5.0, step=0.01), - io.Float.Input("blur_sigma", default=2.0, min=0.0, max=10.0, step=0.1), + io.Float.Input("blur_sigma", default=2.0, min=0.0, max=10.0, step=0.1, advanced=True), ], outputs=[ io.Model.Output(), diff --git a/comfy_extras/nodes_sd3.py b/comfy_extras/nodes_sd3.py index 736213a47..c43844a1a 100644 --- a/comfy_extras/nodes_sd3.py +++ b/comfy_extras/nodes_sd3.py @@ -72,7 +72,7 @@ class CLIPTextEncodeSD3(io.ComfyNode): io.String.Input("clip_l", multiline=True, dynamic_prompts=True), io.String.Input("clip_g", multiline=True, dynamic_prompts=True), io.String.Input("t5xxl", multiline=True, dynamic_prompts=True), - io.Combo.Input("empty_padding", options=["none", "empty_prompt"]), + io.Combo.Input("empty_padding", options=["none", "empty_prompt"], advanced=True), ], outputs=[ io.Conditioning.Output(), @@ -179,10 +179,10 @@ class SkipLayerGuidanceSD3(io.ComfyNode): description="Generic version of SkipLayerGuidance node that can be used on every DiT model.", inputs=[ io.Model.Input("model"), - io.String.Input("layers", default="7, 8, 9", multiline=False), + io.String.Input("layers", default="7, 8, 9", multiline=False, advanced=True), io.Float.Input("scale", default=3.0, min=0.0, max=10.0, step=0.1), - io.Float.Input("start_percent", default=0.01, min=0.0, max=1.0, step=0.001), - io.Float.Input("end_percent", default=0.15, min=0.0, max=1.0, step=0.001), + io.Float.Input("start_percent", default=0.01, min=0.0, max=1.0, step=0.001, advanced=True), + io.Float.Input("end_percent", default=0.15, min=0.0, max=1.0, step=0.001, advanced=True), ], outputs=[ io.Model.Output(), diff --git a/comfy_extras/nodes_sdpose.py b/comfy_extras/nodes_sdpose.py new file mode 100644 index 000000000..71441848e --- /dev/null +++ b/comfy_extras/nodes_sdpose.py @@ -0,0 +1,740 @@ +import torch +import comfy.utils +import numpy as np +import math +import colorsys +from tqdm import tqdm +from typing_extensions import override +from comfy_api.latest import ComfyExtension, io +from comfy_extras.nodes_lotus import LotusConditioning + + +def _preprocess_keypoints(kp_raw, sc_raw): + """Insert neck keypoint and remap from MMPose to OpenPose ordering. + + Returns (kp, sc) where kp has shape (134, 2) and sc has shape (134,). + Layout: + 0-17 body (18 kp, OpenPose order) + 18-23 feet (6 kp) + 24-91 face (68 kp) + 92-112 right hand (21 kp) + 113-133 left hand (21 kp) + """ + kp = np.array(kp_raw, dtype=np.float32) + sc = np.array(sc_raw, dtype=np.float32) + if len(kp) >= 17: + neck = (kp[5] + kp[6]) / 2 + neck_score = min(sc[5], sc[6]) if sc[5] > 0.3 and sc[6] > 0.3 else 0 + kp = np.insert(kp, 17, neck, axis=0) + sc = np.insert(sc, 17, neck_score) + mmpose_idx = np.array([17, 6, 8, 10, 7, 9, 12, 14, 16, 13, 15, 2, 1, 4, 3]) + openpose_idx = np.array([ 1, 2, 3, 4, 6, 7, 8, 9, 10, 12, 13, 14, 15, 16, 17]) + tmp_kp, tmp_sc = kp.copy(), sc.copy() + tmp_kp[openpose_idx] = kp[mmpose_idx] + tmp_sc[openpose_idx] = sc[mmpose_idx] + kp, sc = tmp_kp, tmp_sc + return kp, sc + + +def _to_openpose_frames(all_keypoints, all_scores, height, width): + """Convert raw keypoint lists to a list of OpenPose-style frame dicts. + + Each frame dict contains: + canvas_width, canvas_height, people: list of person dicts with keys: + pose_keypoints_2d - 18 body kp as flat [x,y,score,...] (absolute pixels) + foot_keypoints_2d - 6 foot kp as flat [x,y,score,...] (absolute pixels) + face_keypoints_2d - 70 face kp as flat [x,y,score,...] (absolute pixels) + indices 0-67: 68 face landmarks + index 68: right eye (body[14]) + index 69: left eye (body[15]) + hand_right_keypoints_2d - 21 right-hand kp (absolute pixels) + hand_left_keypoints_2d - 21 left-hand kp (absolute pixels) + """ + def _flatten(kp_slice, sc_slice): + return np.stack([kp_slice[:, 0], kp_slice[:, 1], sc_slice], axis=1).flatten().tolist() + + frames = [] + for img_idx in range(len(all_keypoints)): + people = [] + for kp_raw, sc_raw in zip(all_keypoints[img_idx], all_scores[img_idx]): + kp, sc = _preprocess_keypoints(kp_raw, sc_raw) + # 70 face kp = 68 face landmarks + REye (body[14]) + LEye (body[15]) + face_kp = np.concatenate([kp[24:92], kp[[14, 15]]], axis=0) + face_sc = np.concatenate([sc[24:92], sc[[14, 15]]], axis=0) + people.append({ + "pose_keypoints_2d": _flatten(kp[0:18], sc[0:18]), + "foot_keypoints_2d": _flatten(kp[18:24], sc[18:24]), + "face_keypoints_2d": _flatten(face_kp, face_sc), + "hand_right_keypoints_2d": _flatten(kp[92:113], sc[92:113]), + "hand_left_keypoints_2d": _flatten(kp[113:134], sc[113:134]), + }) + frames.append({"canvas_width": width, "canvas_height": height, "people": people}) + return frames + + +class KeypointDraw: + """ + Pose keypoint drawing class that supports both numpy and cv2 backends. + """ + def __init__(self): + try: + import cv2 + self.draw = cv2 + except ImportError: + self.draw = self + + # Hand connections (same for both hands) + self.hand_edges = [ + [0, 1], [1, 2], [2, 3], [3, 4], # thumb + [0, 5], [5, 6], [6, 7], [7, 8], # index + [0, 9], [9, 10], [10, 11], [11, 12], # middle + [0, 13], [13, 14], [14, 15], [15, 16], # ring + [0, 17], [17, 18], [18, 19], [19, 20], # pinky + ] + + # Body connections - matching DWPose limbSeq (1-indexed, converted to 0-indexed) + self.body_limbSeq = [ + [2, 3], [2, 6], [3, 4], [4, 5], [6, 7], [7, 8], [2, 9], [9, 10], + [10, 11], [2, 12], [12, 13], [13, 14], [2, 1], [1, 15], [15, 17], + [1, 16], [16, 18] + ] + + # Colors matching DWPose + self.colors = [ + [255, 0, 0], [255, 85, 0], [255, 170, 0], [255, 255, 0], [170, 255, 0], + [85, 255, 0], [0, 255, 0], [0, 255, 85], [0, 255, 170], [0, 255, 255], + [0, 170, 255], [0, 85, 255], [0, 0, 255], [85, 0, 255], + [170, 0, 255], [255, 0, 255], [255, 0, 170], [255, 0, 85] + ] + + @staticmethod + def circle(canvas_np, center, radius, color, **kwargs): + """Draw a filled circle using NumPy vectorized operations.""" + cx, cy = center + h, w = canvas_np.shape[:2] + + radius_int = int(np.ceil(radius)) + + y_min, y_max = max(0, cy - radius_int), min(h, cy + radius_int + 1) + x_min, x_max = max(0, cx - radius_int), min(w, cx + radius_int + 1) + + if y_max <= y_min or x_max <= x_min: + return + + y, x = np.ogrid[y_min:y_max, x_min:x_max] + mask = (x - cx)**2 + (y - cy)**2 <= radius**2 + canvas_np[y_min:y_max, x_min:x_max][mask] = color + + @staticmethod + def line(canvas_np, pt1, pt2, color, thickness=1, **kwargs): + """Draw line using Bresenham's algorithm with NumPy operations.""" + x0, y0, x1, y1 = *pt1, *pt2 + h, w = canvas_np.shape[:2] + dx, dy = abs(x1 - x0), abs(y1 - y0) + sx, sy = (1 if x0 < x1 else -1), (1 if y0 < y1 else -1) + err, x, y, line_points = dx - dy, x0, y0, [] + + while True: + line_points.append((x, y)) + if x == x1 and y == y1: + break + e2 = 2 * err + if e2 > -dy: + err, x = err - dy, x + sx + if e2 < dx: + err, y = err + dx, y + sy + + if thickness > 1: + radius, radius_int = (thickness / 2.0) + 0.5, int(np.ceil((thickness / 2.0) + 0.5)) + for px, py in line_points: + y_min, y_max, x_min, x_max = max(0, py - radius_int), min(h, py + radius_int + 1), max(0, px - radius_int), min(w, px + radius_int + 1) + if y_max > y_min and x_max > x_min: + yy, xx = np.ogrid[y_min:y_max, x_min:x_max] + canvas_np[y_min:y_max, x_min:x_max][(xx - px)**2 + (yy - py)**2 <= radius**2] = color + else: + line_points = np.array(line_points) + valid = (line_points[:, 1] >= 0) & (line_points[:, 1] < h) & (line_points[:, 0] >= 0) & (line_points[:, 0] < w) + if (valid_points := line_points[valid]).size: + canvas_np[valid_points[:, 1], valid_points[:, 0]] = color + + @staticmethod + def fillConvexPoly(canvas_np, pts, color, **kwargs): + """Fill polygon using vectorized scanline algorithm.""" + if len(pts) < 3: + return + pts = np.array(pts, dtype=np.int32) + h, w = canvas_np.shape[:2] + y_min, y_max, x_min, x_max = max(0, pts[:, 1].min()), min(h, pts[:, 1].max() + 1), max(0, pts[:, 0].min()), min(w, pts[:, 0].max() + 1) + if y_max <= y_min or x_max <= x_min: + return + yy, xx = np.mgrid[y_min:y_max, x_min:x_max] + mask = np.zeros((y_max - y_min, x_max - x_min), dtype=bool) + + for i in range(len(pts)): + p1, p2 = pts[i], pts[(i + 1) % len(pts)] + y1, y2 = p1[1], p2[1] + if y1 == y2: + continue + if y1 > y2: + p1, p2, y1, y2 = p2, p1, p2[1], p1[1] + if not (edge_mask := (yy >= y1) & (yy < y2)).any(): + continue + mask ^= edge_mask & (xx >= p1[0] + (yy - y1) * (p2[0] - p1[0]) / (y2 - y1)) + + canvas_np[y_min:y_max, x_min:x_max][mask] = color + + @staticmethod + def ellipse2Poly(center, axes, angle, arc_start, arc_end, delta=1, **kwargs): + """Python implementation of cv2.ellipse2Poly.""" + axes = (axes[0] + 0.5, axes[1] + 0.5) # to better match cv2 output + angle = angle % 360 + if arc_start > arc_end: + arc_start, arc_end = arc_end, arc_start + while arc_start < 0: + arc_start, arc_end = arc_start + 360, arc_end + 360 + while arc_end > 360: + arc_end, arc_start = arc_end - 360, arc_start - 360 + if arc_end - arc_start > 360: + arc_start, arc_end = 0, 360 + + angle_rad = math.radians(angle) + alpha, beta = math.cos(angle_rad), math.sin(angle_rad) + pts = [] + for i in range(arc_start, arc_end + delta, delta): + theta_rad = math.radians(min(i, arc_end)) + x, y = axes[0] * math.cos(theta_rad), axes[1] * math.sin(theta_rad) + pts.append([int(round(center[0] + x * alpha - y * beta)), int(round(center[1] + x * beta + y * alpha))]) + + unique_pts, prev_pt = [], (float('inf'), float('inf')) + for pt in pts: + if (pt_tuple := tuple(pt)) != prev_pt: + unique_pts.append(pt) + prev_pt = pt_tuple + + return unique_pts if len(unique_pts) > 1 else [[center[0], center[1]], [center[0], center[1]]] + + def draw_wholebody_keypoints(self, canvas, keypoints, scores=None, threshold=0.3, + draw_body=True, draw_feet=True, draw_face=True, draw_hands=True, stick_width=4, face_point_size=3): + """ + Draw wholebody keypoints (134 keypoints after processing) in DWPose style. + + Expected keypoint format (after neck insertion and remapping): + - Body: 0-17 (18 keypoints in OpenPose format, neck at index 1) + - Foot: 18-23 (6 keypoints) + - Face: 24-91 (68 landmarks) + - Right hand: 92-112 (21 keypoints) + - Left hand: 113-133 (21 keypoints) + + Args: + canvas: The canvas to draw on (numpy array) + keypoints: Array of keypoint coordinates + scores: Optional confidence scores for each keypoint + threshold: Minimum confidence threshold for drawing keypoints + + Returns: + canvas: The canvas with keypoints drawn + """ + H, W, C = canvas.shape + + # Draw body limbs + if draw_body and len(keypoints) >= 18: + for i, limb in enumerate(self.body_limbSeq): + # Convert from 1-indexed to 0-indexed + idx1, idx2 = limb[0] - 1, limb[1] - 1 + + if idx1 >= 18 or idx2 >= 18: + continue + + if scores is not None: + if scores[idx1] < threshold or scores[idx2] < threshold: + continue + + Y = [keypoints[idx1][0], keypoints[idx2][0]] + X = [keypoints[idx1][1], keypoints[idx2][1]] + mX, mY = (X[0] + X[1]) / 2, (Y[0] + Y[1]) / 2 + length = math.sqrt((X[0] - X[1]) ** 2 + (Y[0] - Y[1]) ** 2) + + if length < 1: + continue + + angle = math.degrees(math.atan2(X[0] - X[1], Y[0] - Y[1])) + + polygon = self.draw.ellipse2Poly((int(mY), int(mX)), (int(length / 2), stick_width), int(angle), 0, 360, 1) + + self.draw.fillConvexPoly(canvas, polygon, self.colors[i % len(self.colors)]) + + # Draw body keypoints + if draw_body and len(keypoints) >= 18: + for i in range(18): + if scores is not None and scores[i] < threshold: + continue + x, y = int(keypoints[i][0]), int(keypoints[i][1]) + if 0 <= x < W and 0 <= y < H: + self.draw.circle(canvas, (x, y), 4, self.colors[i % len(self.colors)], thickness=-1) + + # Draw foot keypoints (18-23, 6 keypoints) + if draw_feet and len(keypoints) >= 24: + for i in range(18, 24): + if scores is not None and scores[i] < threshold: + continue + x, y = int(keypoints[i][0]), int(keypoints[i][1]) + if 0 <= x < W and 0 <= y < H: + self.draw.circle(canvas, (x, y), 4, self.colors[i % len(self.colors)], thickness=-1) + + # Draw right hand (92-112) + if draw_hands and len(keypoints) >= 113: + eps = 0.01 + for ie, edge in enumerate(self.hand_edges): + idx1, idx2 = 92 + edge[0], 92 + edge[1] + if scores is not None: + if scores[idx1] < threshold or scores[idx2] < threshold: + continue + + x1, y1 = int(keypoints[idx1][0]), int(keypoints[idx1][1]) + x2, y2 = int(keypoints[idx2][0]), int(keypoints[idx2][1]) + + if x1 > eps and y1 > eps and x2 > eps and y2 > eps: + if 0 <= x1 < W and 0 <= y1 < H and 0 <= x2 < W and 0 <= y2 < H: + # HSV to RGB conversion for rainbow colors + r, g, b = colorsys.hsv_to_rgb(ie / float(len(self.hand_edges)), 1.0, 1.0) + color = (int(r * 255), int(g * 255), int(b * 255)) + self.draw.line(canvas, (x1, y1), (x2, y2), color, thickness=2) + + # Draw right hand keypoints + for i in range(92, 113): + if scores is not None and scores[i] < threshold: + continue + x, y = int(keypoints[i][0]), int(keypoints[i][1]) + if x > eps and y > eps and 0 <= x < W and 0 <= y < H: + self.draw.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1) + + # Draw left hand (113-133) + if draw_hands and len(keypoints) >= 134: + eps = 0.01 + for ie, edge in enumerate(self.hand_edges): + idx1, idx2 = 113 + edge[0], 113 + edge[1] + if scores is not None: + if scores[idx1] < threshold or scores[idx2] < threshold: + continue + + x1, y1 = int(keypoints[idx1][0]), int(keypoints[idx1][1]) + x2, y2 = int(keypoints[idx2][0]), int(keypoints[idx2][1]) + + if x1 > eps and y1 > eps and x2 > eps and y2 > eps: + if 0 <= x1 < W and 0 <= y1 < H and 0 <= x2 < W and 0 <= y2 < H: + # HSV to RGB conversion for rainbow colors + r, g, b = colorsys.hsv_to_rgb(ie / float(len(self.hand_edges)), 1.0, 1.0) + color = (int(r * 255), int(g * 255), int(b * 255)) + self.draw.line(canvas, (x1, y1), (x2, y2), color, thickness=2) + + # Draw left hand keypoints + for i in range(113, 134): + if scores is not None and i < len(scores) and scores[i] < threshold: + continue + x, y = int(keypoints[i][0]), int(keypoints[i][1]) + if x > eps and y > eps and 0 <= x < W and 0 <= y < H: + self.draw.circle(canvas, (x, y), 4, (0, 0, 255), thickness=-1) + + # Draw face keypoints (24-91) - white dots only, no lines + if draw_face and len(keypoints) >= 92: + eps = 0.01 + for i in range(24, 92): + if scores is not None and scores[i] < threshold: + continue + x, y = int(keypoints[i][0]), int(keypoints[i][1]) + if x > eps and y > eps and 0 <= x < W and 0 <= y < H: + self.draw.circle(canvas, (x, y), face_point_size, (255, 255, 255), thickness=-1) + + return canvas + +class SDPoseDrawKeypoints(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SDPoseDrawKeypoints", + category="image/preprocessors", + search_aliases=["openpose", "pose detection", "preprocessor", "keypoints", "pose"], + inputs=[ + io.Custom("POSE_KEYPOINT").Input("keypoints"), + io.Boolean.Input("draw_body", default=True), + io.Boolean.Input("draw_hands", default=True), + io.Boolean.Input("draw_face", default=True), + io.Boolean.Input("draw_feet", default=False), + io.Int.Input("stick_width", default=4, min=1, max=10, step=1), + io.Int.Input("face_point_size", default=3, min=1, max=10, step=1), + io.Float.Input("score_threshold", default=0.3, min=0.0, max=1.0, step=0.01), + ], + outputs=[ + io.Image.Output(), + ], + ) + + @classmethod + def execute(cls, keypoints, draw_body, draw_hands, draw_face, draw_feet, stick_width, face_point_size, score_threshold) -> io.NodeOutput: + if not keypoints: + return io.NodeOutput(torch.zeros((1, 64, 64, 3), dtype=torch.float32)) + height = keypoints[0]["canvas_height"] + width = keypoints[0]["canvas_width"] + + def _parse(flat, n): + arr = np.array(flat, dtype=np.float32).reshape(n, 3) + return arr[:, :2], arr[:, 2] + + def _zeros(n): + return np.zeros((n, 2), dtype=np.float32), np.zeros(n, dtype=np.float32) + + pose_outputs = [] + drawer = KeypointDraw() + + for frame in tqdm(keypoints, desc="Drawing keypoints on frames"): + canvas = np.zeros((height, width, 3), dtype=np.uint8) + for person in frame["people"]: + body_kp, body_sc = _parse(person["pose_keypoints_2d"], 18) + foot_raw = person.get("foot_keypoints_2d") + foot_kp, foot_sc = _parse(foot_raw, 6) if foot_raw else _zeros(6) + face_kp, face_sc = _parse(person["face_keypoints_2d"], 70) + face_kp, face_sc = face_kp[:68], face_sc[:68] # drop appended eye kp; body already draws them + rhand_kp, rhand_sc = _parse(person["hand_right_keypoints_2d"], 21) + lhand_kp, lhand_sc = _parse(person["hand_left_keypoints_2d"], 21) + + kp = np.concatenate([body_kp, foot_kp, face_kp, rhand_kp, lhand_kp], axis=0) + sc = np.concatenate([body_sc, foot_sc, face_sc, rhand_sc, lhand_sc], axis=0) + + canvas = drawer.draw_wholebody_keypoints( + canvas, kp, sc, + threshold=score_threshold, + draw_body=draw_body, draw_feet=draw_feet, + draw_face=draw_face, draw_hands=draw_hands, + stick_width=stick_width, face_point_size=face_point_size, + ) + pose_outputs.append(canvas) + + pose_outputs_np = np.stack(pose_outputs) if len(pose_outputs) > 1 else np.expand_dims(pose_outputs[0], 0) + final_pose_output = torch.from_numpy(pose_outputs_np).float() / 255.0 + return io.NodeOutput(final_pose_output) + +class SDPoseKeypointExtractor(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SDPoseKeypointExtractor", + category="image/preprocessors", + search_aliases=["openpose", "pose detection", "preprocessor", "keypoints", "sdpose"], + description="Extract pose keypoints from images using the SDPose model: https://huggingface.co/Comfy-Org/SDPose/tree/main/checkpoints", + inputs=[ + io.Model.Input("model"), + io.Vae.Input("vae"), + io.Image.Input("image"), + io.Int.Input("batch_size", default=16, min=1, max=10000, step=1), + io.BoundingBox.Input("bboxes", optional=True, force_input=True, tooltip="Optional bounding boxes for more accurate detections. Required for multi-person detection."), + ], + outputs=[ + io.Custom("POSE_KEYPOINT").Output("keypoints", tooltip="Keypoints in OpenPose frame format (canvas_width, canvas_height, people)"), + ], + ) + + @classmethod + def execute(cls, model, vae, image, batch_size, bboxes=None) -> io.NodeOutput: + + height, width = image.shape[-3], image.shape[-2] + context = LotusConditioning().execute().result[0] + + # Use output_block_patch to capture the last 640-channel feature + def output_patch(h, hsp, transformer_options): + nonlocal captured_feat + if h.shape[1] == 640: # Capture the features for wholebody + captured_feat = h.clone() + return h, hsp + + model_clone = model.clone() + model_clone.model_options["transformer_options"] = {"patches": {"output_block_patch": [output_patch]}} + + if not hasattr(model.model.diffusion_model, 'heatmap_head'): + raise ValueError("The provided model does not have a heatmap_head. Please use SDPose model from here https://huggingface.co/Comfy-Org/SDPose/tree/main/checkpoints.") + + head = model.model.diffusion_model.heatmap_head + total_images = image.shape[0] + captured_feat = None + + model_h = int(head.heatmap_size[0]) * 4 # e.g. 192 * 4 = 768 + model_w = int(head.heatmap_size[1]) * 4 # e.g. 256 * 4 = 1024 + + def _run_on_latent(latent_batch): + """Run one forward pass and return (keypoints_list, scores_list) for the batch.""" + nonlocal captured_feat + captured_feat = None + _ = comfy.sample.sample( + model_clone, + noise=torch.zeros_like(latent_batch), + steps=1, cfg=1.0, + sampler_name="euler", scheduler="simple", + positive=context, negative=context, + latent_image=latent_batch, disable_noise=True, disable_pbar=True, + ) + return head(captured_feat) # keypoints_batch, scores_batch + + # all_keypoints / all_scores are lists-of-lists: + # outer index = input image index + # inner index = detected person (one per bbox, or one for full-image) + all_keypoints = [] # shape: [n_images][n_persons] + all_scores = [] # shape: [n_images][n_persons] + pbar = comfy.utils.ProgressBar(total_images) + + if bboxes is not None: + if not isinstance(bboxes, list): + bboxes = [[bboxes]] + elif len(bboxes) == 0: + bboxes = [None] * total_images + # --- bbox-crop mode: one forward pass per crop ------------------------- + for img_idx in tqdm(range(total_images), desc="Extracting keypoints from crops"): + img = image[img_idx:img_idx + 1] # (1, H, W, C) + # Broadcasting: if fewer bbox lists than images, repeat the last one. + img_bboxes = bboxes[min(img_idx, len(bboxes) - 1)] if bboxes else None + + img_keypoints = [] + img_scores = [] + + if img_bboxes: + for bbox in img_bboxes: + x1 = max(0, int(bbox["x"])) + y1 = max(0, int(bbox["y"])) + x2 = min(width, int(bbox["x"] + bbox["width"])) + y2 = min(height, int(bbox["y"] + bbox["height"])) + + if x2 <= x1 or y2 <= y1: + continue + + crop_h_px, crop_w_px = y2 - y1, x2 - x1 + crop = img[:, y1:y2, x1:x2, :] # (1, crop_h, crop_w, C) + + # scale to fit inside (model_h, model_w) while preserving aspect ratio, then pad to exact model size. + scale = min(model_h / crop_h_px, model_w / crop_w_px) + scaled_h, scaled_w = int(round(crop_h_px * scale)), int(round(crop_w_px * scale)) + pad_top, pad_left = (model_h - scaled_h) // 2, (model_w - scaled_w) // 2 + + crop_chw = crop.permute(0, 3, 1, 2).float() # BHWC → BCHW + scaled = comfy.utils.common_upscale(crop_chw, scaled_w, scaled_h, upscale_method="bilinear", crop="disabled") + padded = torch.zeros(1, scaled.shape[1], model_h, model_w, dtype=scaled.dtype, device=scaled.device) + padded[:, :, pad_top:pad_top + scaled_h, pad_left:pad_left + scaled_w] = scaled + crop_resized = padded.permute(0, 2, 3, 1) # BCHW → BHWC + + latent_crop = vae.encode(crop_resized) + kp_batch, sc_batch = _run_on_latent(latent_crop) + kp, sc = kp_batch[0], sc_batch[0] # (K, 2), coords in model pixel space + + # remove padding offset, undo scale, offset to full-image coordinates. + kp = kp.copy() if isinstance(kp, np.ndarray) else np.array(kp, dtype=np.float32) + kp[..., 0] = (kp[..., 0] - pad_left) / scale + x1 + kp[..., 1] = (kp[..., 1] - pad_top) / scale + y1 + + img_keypoints.append(kp) + img_scores.append(sc) + else: + # No bboxes for this image – run on the full image + latent_img = vae.encode(img) + kp_batch, sc_batch = _run_on_latent(latent_img) + img_keypoints.append(kp_batch[0]) + img_scores.append(sc_batch[0]) + + all_keypoints.append(img_keypoints) + all_scores.append(img_scores) + pbar.update(1) + + else: # full-image mode, batched + tqdm_pbar = tqdm(total=total_images, desc="Extracting keypoints") + for batch_start in range(0, total_images, batch_size): + batch_end = min(batch_start + batch_size, total_images) + latent_batch = vae.encode(image[batch_start:batch_end]) + + kp_batch, sc_batch = _run_on_latent(latent_batch) + + for kp, sc in zip(kp_batch, sc_batch): + all_keypoints.append([kp]) + all_scores.append([sc]) + tqdm_pbar.update(1) + + pbar.update(batch_end - batch_start) + + openpose_frames = _to_openpose_frames(all_keypoints, all_scores, height, width) + return io.NodeOutput(openpose_frames) + + +def get_face_bboxes(kp2ds, scale, image_shape): + h, w = image_shape + kp2ds_face = kp2ds.copy()[1:] * (w, h) + + min_x, min_y = np.min(kp2ds_face, axis=0) + max_x, max_y = np.max(kp2ds_face, axis=0) + + initial_width = max_x - min_x + initial_height = max_y - min_y + + if initial_width <= 0 or initial_height <= 0: + return [0, 0, 0, 0] + + initial_area = initial_width * initial_height + + expanded_area = initial_area * scale + + new_width = np.sqrt(expanded_area * (initial_width / initial_height)) + new_height = np.sqrt(expanded_area * (initial_height / initial_width)) + + delta_width = (new_width - initial_width) / 2 + delta_height = (new_height - initial_height) / 4 + + expanded_min_x = max(min_x - delta_width, 0) + expanded_max_x = min(max_x + delta_width, w) + expanded_min_y = max(min_y - 3 * delta_height, 0) + expanded_max_y = min(max_y + delta_height, h) + + return [int(expanded_min_x), int(expanded_max_x), int(expanded_min_y), int(expanded_max_y)] + +class SDPoseFaceBBoxes(io.ComfyNode): + + @classmethod + def define_schema(cls): + return io.Schema( + node_id="SDPoseFaceBBoxes", + category="image/preprocessors", + search_aliases=["face bbox", "face bounding box", "pose", "keypoints"], + inputs=[ + io.Custom("POSE_KEYPOINT").Input("keypoints"), + io.Float.Input("scale", default=1.5, min=1.0, max=10.0, step=0.1, tooltip="Multiplier for the bounding box area around each detected face."), + io.Boolean.Input("force_square", default=True, tooltip="Expand the shorter bbox axis so the crop region is always square."), + ], + outputs=[ + io.BoundingBox.Output("bboxes", tooltip="Face bounding boxes per frame, compatible with SDPoseKeypointExtractor bboxes input."), + ], + ) + + @classmethod + def execute(cls, keypoints, scale, force_square) -> io.NodeOutput: + all_bboxes = [] + for frame in keypoints: + h = frame["canvas_height"] + w = frame["canvas_width"] + frame_bboxes = [] + for person in frame["people"]: + face_flat = person.get("face_keypoints_2d", []) + if not face_flat: + continue + # Parse absolute-pixel face keypoints (70 kp: 68 landmarks + REye + LEye) + face_arr = np.array(face_flat, dtype=np.float32).reshape(-1, 3) + face_xy = face_arr[:, :2] # (70, 2) in absolute pixels + + kp_norm = face_xy / np.array([w, h], dtype=np.float32) + kp_padded = np.vstack([np.zeros((1, 2), dtype=np.float32), kp_norm]) # (71, 2) + + x1, x2, y1, y2 = get_face_bboxes(kp_padded, scale, (h, w)) + if x2 > x1 and y2 > y1: + if force_square: + bw, bh = x2 - x1, y2 - y1 + if bw != bh: + side = max(bw, bh) + cx, cy = (x1 + x2) // 2, (y1 + y2) // 2 + half = side // 2 + x1 = max(0, cx - half) + y1 = max(0, cy - half) + x2 = min(w, x1 + side) + y2 = min(h, y1 + side) + # Re-anchor if clamped + x1 = max(0, x2 - side) + y1 = max(0, y2 - side) + frame_bboxes.append({"x": x1, "y": y1, "width": x2 - x1, "height": y2 - y1}) + + all_bboxes.append(frame_bboxes) + + return io.NodeOutput(all_bboxes) + + +class CropByBBoxes(io.ComfyNode): + @classmethod + def define_schema(cls): + return io.Schema( + node_id="CropByBBoxes", + category="image/preprocessors", + search_aliases=["crop", "face crop", "bbox crop", "pose", "bounding box"], + description="Crop and resize regions from the input image batch based on provided bounding boxes.", + inputs=[ + io.Image.Input("image"), + io.BoundingBox.Input("bboxes", force_input=True), + io.Int.Input("output_width", default=512, min=64, max=4096, step=8, tooltip="Width each crop is resized to."), + io.Int.Input("output_height", default=512, min=64, max=4096, step=8, tooltip="Height each crop is resized to."), + io.Int.Input("padding", default=0, min=0, max=1024, step=1, tooltip="Extra padding in pixels added on each side of the bbox before cropping."), + ], + outputs=[ + io.Image.Output(tooltip="All crops stacked into a single image batch."), + ], + ) + + @classmethod + def execute(cls, image, bboxes, output_width, output_height, padding) -> io.NodeOutput: + total_frames = image.shape[0] + img_h = image.shape[1] + img_w = image.shape[2] + num_ch = image.shape[3] + + if not isinstance(bboxes, list): + bboxes = [[bboxes]] + elif len(bboxes) == 0: + return io.NodeOutput(image) + + crops = [] + + for frame_idx in range(total_frames): + frame_bboxes = bboxes[min(frame_idx, len(bboxes) - 1)] + if not frame_bboxes: + continue + + frame_chw = image[frame_idx].permute(2, 0, 1).unsqueeze(0) # BHWC → BCHW (1, C, H, W) + + # Union all bboxes for this frame into a single crop region + x1 = min(int(b["x"]) for b in frame_bboxes) + y1 = min(int(b["y"]) for b in frame_bboxes) + x2 = max(int(b["x"] + b["width"]) for b in frame_bboxes) + y2 = max(int(b["y"] + b["height"]) for b in frame_bboxes) + + if padding > 0: + x1 = max(0, x1 - padding) + y1 = max(0, y1 - padding) + x2 = min(img_w, x2 + padding) + y2 = min(img_h, y2 + padding) + + x1, x2 = max(0, x1), min(img_w, x2) + y1, y2 = max(0, y1), min(img_h, y2) + + # Fallback for empty/degenerate crops + if x2 <= x1 or y2 <= y1: + fallback_size = int(min(img_h, img_w) * 0.3) + fb_x1 = max(0, (img_w - fallback_size) // 2) + fb_y1 = max(0, int(img_h * 0.1)) + fb_x2 = min(img_w, fb_x1 + fallback_size) + fb_y2 = min(img_h, fb_y1 + fallback_size) + if fb_x2 <= fb_x1 or fb_y2 <= fb_y1: + crops.append(torch.zeros(1, num_ch, output_height, output_width, dtype=image.dtype, device=image.device)) + continue + x1, y1, x2, y2 = fb_x1, fb_y1, fb_x2, fb_y2 + + crop_chw = frame_chw[:, :, y1:y2, x1:x2] # (1, C, crop_h, crop_w) + resized = comfy.utils.common_upscale(crop_chw, output_width, output_height, upscale_method="bilinear", crop="disabled") + crops.append(resized) + + if not crops: + return io.NodeOutput(image) + + out_images = torch.cat(crops, dim=0).permute(0, 2, 3, 1) # (N, H, W, C) + return io.NodeOutput(out_images) + + +class SDPoseExtension(ComfyExtension): + @override + async def get_node_list(self) -> list[type[io.ComfyNode]]: + return [ + SDPoseKeypointExtractor, + SDPoseDrawKeypoints, + SDPoseFaceBBoxes, + CropByBBoxes, + ] + +async def comfy_entrypoint() -> SDPoseExtension: + return SDPoseExtension() diff --git a/comfy_extras/nodes_sdupscale.py b/comfy_extras/nodes_sdupscale.py index 31b373370..5877719d3 100644 --- a/comfy_extras/nodes_sdupscale.py +++ b/comfy_extras/nodes_sdupscale.py @@ -15,7 +15,7 @@ class SD_4XUpscale_Conditioning(io.ComfyNode): io.Conditioning.Input("positive"), io.Conditioning.Input("negative"), io.Float.Input("scale_ratio", default=4.0, min=0.0, max=10.0, step=0.01), - io.Float.Input("noise_augmentation", default=0.0, min=0.0, max=1.0, step=0.001), + io.Float.Input("noise_augmentation", default=0.0, min=0.0, max=1.0, step=0.001, advanced=True), ], outputs=[ io.Conditioning.Output(display_name="positive"), diff --git a/comfy_extras/nodes_slg.py b/comfy_extras/nodes_slg.py index f462faa8f..8cc1f551e 100644 --- a/comfy_extras/nodes_slg.py +++ b/comfy_extras/nodes_slg.py @@ -21,11 +21,11 @@ class SkipLayerGuidanceDiT(io.ComfyNode): is_experimental=True, inputs=[ io.Model.Input("model"), - io.String.Input("double_layers", default="7, 8, 9"), - io.String.Input("single_layers", default="7, 8, 9"), + io.String.Input("double_layers", default="7, 8, 9", advanced=True), + io.String.Input("single_layers", default="7, 8, 9", advanced=True), io.Float.Input("scale", default=3.0, min=0.0, max=10.0, step=0.1), - io.Float.Input("start_percent", default=0.01, min=0.0, max=1.0, step=0.001), - io.Float.Input("end_percent", default=0.15, min=0.0, max=1.0, step=0.001), + io.Float.Input("start_percent", default=0.01, min=0.0, max=1.0, step=0.001, advanced=True), + io.Float.Input("end_percent", default=0.15, min=0.0, max=1.0, step=0.001, advanced=True), io.Float.Input("rescaling_scale", default=0.0, min=0.0, max=10.0, step=0.01), ], outputs=[ @@ -101,10 +101,10 @@ class SkipLayerGuidanceDiTSimple(io.ComfyNode): is_experimental=True, inputs=[ io.Model.Input("model"), - io.String.Input("double_layers", default="7, 8, 9"), - io.String.Input("single_layers", default="7, 8, 9"), - io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001), - io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001), + io.String.Input("double_layers", default="7, 8, 9", advanced=True), + io.String.Input("single_layers", default="7, 8, 9", advanced=True), + io.Float.Input("start_percent", default=0.0, min=0.0, max=1.0, step=0.001, advanced=True), + io.Float.Input("end_percent", default=1.0, min=0.0, max=1.0, step=0.001, advanced=True), ], outputs=[ io.Model.Output(), diff --git a/comfy_extras/nodes_stable3d.py b/comfy_extras/nodes_stable3d.py index c6d8a683d..829c837a1 100644 --- a/comfy_extras/nodes_stable3d.py +++ b/comfy_extras/nodes_stable3d.py @@ -75,8 +75,8 @@ class StableZero123_Conditioning_Batched(io.ComfyNode): io.Int.Input("batch_size", default=1, min=1, max=4096), io.Float.Input("elevation", default=0.0, min=-180.0, max=180.0, step=0.1, round=False), io.Float.Input("azimuth", default=0.0, min=-180.0, max=180.0, step=0.1, round=False), - io.Float.Input("elevation_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False), - io.Float.Input("azimuth_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False) + io.Float.Input("elevation_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False, advanced=True), + io.Float.Input("azimuth_batch_increment", default=0.0, min=-180.0, max=180.0, step=0.1, round=False, advanced=True) ], outputs=[ io.Conditioning.Output(display_name="positive"), diff --git a/comfy_extras/nodes_stable_cascade.py b/comfy_extras/nodes_stable_cascade.py index 04c0b366a..8c1aebca9 100644 --- a/comfy_extras/nodes_stable_cascade.py +++ b/comfy_extras/nodes_stable_cascade.py @@ -33,7 +33,7 @@ class StableCascade_EmptyLatentImage(io.ComfyNode): inputs=[ io.Int.Input("width", default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8), io.Int.Input("height", default=1024, min=256, max=nodes.MAX_RESOLUTION, step=8), - io.Int.Input("compression", default=42, min=4, max=128, step=1), + io.Int.Input("compression", default=42, min=4, max=128, step=1, advanced=True), io.Int.Input("batch_size", default=1, min=1, max=4096), ], outputs=[ @@ -62,7 +62,7 @@ class StableCascade_StageC_VAEEncode(io.ComfyNode): inputs=[ io.Image.Input("image"), io.Vae.Input("vae"), - io.Int.Input("compression", default=42, min=4, max=128, step=1), + io.Int.Input("compression", default=42, min=4, max=128, step=1, advanced=True), ], outputs=[ io.Latent.Output(display_name="stage_c"), diff --git a/comfy_extras/nodes_string.py b/comfy_extras/nodes_string.py index 8d3e65cc5..b4e5f148a 100644 --- a/comfy_extras/nodes_string.py +++ b/comfy_extras/nodes_string.py @@ -169,7 +169,7 @@ class StringContains(io.ComfyNode): inputs=[ io.String.Input("string", multiline=True), io.String.Input("substring", multiline=True), - io.Boolean.Input("case_sensitive", default=True), + io.Boolean.Input("case_sensitive", default=True, advanced=True), ], outputs=[ io.Boolean.Output(display_name="contains"), @@ -198,7 +198,7 @@ class StringCompare(io.ComfyNode): io.String.Input("string_a", multiline=True), io.String.Input("string_b", multiline=True), io.Combo.Input("mode", options=["Starts With", "Ends With", "Equal"]), - io.Boolean.Input("case_sensitive", default=True), + io.Boolean.Input("case_sensitive", default=True, advanced=True), ], outputs=[ io.Boolean.Output(), @@ -233,9 +233,9 @@ class RegexMatch(io.ComfyNode): inputs=[ io.String.Input("string", multiline=True), io.String.Input("regex_pattern", multiline=True), - io.Boolean.Input("case_insensitive", default=True), - io.Boolean.Input("multiline", default=False), - io.Boolean.Input("dotall", default=False), + io.Boolean.Input("case_insensitive", default=True, advanced=True), + io.Boolean.Input("multiline", default=False, advanced=True), + io.Boolean.Input("dotall", default=False, advanced=True), ], outputs=[ io.Boolean.Output(display_name="matches"), @@ -275,10 +275,10 @@ class RegexExtract(io.ComfyNode): io.String.Input("string", multiline=True), io.String.Input("regex_pattern", multiline=True), io.Combo.Input("mode", options=["First Match", "All Matches", "First Group", "All Groups"]), - io.Boolean.Input("case_insensitive", default=True), - io.Boolean.Input("multiline", default=False), - io.Boolean.Input("dotall", default=False), - io.Int.Input("group_index", default=1, min=0, max=100), + io.Boolean.Input("case_insensitive", default=True, advanced=True), + io.Boolean.Input("multiline", default=False, advanced=True), + io.Boolean.Input("dotall", default=False, advanced=True), + io.Int.Input("group_index", default=1, min=0, max=100, advanced=True), ], outputs=[ io.String.Output(), @@ -351,10 +351,10 @@ class RegexReplace(io.ComfyNode): io.String.Input("string", multiline=True), io.String.Input("regex_pattern", multiline=True), io.String.Input("replace", multiline=True), - io.Boolean.Input("case_insensitive", default=True, optional=True), - io.Boolean.Input("multiline", default=False, optional=True), - io.Boolean.Input("dotall", default=False, optional=True, tooltip="When enabled, the dot (.) character will match any character including newline characters. When disabled, dots won't match newlines."), - io.Int.Input("count", default=0, min=0, max=100, optional=True, tooltip="Maximum number of replacements to make. Set to 0 to replace all occurrences (default). Set to 1 to replace only the first match, 2 for the first two matches, etc."), + io.Boolean.Input("case_insensitive", default=True, optional=True, advanced=True), + io.Boolean.Input("multiline", default=False, optional=True, advanced=True), + io.Boolean.Input("dotall", default=False, optional=True, advanced=True, tooltip="When enabled, the dot (.) character will match any character including newline characters. When disabled, dots won't match newlines."), + io.Int.Input("count", default=0, min=0, max=100, optional=True, advanced=True, tooltip="Maximum number of replacements to make. Set to 0 to replace all occurrences (default). Set to 1 to replace only the first match, 2 for the first two matches, etc."), ], outputs=[ io.String.Output(), diff --git a/comfy_extras/nodes_textgen.py b/comfy_extras/nodes_textgen.py new file mode 100644 index 000000000..14cff14a6 --- /dev/null +++ b/comfy_extras/nodes_textgen.py @@ -0,0 +1,176 @@ +from comfy_api.latest import ComfyExtension, io +from typing_extensions import override + +class TextGenerate(io.ComfyNode): + @classmethod + def define_schema(cls): + # Define dynamic combo options for sampling mode + sampling_options = [ + io.DynamicCombo.Option( + key="on", + inputs=[ + io.Float.Input("temperature", default=0.7, min=0.01, max=2.0, step=0.000001), + io.Int.Input("top_k", default=64, min=0, max=1000), + io.Float.Input("top_p", default=0.95, min=0.0, max=1.0, step=0.01), + io.Float.Input("min_p", default=0.05, min=0.0, max=1.0, step=0.01), + io.Float.Input("repetition_penalty", default=1.05, min=0.0, max=5.0, step=0.01), + io.Int.Input("seed", default=0, min=0, max=0xffffffffffffffff), + ] + ), + io.DynamicCombo.Option( + key="off", + inputs=[] + ), + ] + + return io.Schema( + node_id="TextGenerate", + category="textgen/", + search_aliases=["LLM", "gemma"], + inputs=[ + io.Clip.Input("clip"), + io.String.Input("prompt", multiline=True, dynamic_prompts=True, default=""), + io.Image.Input("image", optional=True), + io.Int.Input("max_length", default=256, min=1, max=2048), + io.DynamicCombo.Input("sampling_mode", options=sampling_options, display_name="Sampling Mode"), + ], + outputs=[ + io.String.Output(display_name="generated_text"), + ], + ) + + @classmethod + def execute(cls, clip, prompt, max_length, sampling_mode, image=None) -> io.NodeOutput: + + tokens = clip.tokenize(prompt, image=image, skip_template=False, min_length=1) + + # Get sampling parameters from dynamic combo + do_sample = sampling_mode.get("sampling_mode") == "on" + temperature = sampling_mode.get("temperature", 1.0) + top_k = sampling_mode.get("top_k", 50) + top_p = sampling_mode.get("top_p", 1.0) + min_p = sampling_mode.get("min_p", 0.0) + seed = sampling_mode.get("seed", None) + repetition_penalty = sampling_mode.get("repetition_penalty", 1.0) + + generated_ids = clip.generate( + tokens, + do_sample=do_sample, + max_length=max_length, + temperature=temperature, + top_k=top_k, + top_p=top_p, + min_p=min_p, + repetition_penalty=repetition_penalty, + seed=seed + ) + + generated_text = clip.decode(generated_ids, skip_special_tokens=True) + return io.NodeOutput(generated_text) + + +LTX2_T2V_SYSTEM_PROMPT = """You are a Creative Assistant. Given a user's raw input prompt describing a scene or concept, expand it into a detailed video generation prompt with specific visuals and integrated audio to guide a text-to-video model. +#### Guidelines +- Strictly follow all aspects of the user's raw input: include every element requested (style, visuals, motions, actions, camera movement, audio). + - If the input is vague, invent concrete details: lighting, textures, materials, scene settings, etc. + - For characters: describe gender, clothing, hair, expressions. DO NOT invent unrequested characters. +- Use active language: present-progressive verbs ("is walking," "speaking"). If no action specified, describe natural movements. +- Maintain chronological flow: use temporal connectors ("as," "then," "while"). +- Audio layer: Describe complete soundscape (background audio, ambient sounds, SFX, speech/music when requested). Integrate sounds chronologically alongside actions. Be specific (e.g., "soft footsteps on tile"), not vague (e.g., "ambient sound is present"). +- Speech (only when requested): + - For ANY speech-related input (talking, conversation, singing, etc.), ALWAYS include exact words in quotes with voice characteristics (e.g., "The man says in an excited voice: 'You won't believe what I just saw!'"). + - Specify language if not English and accent if relevant. +- Style: Include visual style at the beginning: "Style: