Compare commits

...

7 Commits

Author SHA1 Message Date
Terry Jia
aed40532dc feat: add curve inputs and raise uniform limit for GLSL shader node 2026-03-23 10:30:34 -04:00
comfyanonymous
6265a239f3 Add warning for users who disable dynamic vram. (#13113) 2026-03-22 18:46:18 -04:00
Talmaj
d49420b3c7 LongCat-Image edit (#13003) 2026-03-21 23:51:05 -04:00
comfyanonymous
ebf6b52e32 ComfyUI v0.18.1 2026-03-21 22:32:16 -04:00
rattus
25b6d1d629 wan: vae: Fix light/color change (#13101)
There was an issue where the resample split was too early and dropped one
of the rolling convolutions a frame early. This is most noticeable as a
lighting/color change between pixel frames 5->6 (latent 2->3), or as a
lighting change between the first and last frame in an FLF wan flow.
2026-03-21 18:44:35 -04:00
comfyanonymous
11c15d8832 Fix fp16 intermediates giving different results. (#13100) 2026-03-21 17:53:25 -04:00
comfyanonymous
b5d32e6ad2 Fix sampling issue with fp16 intermediates. (#13099) 2026-03-21 17:47:42 -04:00
14 changed files with 221 additions and 26 deletions

View File

@@ -0,0 +1,92 @@
#version 300 es
precision highp float;
uniform sampler2D u_image0;
uniform float u_float0; // shadows red (-100 to 100)
uniform float u_float1; // shadows green (-100 to 100)
uniform float u_float2; // shadows blue (-100 to 100)
uniform float u_float3; // midtones red (-100 to 100)
uniform float u_float4; // midtones green (-100 to 100)
uniform float u_float5; // midtones blue (-100 to 100)
uniform float u_float6; // highlights red (-100 to 100)
uniform float u_float7; // highlights green (-100 to 100)
uniform float u_float8; // highlights blue (-100 to 100)
uniform bool u_bool0; // preserve luminosity
in vec2 v_texCoord;
out vec4 fragColor;
vec3 rgb2hsl(vec3 c) {
    float maxC = max(c.r, max(c.g, c.b));
    float minC = min(c.r, min(c.g, c.b));
    float l = (maxC + minC) * 0.5;
    if (maxC == minC) return vec3(0.0, 0.0, l);
    float d = maxC - minC;
    float s = l > 0.5 ? d / (2.0 - maxC - minC) : d / (maxC + minC);
    float h;
    if (maxC == c.r) {
        h = (c.g - c.b) / d + (c.g < c.b ? 6.0 : 0.0);
    } else if (maxC == c.g) {
        h = (c.b - c.r) / d + 2.0;
    } else {
        h = (c.r - c.g) / d + 4.0;
    }
    h /= 6.0;
    return vec3(h, s, l);
}
float hue2rgb(float p, float q, float t) {
    if (t < 0.0) t += 1.0;
    if (t > 1.0) t -= 1.0;
    if (t < 1.0 / 6.0) return p + (q - p) * 6.0 * t;
    if (t < 1.0 / 2.0) return q;
    if (t < 2.0 / 3.0) return p + (q - p) * (2.0 / 3.0 - t) * 6.0;
    return p;
}
vec3 hsl2rgb(vec3 hsl) {
    float h = hsl.x, s = hsl.y, l = hsl.z;
    if (s == 0.0) return vec3(l);
    float q = l < 0.5 ? l * (1.0 + s) : l + s - l * s;
    float p = 2.0 * l - q;
    return vec3(
        hue2rgb(p, q, h + 1.0 / 3.0),
        hue2rgb(p, q, h),
        hue2rgb(p, q, h - 1.0 / 3.0)
    );
}
void main() {
    vec4 tex = texture(u_image0, v_texCoord);
    vec3 color = tex.rgb;
    // Build shadows/midtones/highlights vectors (scale -100..100 to -1..1)
    vec3 shadows = vec3(u_float0, u_float1, u_float2) * 0.01;
    vec3 midtones = vec3(u_float3, u_float4, u_float5) * 0.01;
    vec3 highlights = vec3(u_float6, u_float7, u_float8) * 0.01;
    // GIMP: HSL lightness for weight calculation
    float maxC = max(color.r, max(color.g, color.b));
    float minC = min(color.r, min(color.g, color.b));
    float lightness = (maxC + minC) * 0.5;
    // GIMP weight curves: linear ramps with constants a=0.25, b=0.333, scale=0.7
    const float a = 0.25;
    const float b = 0.333;
    const float scale = 0.7;
    float sw = clamp((lightness - b) / -a + 0.5, 0.0, 1.0) * scale;
    float mw = clamp((lightness - b) / a + 0.5, 0.0, 1.0) *
        clamp((lightness + b - 1.0) / -a + 0.5, 0.0, 1.0) * scale;
    float hw = clamp((lightness + b - 1.0) / a + 0.5, 0.0, 1.0) * scale;
    color += sw * shadows + mw * midtones + hw * highlights;
    if (u_bool0) {
        vec3 hsl = rgb2hsl(clamp(color, 0.0, 1.0));
        hsl.z = lightness;
        color = hsl2rgb(hsl);
    }
    fragColor = vec4(clamp(color, 0.0, 1.0), tex.a);
}
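
For reference, the shadows/midtones/highlights weighting in main() follows GIMP-style linear ramps with a = 0.25, b = 0.333, scale = 0.7. A small numpy sketch of those same ramps (illustrative only, not part of the node):

import numpy as np

def balance_weights(lightness, a=0.25, b=0.333, scale=0.7):
    # Same ramps as the shader: shadows fade out, midtones peak around 0.5,
    # highlights fade in.
    sw = np.clip((lightness - b) / -a + 0.5, 0.0, 1.0) * scale
    mw = (np.clip((lightness - b) / a + 0.5, 0.0, 1.0)
          * np.clip((lightness + b - 1.0) / -a + 0.5, 0.0, 1.0) * scale)
    hw = np.clip((lightness + b - 1.0) / a + 0.5, 0.0, 1.0) * scale
    return sw, mw, hw

print(balance_weights(0.1))  # shadows dominate
print(balance_weights(0.5))  # midtones dominate
print(balance_weights(0.9))  # highlights dominate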

File diff suppressed because one or more lines are too long

View File

@@ -386,7 +386,7 @@ class Flux(nn.Module):
h = max(h, ref.shape[-2] + h_offset)
w = max(w, ref.shape[-1] + w_offset)
kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset)
kontext, kontext_ids = self.process_img(ref, index=index, h_offset=h_offset, w_offset=w_offset, transformer_options=transformer_options)
img = torch.cat([img, kontext], dim=1)
img_ids = torch.cat([img_ids, kontext_ids], dim=1)
ref_num_tokens.append(kontext.shape[1])

View File

@@ -376,11 +376,16 @@ class Decoder3d(nn.Module):
return
layer = self.upsamples[layer_idx]
if isinstance(layer, Resample) and layer.mode == 'upsample3d' and x.shape[2] > 1:
for frame_idx in range(x.shape[2]):
if feat_cache is not None:
x = layer(x, feat_cache, feat_idx)
else:
x = layer(x)
if isinstance(layer, Resample) and layer.mode == 'upsample3d' and x.shape[2] > 2:
for frame_idx in range(0, x.shape[2], 2):
self.run_up(
layer_idx,
[x[:, :, frame_idx:frame_idx + 1, :, :]],
layer_idx + 1,
[x[:, :, frame_idx:frame_idx + 2, :, :]],
feat_cache,
feat_idx.copy(),
out_chunks,
@@ -388,11 +393,6 @@ class Decoder3d(nn.Module):
del x
return
if feat_cache is not None:
x = layer(x, feat_cache, feat_idx)
else:
x = layer(x)
next_x_ref = [x]
del x
self.run_up(layer_idx + 1, next_x_ref, feat_cache, feat_idx, out_chunks)
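
Per the commit message above, the previous per-frame split handed the 3D upsample one latent frame at a time, which dropped one of the rolling convolutions a frame early; the new loop walks the latent frames in pairs instead. A minimal sketch of just the slicing pattern (toy tensor, not the decoder itself):

import torch

x = torch.randn(1, 4, 5, 8, 8)  # (B, C, T, H, W) with an odd frame count
chunks = [x[:, :, t:t + 2] for t in range(0, x.shape[2], 2)]
print([c.shape[2] for c in chunks])  # [2, 2, 1] -- the last slice simply ends up shorter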

View File

@@ -937,9 +937,10 @@ class LongCatImage(Flux):
transformer_options = transformer_options.copy()
rope_opts = transformer_options.get("rope_options", {})
rope_opts = dict(rope_opts)
pe_len = float(c_crossattn.shape[1]) if c_crossattn is not None else 512.0
rope_opts.setdefault("shift_t", 1.0)
rope_opts.setdefault("shift_y", 512.0)
rope_opts.setdefault("shift_x", 512.0)
rope_opts.setdefault("shift_y", pe_len)
rope_opts.setdefault("shift_x", pe_len)
transformer_options["rope_options"] = rope_opts
return super()._apply_model(x, t, c_concat, c_crossattn, control, transformer_options, **kwargs)
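
Since setdefault is used, any shift_x/shift_y the caller already placed in rope_options still wins; only missing keys fall back to the derived pe_len. A quick illustration with made-up numbers:

rope_opts = {"shift_y": 256.0}          # value supplied upstream survives
pe_len = 77.0                           # hypothetical c_crossattn length
rope_opts.setdefault("shift_y", pe_len)
rope_opts.setdefault("shift_x", pe_len)
print(rope_opts)                        # {'shift_y': 256.0, 'shift_x': 77.0}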

View File

@@ -8,12 +8,12 @@ import comfy.nested_tensor
def prepare_noise_inner(latent_image, generator, noise_inds=None):
if noise_inds is None:
return torch.randn(latent_image.size(), dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
return torch.randn(latent_image.size(), dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)
unique_inds, inverse = np.unique(noise_inds, return_inverse=True)
noises = []
for i in range(unique_inds[-1]+1):
noise = torch.randn([1] + list(latent_image.size())[1:], dtype=latent_image.dtype, layout=latent_image.layout, generator=generator, device="cpu")
noise = torch.randn([1] + list(latent_image.size())[1:], dtype=torch.float32, layout=latent_image.layout, generator=generator, device="cpu").to(dtype=latent_image.dtype)
if i in unique_inds:
noises.append(noise)
noises = [noises[i] for i in inverse]
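
The pattern in this change is to draw the noise in float32 with the seeded generator and only cast to the latent dtype afterwards, so a given seed produces the same underlying draw no matter what dtype the latent uses. A minimal sketch (hypothetical shapes, not ComfyUI code):

import torch

def make_noise(shape, seed, latent_dtype):
    g = torch.Generator(device="cpu").manual_seed(seed)
    # Always sample in fp32, then cast; the RNG stream is consumed identically.
    return torch.randn(shape, dtype=torch.float32, generator=g, device="cpu").to(dtype=latent_dtype)

a = make_noise((1, 4, 8, 8), 42, torch.float16)
b = make_noise((1, 4, 8, 8), 42, torch.bfloat16)
print((a.float() - b.float()).abs().max())  # small: only the final rounding differs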

View File

@@ -985,8 +985,8 @@ class CFGGuider:
self.inner_model, self.conds, self.loaded_models = comfy.sampler_helpers.prepare_sampling(self.model_patcher, noise.shape, self.conds, self.model_options)
device = self.model_patcher.load_device
noise = noise.to(device)
latent_image = latent_image.to(device)
noise = noise.to(device=device, dtype=torch.float32)
latent_image = latent_image.to(device=device, dtype=torch.float32)
sigmas = sigmas.to(device)
cast_to_load_options(self.model_options, device=device, dtype=self.model_patcher.model_dtype())
@@ -1028,6 +1028,7 @@ class CFGGuider:
denoise_mask, _ = comfy.utils.pack_latents(denoise_masks)
else:
denoise_mask = denoise_masks[0]
denoise_mask = denoise_mask.float()
self.conds = {}
for k in self.original_conds:

View File

@@ -1028,12 +1028,19 @@ class Qwen25_7BVLI(BaseLlama, BaseGenerate, torch.nn.Module):
grid = e.get("extra", None)
start = e.get("index")
if position_ids is None:
position_ids = torch.zeros((3, embeds.shape[1]), device=embeds.device)
position_ids = torch.ones((3, embeds.shape[1]), device=embeds.device, dtype=torch.long)
position_ids[:, :start] = torch.arange(0, start, device=embeds.device)
end = e.get("size") + start
len_max = int(grid.max()) // 2
start_next = len_max + start
position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
if attention_mask is not None:
# Assign compact sequential positions to attended tokens only,
# skipping over padding so post-padding tokens aren't inflated.
after_mask = attention_mask[0, end:]
text_positions = after_mask.cumsum(0) - 1 + start_next + offset
position_ids[:, end:] = torch.where(after_mask.bool(), text_positions, position_ids[0, end:])
else:
position_ids[:, end:] = torch.arange(start_next + offset, start_next + (embeds.shape[1] - end) + offset, device=embeds.device)
position_ids[0, start:end] = start + offset
max_d = int(grid[0][1]) // 2
position_ids[1, start:end] = torch.arange(start + offset, start + max_d + offset, device=embeds.device).unsqueeze(1).repeat(1, math.ceil((end - start) / max_d)).flatten(0)[:end - start]
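
A toy illustration of the masked branch above (made-up numbers): the cumsum gives attended tokens after the image block consecutive positions, while padded slots keep the fill value, so a real token that sits after padding is not pushed to an inflated position.

import torch

start_next, offset = 10, 0                       # hypothetical values
after_mask = torch.tensor([1, 1, 0, 0, 1])       # 1 = real token, 0 = padding
fill = torch.ones(5, dtype=torch.long)           # the default (ones) positions
text_positions = after_mask.cumsum(0) - 1 + start_next + offset
print(torch.where(after_mask.bool(), text_positions, fill))  # tensor([10, 11,  1,  1, 12])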

View File

@@ -64,7 +64,13 @@ class LongCatImageBaseTokenizer(Qwen25_7BVLITokenizer):
return [output]
IMAGE_PAD_TOKEN_ID = 151655
class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
T2I_PREFIX = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
EDIT_PREFIX = "<|im_start|>system\nAs an image editing expert, first analyze the content and attributes of the input image(s). Then, based on the user's editing instructions, clearly and precisely determine how to modify the given image(s), ensuring that only the specified parts are altered and all other aspects remain consistent with the original(s).<|im_end|>\n<|im_start|>user\n<|vision_start|><|image_pad|><|vision_end|>"
SUFFIX = "<|im_end|>\n<|im_start|>assistant\n"
def __init__(self, embedding_directory=None, tokenizer_data={}):
super().__init__(
embedding_directory=embedding_directory,
@@ -72,10 +78,8 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
name="qwen25_7b",
tokenizer=LongCatImageBaseTokenizer,
)
self.longcat_template_prefix = "<|im_start|>system\nAs an image captioning expert, generate a descriptive text prompt based on an image content, suitable for input to a text-to-image model.<|im_end|>\n<|im_start|>user\n"
self.longcat_template_suffix = "<|im_end|>\n<|im_start|>assistant\n"
def tokenize_with_weights(self, text, return_word_ids=False, **kwargs):
def tokenize_with_weights(self, text, return_word_ids=False, images=None, **kwargs):
skip_template = False
if text.startswith("<|im_start|>"):
skip_template = True
@@ -90,11 +94,14 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
text, return_word_ids=return_word_ids, disable_weights=True, **kwargs
)
else:
has_images = images is not None and len(images) > 0
template_prefix = self.EDIT_PREFIX if has_images else self.T2I_PREFIX
prefix_ids = base_tok.tokenizer(
self.longcat_template_prefix, add_special_tokens=False
template_prefix, add_special_tokens=False
)["input_ids"]
suffix_ids = base_tok.tokenizer(
self.longcat_template_suffix, add_special_tokens=False
self.SUFFIX, add_special_tokens=False
)["input_ids"]
prompt_tokens = base_tok.tokenize_with_weights(
@@ -106,6 +113,14 @@ class LongCatImageTokenizer(sd1_clip.SD1Tokenizer):
suffix_pairs = [(t, 1.0) for t in suffix_ids]
combined = prefix_pairs + prompt_pairs + suffix_pairs
if has_images:
embed_count = 0
for i in range(len(combined)):
if combined[i][0] == IMAGE_PAD_TOKEN_ID and embed_count < len(images):
combined[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"}, combined[i][1])
embed_count += 1
tokens = {"qwen25_7b": [combined]}
return tokens
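
A worked toy run of the pad-token substitution loop (stand-in values, not real tensors): each <|image_pad|> token id in the combined sequence is swapped, in order, for an image payload dict until the supplied images run out.

IMAGE_PAD_TOKEN_ID = 151655
images = ["img_a"]                                   # stand-in for an image tensor
combined = [(100, 1.0), (IMAGE_PAD_TOKEN_ID, 1.0), (200, 1.0)]
embed_count = 0
for i in range(len(combined)):
    if combined[i][0] == IMAGE_PAD_TOKEN_ID and embed_count < len(images):
        combined[i] = ({"type": "image", "data": images[embed_count], "original_type": "image"}, combined[i][1])
        embed_count += 1
print(combined)  # [(100, 1.0), ({'type': 'image', ...}, 1.0), (200, 1.0)]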

View File

@@ -425,4 +425,7 @@ class Qwen2VLVisionTransformer(nn.Module):
hidden_states = block(hidden_states, position_embeddings, cu_seqlens_now, optimized_attention=optimized_attention)
hidden_states = self.merger(hidden_states)
# Potentially important for spatially precise edits. This is present in the HF implementation.
reverse_indices = torch.argsort(window_index)
hidden_states = hidden_states[reverse_indices, :]
return hidden_states
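
The argsort works here because, for a permutation, argsort yields its inverse, so indexing with it undoes the windowed reordering of the patch tokens. A quick check with a hypothetical index:

import torch

window_index = torch.tensor([2, 0, 3, 1])   # hypothetical patch reordering
x = torch.arange(4.0).unsqueeze(1)          # tokens in original order
shuffled = x[window_index]
restored = shuffled[torch.argsort(window_index)]
print(torch.equal(restored, x))             # True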

View File

@@ -87,7 +87,9 @@ class SizeModeInput(TypedDict):
MAX_IMAGES = 5 # u_image0-4
MAX_UNIFORMS = 5 # u_float0-4, u_int0-4
MAX_UNIFORMS = 20 # u_float0-19, u_int0-19
MAX_BOOLS = 10 # u_bool0-9
MAX_CURVES = 4 # u_curve0-3 (1D LUT textures)
MAX_OUTPUTS = 4 # fragColor0-3 (MRT)
# Vertex shader using gl_VertexID trick - no VBO needed.
@@ -497,6 +499,8 @@ def _render_shader_batch(
image_batches: list[list[np.ndarray]],
floats: list[float],
ints: list[int],
bools: list[bool] | None = None,
curves: list[np.ndarray] | None = None,
) -> list[list[np.ndarray]]:
"""
Render a fragment shader for multiple batches efficiently.
@@ -511,6 +515,8 @@ def _render_shader_batch(
image_batches: List of batches, each batch is a list of input images (H, W, C) float32 [0,1]
floats: List of float uniforms
ints: List of int uniforms
bools: List of bool uniforms (passed as int 0/1 to GLSL bool uniforms)
curves: List of 1D LUT arrays (256 float32 values each) for u_curve0-N
Returns:
List of batch outputs, each is a list of output images (H, W, 4) float32 [0,1]
@@ -533,11 +539,15 @@ def _render_shader_batch(
# Detect multi-pass rendering
num_passes = _detect_pass_count(fragment_code)
if curves is None:
curves = []
# Track resources for cleanup
program = None
fbo = None
output_textures = []
input_textures = []
curve_textures = []
ping_pong_textures = []
ping_pong_fbos = []
@@ -624,6 +634,30 @@ def _render_shader_batch(
if loc >= 0:
gl.glUniform1i(loc, v)
if bools is None:
bools = []
for i, v in enumerate(bools):
loc = gl.glGetUniformLocation(program, f"u_bool{i}")
if loc >= 0:
gl.glUniform1i(loc, 1 if v else 0)
# Create 1D LUT textures for curves (bound after image texture units)
for i, lut in enumerate(curves):
tex = gl.glGenTextures(1)
curve_textures.append(tex)
unit = MAX_IMAGES + i
gl.glActiveTexture(gl.GL_TEXTURE0 + unit)
gl.glBindTexture(gl.GL_TEXTURE_2D, tex)
gl.glTexImage2D(gl.GL_TEXTURE_2D, 0, gl.GL_R32F, len(lut), 1, 0, gl.GL_RED, gl.GL_FLOAT, lut)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MIN_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_MAG_FILTER, gl.GL_LINEAR)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_S, gl.GL_CLAMP_TO_EDGE)
gl.glTexParameteri(gl.GL_TEXTURE_2D, gl.GL_TEXTURE_WRAP_T, gl.GL_CLAMP_TO_EDGE)
loc = gl.glGetUniformLocation(program, f"u_curve{i}")
if loc >= 0:
gl.glUniform1i(loc, unit)
# Get u_pass uniform location for multi-pass
pass_loc = gl.glGetUniformLocation(program, "u_pass")
@@ -718,6 +752,8 @@ def _render_shader_batch(
for tex in input_textures:
gl.glDeleteTextures(int(tex))
for tex in curve_textures:
gl.glDeleteTextures(int(tex))
for tex in output_textures:
gl.glDeleteTextures(int(tex))
for tex in ping_pong_textures:
@@ -754,6 +790,20 @@ class GLSLShader(io.ComfyNode):
max=MAX_UNIFORMS,
)
bool_template = io.Autogrow.TemplatePrefix(
io.Boolean.Input("bool", default=False),
prefix="u_bool",
min=0,
max=MAX_BOOLS,
)
curve_template = io.Autogrow.TemplatePrefix(
io.Curve.Input("curve"),
prefix="u_curve",
min=0,
max=MAX_CURVES,
)
return io.Schema(
node_id="GLSLShader",
display_name="GLSL Shader",
@@ -762,6 +812,7 @@ class GLSLShader(io.ComfyNode):
"Apply GLSL ES fragment shaders to images. "
"u_resolution (vec2) is always available."
),
is_experimental=True,
inputs=[
io.String.Input(
"fragment_shader",
@@ -796,6 +847,8 @@ class GLSLShader(io.ComfyNode):
io.Autogrow.Input("images", template=image_template, tooltip=f"Images are available as u_image0-{MAX_IMAGES-1} (sampler2D) in the shader code"),
io.Autogrow.Input("floats", template=float_template, tooltip=f"Floats are available as u_float0-{MAX_UNIFORMS-1} in the shader code"),
io.Autogrow.Input("ints", template=int_template, tooltip=f"Ints are available as u_int0-{MAX_UNIFORMS-1} in the shader code"),
io.Autogrow.Input("bools", template=bool_template, tooltip=f"Booleans are available as u_bool0-{MAX_BOOLS-1} (bool) in the shader code"),
io.Autogrow.Input("curves", template=curve_template, tooltip=f"Curves are available as u_curve0-{MAX_CURVES-1} (sampler2D, 256x1 LUT) in the shader code. Sample with texture(u_curve0, vec2(x, 0.5)).r"),
],
outputs=[
io.Image.Output(display_name="IMAGE0", tooltip="Available via layout(location = 0) out vec4 fragColor0 in the shader code"),
@@ -813,13 +866,30 @@ class GLSLShader(io.ComfyNode):
images: io.Autogrow.Type,
floats: io.Autogrow.Type = None,
ints: io.Autogrow.Type = None,
bools: io.Autogrow.Type = None,
curves: io.Autogrow.Type = None,
**kwargs,
) -> io.NodeOutput:
from comfy_api.input import CurveInput
image_list = [v for v in images.values() if v is not None]
float_list = (
[v if v is not None else 0.0 for v in floats.values()] if floats else []
)
int_list = [v if v is not None else 0 for v in ints.values()] if ints else []
bool_list = [v if v is not None else False for v in bools.values()] if bools else []
# Convert CurveInput objects to 256-entry float32 LUT arrays
curve_luts = []
if curves:
for v in curves.values():
if v is not None and isinstance(v, CurveInput):
curve_luts.append(v.to_lut(256).astype(np.float32))
elif v is not None:
# Raw point list fallback: build a monotone cubic curve
from comfy_api.input import MonotoneCubicCurve
points = [(float(x), float(y)) for x, y in v]
curve_luts.append(MonotoneCubicCurve(points).to_lut(256).astype(np.float32))
if not image_list:
raise ValueError("At least one input image is required")
@@ -846,6 +916,8 @@ class GLSLShader(io.ComfyNode):
image_batches,
float_list,
int_list,
bool_list,
curve_luts,
)
# Collect outputs into tensors
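
Each curve is baked to a 256-entry LUT and uploaded as a 256x1 GL_R32F texture with linear filtering, so texture(u_curve0, vec2(x, 0.5)).r remaps a channel value x in [0, 1] through the curve. A rough numpy analogue of that lookup (hypothetical gamma-like curve; approximate, ignoring texel-center offsets):

import numpy as np

lut = np.linspace(0.0, 1.0, 256, dtype=np.float32) ** 0.6  # stand-in for a CurveInput LUT

def apply_curve(x, lut):
    # Linear interpolation over the LUT, roughly what the GPU sampler does.
    return np.interp(x, np.linspace(0.0, 1.0, len(lut)), lut)

pixels = np.array([0.0, 0.25, 0.5, 1.0], dtype=np.float32)
print(apply_curve(pixels, lut))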

View File

@@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.18.0"
__version__ = "0.18.1"

View File

@@ -471,6 +471,9 @@ if __name__ == "__main__":
if sys.version_info.major == 3 and sys.version_info.minor < 10:
logging.warning("WARNING: You are using a python version older than 3.10, please upgrade to a newer one. 3.12 and above is recommended.")
if args.disable_dynamic_vram:
logging.warning("Dynamic vram disabled with argument. If you have any issues with dynamic vram enabled please give us a detailed reports as this argument will be removed soon.")
event_loop, _, start_all_func = start_comfyui()
try:
x = start_all_func()

View File

@@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.18.0"
version = "0.18.1"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.10"