Mirror of https://github.com/comfyanonymous/ComfyUI.git (synced 2026-02-16 05:00:03 +00:00)
Compare commits: v0.5.0...portable-m (198 commits)
Commit SHA1s:

0f21df8051 334e74b938 f19a2b53f4 7cf52dd51e 4061eaa469 d69c8b3ac2
a5e0674474 79fb96488a aa878cc193 1abf69ea27 6206a6d3d2 7b9ad5208e
a58c4fbf68 b15ef9917b 2d4dd3972c 5cb77fbb18 32bd55779a 671a769dc6
d8b821e47b 16359abbbc 8f492d8f34 ad4b959d7e b88c66bfa1 de357a01f8
c07908a37e fe26f30cb6 3c4b429251 0432bccbcf aaf06ace12 f46771bd97
8e1b1b722b a1a6f4d7fe ee54914a52 8f59e2a341 7d5e73ea94 9dd26b0349
c9c68ed78d 6626f7c5c4 0802f3a635 19ad129d37 db61dc3481 5fbc8a1b80
b180f47d0e 2b47f4a38e a3af8f35c2 5f50b86114 4e7f2eeae2 fc5703c468
05cd5348b6 3c000c1de4 6b20418ad1 2dc24f9870 8634b19bc7 47436c59d7
28092933c1 17064a993c 12f2b59284 8cbdaa8855 976cee95f8 20ac0052f8
bc8418f55a 42f69b1ffd 581059a83d 74c1a58566 316aa125c9 7b1ed9b2b8
4ea946778b 309c92d6c9 ca7492c9d4 267c54eaae fa51f0c60a 0a084a88a2
036aa3efa8 e7ff647d02 77e10752fe 2c30881d9c 7fa5990dbc 07212a2466
f4d7a32cd8 ce1df28bef 0f8d57206c 9d70d75f20 ff5e92abdb 033e725b8e
cc8a026671 f9cfea0f2e b5745ae0a7 2a30a19df7 c7f04234c6 0e31eca087
1c8c9f7f4d d4cb177414 8a2f805233 c97f6aa0b2 ba3c8e3dbe 3b65618d13
fc88b4f939 6a1f95caa0 2ade597d02 08e9c3ddf0 f8aab7cab0 561eaf6ccf
31469f962f e0f111c6eb 74a027f589 cc21e84115 69bbe1d5a9 7fd87423b3
1224d58a17 2cc7bafb52 523b54b9b4 5c8f724c9a eda556d7b4 b7faa5fe3d
6087e0210c 6728792589 881db45147 26cac3c053 47350d323a 117d8ae992
844e5e7abb 20953cbfd4 7c36368b14 d7b4f45c5b 4b1aac74bb be456cb37a
3dfecd541b ca04f8f401 8b44e58e6c 37aa552602 91555acf2c d7777dc83a
1c66507261 264116dc4d d750aa0847 37277e4188 106510197a bf01579b87
ab1a79ad74 2fe58571e2 46209599ff 02317a1f71 ac7e83448e 56cff964f2
5582e2a0f3 3c8196a170 62c08e4659 ac7bde1d03 6909638a42 d0625d7f7c
6b19857c93 4e904305ce 726aa75126 74087e26da 51bf04c5ae b603e034e5
3c9a0fcf8a 0adeb9b135 98b5183ed8 16a0b24da4 552fe9df02 2ce64b131c
d6fa7a7c84 17cfabec7d ad633b2953 9eba1547f4 f398256d11 8744ebb4a1
d5167d2ded 364e07d145 5a0ec182ec 39f39c3aa9 4e95c0c104 d1ab6adc3a
35a294431f baeeeb02b9 ef641f3e4b 9ac185456f b69ef5f869 31aecbe1ad
28d23a7813 f51047abd3 14598c1104 57dae1469f ea3d3cc6a4 9c2eb2c1dd
ec82eea1f1 4fafc0c58d d2ed1dcb9a 94f61c6378 418eaed42c cc975e5f0b
311f64ac83 8b9f31abdf fb1b9c76b0 545d96c12d 1855efe1c3 6897a1d077
.ci/manager_windows/install_manager.bat (new file, 4 lines)

```diff
@@ -0,0 +1,4 @@
+@echo off
+..\python_embeded\python.exe .\install_manager.py ..\ComfyUI\
+echo Installed manager through pip package, if not already installed.
+pause
```
.ci/manager_windows/install_manager.py (new file, 24 lines)

```diff
@@ -0,0 +1,24 @@
+import sys
+import os
+
+repo_path = str(sys.argv[1])
+repo_manager_req_path = os.path.join(repo_path, "manager_requirements.txt")
+
+if os.path.exists(repo_manager_req_path):
+    import subprocess
+    # if not installed, we get 'WARNING: Package(s) not found: comfyui_manager'
+    # if installed, there will be a line like 'Version: 0.1.0'
+    try:
+        output = subprocess.check_output([sys.executable, '-s', '-m', 'pip', 'show', 'comfyui_manager'])
+        if 'Version:' in output.decode('utf-8'):
+            print("comfyui_manager is already installed, will attempt to update to matching version of ComfyUI.")  # noqa: T201
+        else:
+            print("comfyui_manager is not installed, will install it now.")  # noqa: T201
+    except:
+        pass
+
+    try:
+        subprocess.check_call([sys.executable, '-s', '-m', 'pip', 'install', '-r', repo_manager_req_path])
+        print("comfyui_manager installed successfully.")  # noqa: T201
+    except:
+        print("Failed to install comfyui_manager, please install it manually.")  # noqa: T201
```
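Note: the installer's "is comfyui_manager already installed?" test is just a scrape of `pip show` output. A minimal standalone sketch of the same check (the helper name `pip_package_installed` is illustrative, not from the repo):

```python
import subprocess
import sys

def pip_package_installed(package_name: str) -> bool:
    """Return True when `pip show <package_name>` prints a 'Version:' line."""
    try:
        output = subprocess.check_output([sys.executable, "-m", "pip", "show", package_name])
        return "Version:" in output.decode("utf-8")
    except subprocess.CalledProcessError:
        # pip exits non-zero (and warns 'Package(s) not found') when the package is missing
        return False

if __name__ == "__main__":
    print(pip_package_installed("pip"))             # True in any working environment
    print(pip_package_installed("not-a-real-pkg"))  # False
```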
```diff
@@ -126,6 +126,8 @@ cur_path = os.path.dirname(update_py_path)
 req_path = os.path.join(cur_path, "current_requirements.txt")
 repo_req_path = os.path.join(repo_path, "requirements.txt")
 
+manager_req_path = os.path.join(cur_path, "current_manager_requirements.txt")
+repo_manager_req_path = os.path.join(repo_path, "manager_requirements.txt")
 
 def files_equal(file1, file2):
     try:
@@ -152,6 +154,25 @@ if not os.path.exists(req_path) or not files_equal(repo_req_path, req_path):
     except:
         pass
 
+if os.path.exists(repo_manager_req_path) and (not os.path.exists(manager_req_path) or not files_equal(repo_manager_req_path, manager_req_path)):
+    import subprocess
+    # first, confirm that comfyui_manager package is installed; only update it if it is
+    # if not installed, we get 'WARNING: Package(s) not found: comfyui_manager'
+    # if installed, there will be a line like 'Version: 0.1.0'
+    update_manager = False
+    try:
+        output = subprocess.check_output([sys.executable, '-s', '-m', 'pip', 'show', 'comfyui_manager'])
+        if 'Version:' in output.decode('utf-8'):
+            update_manager = True
+    except:
+        pass
+
+    if update_manager:
+        try:
+            subprocess.check_call([sys.executable, '-s', '-m', 'pip', 'install', '-r', repo_manager_req_path])
+            shutil.copy(repo_manager_req_path, manager_req_path)
+        except:
+            pass
+
 stable_update_script = os.path.join(repo_path, ".ci/update_windows/update_comfyui_stable.bat")
 stable_update_script_to = os.path.join(cur_path, "update_comfyui_stable.bat")
```
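Note: the hunk above only shows the first lines of `files_equal`; its body sits outside the captured context. A hypothetical sketch of what such a helper typically does (byte-for-byte comparison, treating unreadable files as "not equal"), not the repo's actual implementation:

```python
def files_equal(file1, file2):
    # Hypothetical reconstruction: compare the two files' raw contents.
    try:
        with open(file1, "rb") as f1, open(file2, "rb") as f2:
            return f1.read() == f2.read()
    except Exception:
        # a missing or unreadable requirements file forces a reinstall/update
        return False
```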
Windows portable launcher .bat scripts, one hunk per script; each adds --enable-manager:

```diff
@@ -1,2 +1,2 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-manager
 pause

@@ -1,2 +1,2 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --disable-smart-memory
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-manager --disable-smart-memory
 pause

@@ -1,2 +1,2 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-manager --fast
 pause

@@ -1,3 +1,3 @@
-..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --disable-api-nodes
+..\python_embeded\python.exe -s ..\ComfyUI\main.py --windows-standalone-build --enable-manager --disable-api-nodes
 echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause

@@ -1,2 +1,2 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --cpu --windows-standalone-build
+.\python_embeded\python.exe -s ComfyUI\main.py --cpu --windows-standalone-build --enable-manager
 pause

@@ -1,3 +1,3 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-manager
 echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause

@@ -1,3 +1,3 @@
-.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --fast fp16_accumulation
+.\python_embeded\python.exe -s ComfyUI\main.py --windows-standalone-build --enable-manager --fast fp16_accumulation
 echo If you see this and ComfyUI did not start try updating your Nvidia Drivers to the latest.
 pause
```
.github/workflows/test-ci.yml (vendored, 1 change)

```diff
@@ -5,7 +5,6 @@ on:
   push:
     branches:
       - master
-      - release/**
     paths-ignore:
       - 'app/**'
      - 'input/**'
```
.github/workflows/test-execution.yml (vendored, 4 changes)

```diff
@@ -2,9 +2,9 @@ name: Execution Tests
 
 on:
   push:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
   pull_request:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
 
 jobs:
   test:
```
.github/workflows/test-launch.yml (vendored, 4 changes)

```diff
@@ -2,9 +2,9 @@ name: Test server launches without errors
 
 on:
   push:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
   pull_request:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
 
 jobs:
   test:
```
.github/workflows/test-unit.yml (vendored, 4 changes)

```diff
@@ -2,9 +2,9 @@ name: Unit Tests
 
 on:
   push:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
   pull_request:
-    branches: [ main, master, release/** ]
+    branches: [ main, master ]
 
 jobs:
   test:
```
.github/workflows/update-version.yml (vendored, 1 change)

```diff
@@ -6,7 +6,6 @@ on:
       - "pyproject.toml"
     branches:
       - master
-      - release/**
 
 jobs:
   update-version:
```
```diff
@@ -1618,17 +1618,6 @@ def sample_seeds_2(model, x, sigmas, extra_args=None, callback=None, disable=None
         x = x + sde_noise * sigmas[i + 1] * s_noise
     return x
 
-@torch.no_grad()
-def sample_exp_heun_2_x0(model, x, sigmas, extra_args=None, callback=None, disable=None, solver_type="phi_2"):
-    """Deterministic exponential Heun second order method in data prediction (x0) and logSNR time."""
-    return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=0.0, s_noise=0.0, noise_sampler=None, r=1.0, solver_type=solver_type)
-
-
-@torch.no_grad()
-def sample_exp_heun_2_x0_sde(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, solver_type="phi_2"):
-    """Stochastic exponential Heun second order method in data prediction (x0) and logSNR time."""
-    return sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback, disable=disable, eta=eta, s_noise=s_noise, noise_sampler=noise_sampler, r=1.0, solver_type=solver_type)
-
 
 @torch.no_grad()
 def sample_seeds_3(model, x, sigmas, extra_args=None, callback=None, disable=None, eta=1., s_noise=1., noise_sampler=None, r_1=1./3, r_2=2./3):
```
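Note: both deleted samplers were thin wrappers over `sample_seeds_2`, so the same behaviour stays reachable by calling it with the parameters the wrappers hard-coded. A sketch (assuming the usual `comfy.k_diffusion.sampling` import path):

```python
import torch
from comfy.k_diffusion import sampling as k_sampling

@torch.no_grad()
def exp_heun_2_x0_compat(model, x, sigmas, extra_args=None, callback=None, disable=None):
    # Deterministic variant of the removed sampler: eta=0.0, s_noise=0.0, r=1.0, solver_type="phi_2"
    return k_sampling.sample_seeds_2(model, x, sigmas, extra_args=extra_args, callback=callback,
                                     disable=disable, eta=0.0, s_noise=0.0, noise_sampler=None,
                                     r=1.0, solver_type="phi_2")
```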
```diff
@@ -634,11 +634,8 @@ class NextDiT(nn.Module):
         img, mask, img_size, cap_size, freqs_cis = self.patchify_and_embed(x, cap_feats, cap_mask, adaln_input, num_tokens, transformer_options=transformer_options)
         freqs_cis = freqs_cis.to(img.device)
 
-        transformer_options["total_blocks"] = len(self.layers)
-        transformer_options["block_type"] = "double"
         img_input = img
         for i, layer in enumerate(self.layers):
-            transformer_options["block_index"] = i
             img = layer(img, mask, freqs_cis, adaln_input, transformer_options=transformer_options)
             if "double_block" in patches:
                 for p in patches["double_block"]:
```
```diff
@@ -322,7 +322,6 @@ class QwenImageTransformer2DModel(nn.Module):
         pooled_projection_dim: int = 768,
         guidance_embeds: bool = False,
         axes_dims_rope: Tuple[int, int, int] = (16, 56, 56),
-        default_ref_method="index",
         image_model=None,
         final_layer=True,
         dtype=None,
@@ -335,7 +334,6 @@ class QwenImageTransformer2DModel(nn.Module):
         self.in_channels = in_channels
         self.out_channels = out_channels or in_channels
         self.inner_dim = num_attention_heads * attention_head_dim
-        self.default_ref_method = default_ref_method
 
         self.pe_embedder = EmbedND(dim=attention_head_dim, theta=10000, axes_dim=list(axes_dims_rope))
 
@@ -363,9 +361,6 @@ class QwenImageTransformer2DModel(nn.Module):
             for _ in range(num_layers)
         ])
 
-        if self.default_ref_method == "index_timestep_zero":
-            self.register_buffer("__index_timestep_zero__", torch.tensor([]))
-
         if final_layer:
             self.norm_out = LastLayer(self.inner_dim, self.inner_dim, dtype=dtype, device=device, operations=operations)
             self.proj_out = operations.Linear(self.inner_dim, patch_size * patch_size * self.out_channels, bias=True, dtype=dtype, device=device)
@@ -421,7 +416,7 @@ class QwenImageTransformer2DModel(nn.Module):
         h = 0
         w = 0
         index = 0
-        ref_method = kwargs.get("ref_latents_method", self.default_ref_method)
+        ref_method = kwargs.get("ref_latents_method", "index")
         index_ref_method = (ref_method == "index") or (ref_method == "index_timestep_zero")
         timestep_zero = ref_method == "index_timestep_zero"
         for ref in ref_latents:
```
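Note: the net effect of these four hunks is that the model no longer carries a per-checkpoint `default_ref_method`; the forward pass falls back to a hard-coded `"index"` instead. A minimal sketch of the difference (standalone, values taken from the diff):

```python
def resolve_ref_method(kwargs, default_ref_method="index_timestep_zero"):
    old = kwargs.get("ref_latents_method", default_ref_method)  # left side: model-supplied default
    new = kwargs.get("ref_latents_method", "index")             # right side: fixed fallback
    return old, new

print(resolve_ref_method({}))                                             # ('index_timestep_zero', 'index')
print(resolve_ref_method({"ref_latents_method": "index_timestep_zero"}))  # an explicit value wins in both
```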
```diff
@@ -568,10 +568,7 @@ class WanModel(torch.nn.Module):
 
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -766,10 +763,7 @@ class VaceWanModel(WanModel):
 
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -868,10 +862,7 @@ class CameraWanModel(WanModel):
 
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -1335,19 +1326,16 @@ class WanModel_S2V(WanModel):
 
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
-                    out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"], transformer_options=args["transformer_options"])
+                    out["img"] = block(args["img"], context=args["txt"], e=args["vec"], freqs=args["pe"])
                     return out
-                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs, "transformer_options": transformer_options}, {"original_block": block_wrap})
+                out = blocks_replace[("double_block", i)]({"img": x, "txt": context, "vec": e0, "pe": freqs}, {"original_block": block_wrap})
                 x = out["img"]
             else:
-                x = block(x, e=e0, freqs=freqs, context=context, transformer_options=transformer_options)
+                x = block(x, e=e0, freqs=freqs, context=context)
             if audio_emb is not None:
                 x = self.audio_injector(x, i, audio_emb, audio_emb_global, seq_len)
         # head
@@ -1586,10 +1574,7 @@ class HumoWanModel(WanModel):
 
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
@@ -523,10 +523,7 @@ class AnimateWanModel(WanModel):
 
         patches_replace = transformer_options.get("patches_replace", {})
         blocks_replace = patches_replace.get("dit", {})
-        transformer_options["total_blocks"] = len(self.blocks)
-        transformer_options["block_type"] = "double"
         for i, block in enumerate(self.blocks):
-            transformer_options["block_index"] = i
             if ("double_block", i) in blocks_replace:
                 def block_wrap(args):
                     out = {}
```
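Note: every hunk above drops the same bookkeeping writes. In isolation, what they did was record loop metadata in the shared `transformer_options` dict so downstream patches can tell which block is currently running; a self-contained illustration (generic Python, not ComfyUI code):

```python
def run_blocks(blocks, x, transformer_options):
    transformer_options["total_blocks"] = len(blocks)  # the lines these hunks delete
    transformer_options["block_type"] = "double"
    for i, block in enumerate(blocks):
        transformer_options["block_index"] = i
        x = block(x)
    return x

opts = {}
print(run_blocks([lambda v: v + 1, lambda v: v * 2], 1, opts))  # 4
print(opts)  # {'total_blocks': 2, 'block_type': 'double', 'block_index': 1}
```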
```diff
@@ -259,7 +259,7 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["nerf_tile_size"] = 512
         dit_config["nerf_final_head_type"] = "conv" if f"{key_prefix}nerf_final_layer_conv.norm.scale" in state_dict_keys else "linear"
         dit_config["nerf_embedder_dtype"] = torch.float32
-        if "{}__x0__".format(key_prefix) in state_dict_keys: # x0 pred
+        if "__x0__" in state_dict_keys: # x0 pred
             dit_config["use_x0"] = True
         else:
             dit_config["use_x0"] = False
@@ -618,8 +618,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
         dit_config["image_model"] = "qwen_image"
         dit_config["in_channels"] = state_dict['{}img_in.weight'.format(key_prefix)].shape[1]
         dit_config["num_layers"] = count_blocks(state_dict_keys, '{}transformer_blocks.'.format(key_prefix) + '{}.')
-        if "{}__index_timestep_zero__".format(key_prefix) in state_dict_keys: # 2511
-            dit_config["default_ref_method"] = "index_timestep_zero"
         return dit_config
 
     if '{}visual_transformer_blocks.0.cross_attention.key_norm.weight'.format(key_prefix) in state_dict_keys: # Kandinsky 5
```
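Note: detection runs over checkpoint keys that normally carry a prefix (for example `model.diffusion_model.`), so the prefixed and bare membership tests in the first hunk behave differently; a small sketch with illustrative keys:

```python
state_dict_keys = [
    "model.diffusion_model.__x0__",
    "model.diffusion_model.img_in.weight",
]
key_prefix = "model.diffusion_model."

print("{}__x0__".format(key_prefix) in state_dict_keys)  # True: matches the prefixed marker key
print("__x0__" in state_dict_keys)                        # False: only matches an unprefixed key
```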
```diff
@@ -720,7 +720,7 @@ class Sampler:
             sigma = float(sigmas[0])
         return math.isclose(max_sigma, sigma, rel_tol=1e-05) or sigma > max_sigma
 
-KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2", "exp_heun_2_x0", "exp_heun_2_x0_sde", "dpm_2", "dpm_2_ancestral",
+KSAMPLER_NAMES = ["euler", "euler_cfg_pp", "euler_ancestral", "euler_ancestral_cfg_pp", "heun", "heunpp2","dpm_2", "dpm_2_ancestral",
                   "lms", "dpm_fast", "dpm_adaptive", "dpmpp_2s_ancestral", "dpmpp_2s_ancestral_cfg_pp", "dpmpp_sde", "dpmpp_sde_gpu",
                   "dpmpp_2m", "dpmpp_2m_cfg_pp", "dpmpp_2m_sde", "dpmpp_2m_sde_gpu", "dpmpp_2m_sde_heun", "dpmpp_2m_sde_heun_gpu", "dpmpp_3m_sde", "dpmpp_3m_sde_gpu", "ddpm", "lcm",
                   "ipndm", "ipndm_v", "deis", "res_multistep", "res_multistep_cfg_pp", "res_multistep_ancestral", "res_multistep_ancestral_cfg_pp",
```
```diff
@@ -28,7 +28,6 @@ from . import supported_models_base
 from . import latent_formats
 
 from . import diffusers_convert
-import comfy.model_management
 
 class SD15(supported_models_base.BASE):
     unet_config = {
@@ -1029,13 +1028,7 @@ class ZImage(Lumina2):
 
     memory_usage_factor = 2.0
 
-    supported_inference_dtypes = [torch.bfloat16, torch.float32]
-
-    def __init__(self, unet_config):
-        super().__init__(unet_config)
-        if comfy.model_management.extended_fp16_support():
-            self.supported_inference_dtypes = self.supported_inference_dtypes.copy()
-            self.supported_inference_dtypes.insert(1, torch.float16)
+    supported_inference_dtypes = [torch.bfloat16, torch.float16, torch.float32]
 
     def clip_target(self, state_dict={}):
         pref = self.text_encoder_key_prefix[0]
```
```diff
@@ -5,17 +5,11 @@ from typing import Optional, List, Dict, Any, Union
 from pydantic import BaseModel, Field, RootModel
 
 class TripoModelVersion(str, Enum):
-    v3_0_20250812 = 'v3.0-20250812'
     v2_5_20250123 = 'v2.5-20250123'
     v2_0_20240919 = 'v2.0-20240919'
     v1_4_20240625 = 'v1.4-20240625'
 
 
-class TripoGeometryQuality(str, Enum):
-    standard = 'standard'
-    detailed = 'detailed'
-
-
 class TripoTextureQuality(str, Enum):
     standard = 'standard'
     detailed = 'detailed'
@@ -67,20 +61,14 @@ class TripoSpec(str, Enum):
 class TripoAnimation(str, Enum):
     IDLE = "preset:idle"
     WALK = "preset:walk"
-    RUN = "preset:run"
-    DIVE = "preset:dive"
     CLIMB = "preset:climb"
     JUMP = "preset:jump"
+    RUN = "preset:run"
     SLASH = "preset:slash"
     SHOOT = "preset:shoot"
     HURT = "preset:hurt"
     FALL = "preset:fall"
     TURN = "preset:turn"
-    QUADRUPED_WALK = "preset:quadruped:walk"
-    HEXAPOD_WALK = "preset:hexapod:walk"
-    OCTOPOD_WALK = "preset:octopod:walk"
-    SERPENTINE_MARCH = "preset:serpentine:march"
-    AQUATIC_MARCH = "preset:aquatic:march"
 
 class TripoStylizeStyle(str, Enum):
     LEGO = "lego"
@@ -117,11 +105,6 @@ class TripoTaskStatus(str, Enum):
     BANNED = "banned"
     EXPIRED = "expired"
 
-class TripoFbxPreset(str, Enum):
-    BLENDER = "blender"
-    MIXAMO = "mixamo"
-    _3DSMAX = "3dsmax"
-
 class TripoFileTokenReference(BaseModel):
     type: Optional[str] = Field(None, description='The type of the reference')
     file_token: str
@@ -159,7 +142,6 @@ class TripoTextToModelRequest(BaseModel):
     model_seed: Optional[int] = Field(None, description='The seed for the model')
     texture_seed: Optional[int] = Field(None, description='The seed for the texture')
     texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard
-    geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard
     style: Optional[TripoStyle] = None
     auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model')
     quad: Optional[bool] = Field(False, description='Whether to apply quad to the generated model')
@@ -174,7 +156,6 @@ class TripoImageToModelRequest(BaseModel):
     model_seed: Optional[int] = Field(None, description='The seed for the model')
     texture_seed: Optional[int] = Field(None, description='The seed for the texture')
     texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard
-    geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard
     texture_alignment: Optional[TripoTextureAlignment] = Field(TripoTextureAlignment.ORIGINAL_IMAGE, description='The texture alignment method')
     style: Optional[TripoStyle] = Field(None, description='The style to apply to the generated model')
     auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model')
@@ -192,7 +173,6 @@ class TripoMultiviewToModelRequest(BaseModel):
     model_seed: Optional[int] = Field(None, description='The seed for the model')
     texture_seed: Optional[int] = Field(None, description='The seed for the texture')
     texture_quality: Optional[TripoTextureQuality] = TripoTextureQuality.standard
-    geometry_quality: Optional[TripoGeometryQuality] = TripoGeometryQuality.standard
     texture_alignment: Optional[TripoTextureAlignment] = TripoTextureAlignment.ORIGINAL_IMAGE
     auto_size: Optional[bool] = Field(False, description='Whether to auto-size the model')
     orientation: Optional[TripoOrientation] = Field(TripoOrientation.DEFAULT, description='The orientation for the model')
@@ -239,24 +219,14 @@ class TripoConvertModelRequest(BaseModel):
     type: TripoTaskType = Field(TripoTaskType.CONVERT_MODEL, description='Type of task')
     format: TripoConvertFormat = Field(..., description='The format to convert to')
     original_model_task_id: str = Field(..., description='The task ID of the original model')
-    quad: Optional[bool] = Field(None, description='Whether to apply quad to the model')
-    force_symmetry: Optional[bool] = Field(None, description='Whether to force symmetry')
-    face_limit: Optional[int] = Field(None, description='The number of faces to limit the conversion to')
-    flatten_bottom: Optional[bool] = Field(None, description='Whether to flatten the bottom of the model')
-    flatten_bottom_threshold: Optional[float] = Field(None, description='The threshold for flattening the bottom')
-    texture_size: Optional[int] = Field(None, description='The size of the texture')
+    quad: Optional[bool] = Field(False, description='Whether to apply quad to the model')
+    force_symmetry: Optional[bool] = Field(False, description='Whether to force symmetry')
+    face_limit: Optional[int] = Field(10000, description='The number of faces to limit the conversion to')
+    flatten_bottom: Optional[bool] = Field(False, description='Whether to flatten the bottom of the model')
+    flatten_bottom_threshold: Optional[float] = Field(0.01, description='The threshold for flattening the bottom')
+    texture_size: Optional[int] = Field(4096, description='The size of the texture')
     texture_format: Optional[TripoTextureFormat] = Field(TripoTextureFormat.JPEG, description='The format of the texture')
-    pivot_to_center_bottom: Optional[bool] = Field(None, description='Whether to pivot to the center bottom')
-    scale_factor: Optional[float] = Field(None, description='The scale factor for the model')
-    with_animation: Optional[bool] = Field(None, description='Whether to include animations')
-    pack_uv: Optional[bool] = Field(None, description='Whether to pack the UVs')
-    bake: Optional[bool] = Field(None, description='Whether to bake the model')
-    part_names: Optional[List[str]] = Field(None, description='The names of the parts to include')
-    fbx_preset: Optional[TripoFbxPreset] = Field(None, description='The preset for the FBX export')
-    export_vertex_colors: Optional[bool] = Field(None, description='Whether to export the vertex colors')
-    export_orientation: Optional[TripoOrientation] = Field(None, description='The orientation for the export')
-    animate_in_place: Optional[bool] = Field(None, description='Whether to animate in place')
-
+    pivot_to_center_bottom: Optional[bool] = Field(False, description='Whether to pivot to the center bottom')
 
 class TripoTaskRequest(RootModel):
     root: Union[
```
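Note: the practical difference in the `TripoConvertModelRequest` hunk is between optional fields that default to `None` and fields with concrete defaults. Assuming the request is serialized with `exclude_none` (the node code below passes `None` to mean "leave this field out"), a minimal pydantic sketch:

```python
from typing import Optional
from pydantic import BaseModel, Field

class NoneDefault(BaseModel):
    face_limit: Optional[int] = Field(None)

class ConcreteDefault(BaseModel):
    face_limit: Optional[int] = Field(10000)

print(NoneDefault().model_dump(exclude_none=True))      # {} -> field omitted from the payload
print(ConcreteDefault().model_dump(exclude_none=True))  # {'face_limit': 10000} -> always sent
```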
```diff
@@ -102,9 +102,8 @@ class TripoTextToModelNode(IO.ComfyNode):
             IO.Int.Input("model_seed", default=42, optional=True),
             IO.Int.Input("texture_seed", default=42, optional=True),
             IO.Combo.Input("texture_quality", default="standard", options=["standard", "detailed"], optional=True),
-            IO.Int.Input("face_limit", default=-1, min=-1, max=2000000, optional=True),
+            IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True),
             IO.Boolean.Input("quad", default=False, optional=True),
-            IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True),
         ],
         outputs=[
             IO.String.Output(display_name="model_file"),
@@ -132,7 +131,6 @@ class TripoTextToModelNode(IO.ComfyNode):
         model_seed: Optional[int] = None,
         texture_seed: Optional[int] = None,
         texture_quality: Optional[str] = None,
-        geometry_quality: Optional[str] = None,
         face_limit: Optional[int] = None,
         quad: Optional[bool] = None,
     ) -> IO.NodeOutput:
@@ -156,7 +154,6 @@ class TripoTextToModelNode(IO.ComfyNode):
             texture_seed=texture_seed,
             texture_quality=texture_quality,
             face_limit=face_limit,
-            geometry_quality=geometry_quality,
             auto_size=True,
             quad=quad,
         ),
@@ -197,7 +194,6 @@ class TripoImageToModelNode(IO.ComfyNode):
             ),
             IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True),
             IO.Boolean.Input("quad", default=False, optional=True),
-            IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True),
         ],
         outputs=[
             IO.String.Output(display_name="model_file"),
@@ -224,7 +220,6 @@ class TripoImageToModelNode(IO.ComfyNode):
         orientation=None,
         texture_seed: Optional[int] = None,
         texture_quality: Optional[str] = None,
-        geometry_quality: Optional[str] = None,
         texture_alignment: Optional[str] = None,
         face_limit: Optional[int] = None,
         quad: Optional[bool] = None,
@@ -251,7 +246,6 @@ class TripoImageToModelNode(IO.ComfyNode):
             pbr=pbr,
             model_seed=model_seed,
             orientation=orientation,
-            geometry_quality=geometry_quality,
             texture_alignment=texture_alignment,
             texture_seed=texture_seed,
             texture_quality=texture_quality,
@@ -301,7 +295,6 @@ class TripoMultiviewToModelNode(IO.ComfyNode):
             ),
             IO.Int.Input("face_limit", default=-1, min=-1, max=500000, optional=True),
             IO.Boolean.Input("quad", default=False, optional=True),
-            IO.Combo.Input("geometry_quality", default="standard", options=["standard", "detailed"], optional=True),
         ],
         outputs=[
             IO.String.Output(display_name="model_file"),
@@ -330,7 +323,6 @@ class TripoMultiviewToModelNode(IO.ComfyNode):
         model_seed: Optional[int] = None,
         texture_seed: Optional[int] = None,
         texture_quality: Optional[str] = None,
-        geometry_quality: Optional[str] = None,
         texture_alignment: Optional[str] = None,
         face_limit: Optional[int] = None,
         quad: Optional[bool] = None,
@@ -367,7 +359,6 @@ class TripoMultiviewToModelNode(IO.ComfyNode):
             model_seed=model_seed,
             texture_seed=texture_seed,
             texture_quality=texture_quality,
-            geometry_quality=geometry_quality,
             texture_alignment=texture_alignment,
             face_limit=face_limit,
             quad=quad,
@@ -517,8 +508,6 @@ class TripoRetargetNode(IO.ComfyNode):
                 options=[
                     "preset:idle",
                     "preset:walk",
-                    "preset:run",
-                    "preset:dive",
                     "preset:climb",
                     "preset:jump",
                     "preset:slash",
@@ -526,11 +515,6 @@ class TripoRetargetNode(IO.ComfyNode):
                     "preset:hurt",
                     "preset:fall",
                     "preset:turn",
-                    "preset:quadruped:walk",
-                    "preset:hexapod:walk",
-                    "preset:octopod:walk",
-                    "preset:serpentine:march",
-                    "preset:aquatic:march"
                 ],
             ),
         ],
@@ -579,7 +563,7 @@ class TripoConversionNode(IO.ComfyNode):
                 "face_limit",
                 default=-1,
                 min=-1,
-                max=2000000,
+                max=500000,
                 optional=True,
             ),
             IO.Int.Input(
@@ -595,40 +579,6 @@ class TripoConversionNode(IO.ComfyNode):
                 default="JPEG",
                 optional=True,
             ),
-            IO.Boolean.Input("force_symmetry", default=False, optional=True),
-            IO.Boolean.Input("flatten_bottom", default=False, optional=True),
-            IO.Float.Input(
-                "flatten_bottom_threshold",
-                default=0.0,
-                min=0.0,
-                max=1.0,
-                optional=True,
-            ),
-            IO.Boolean.Input("pivot_to_center_bottom", default=False, optional=True),
-            IO.Float.Input(
-                "scale_factor",
-                default=1.0,
-                min=0.0,
-                optional=True,
-            ),
-            IO.Boolean.Input("with_animation", default=False, optional=True),
-            IO.Boolean.Input("pack_uv", default=False, optional=True),
-            IO.Boolean.Input("bake", default=False, optional=True),
-            IO.String.Input("part_names", default="", optional=True),  # comma-separated list
-            IO.Combo.Input(
-                "fbx_preset",
-                options=["blender", "mixamo", "3dsmax"],
-                default="blender",
-                optional=True,
-            ),
-            IO.Boolean.Input("export_vertex_colors", default=False, optional=True),
-            IO.Combo.Input(
-                "export_orientation",
-                options=["align_image", "default"],
-                default="default",
-                optional=True,
-            ),
-            IO.Boolean.Input("animate_in_place", default=False, optional=True),
         ],
         outputs=[],
         hidden=[
@@ -654,31 +604,12 @@ class TripoConversionNode(IO.ComfyNode):
         original_model_task_id,
         format: str,
         quad: bool,
-        force_symmetry: bool,
         face_limit: int,
-        flatten_bottom: bool,
-        flatten_bottom_threshold: float,
         texture_size: int,
         texture_format: str,
-        pivot_to_center_bottom: bool,
-        scale_factor: float,
-        with_animation: bool,
-        pack_uv: bool,
-        bake: bool,
-        part_names: str,
-        fbx_preset: str,
-        export_vertex_colors: bool,
-        export_orientation: str,
-        animate_in_place: bool,
     ) -> IO.NodeOutput:
         if not original_model_task_id:
             raise RuntimeError("original_model_task_id is required")
-
-        # Parse part_names from comma-separated string to list
-        part_names_list = None
-        if part_names and part_names.strip():
-            part_names_list = [name.strip() for name in part_names.split(',') if name.strip()]
-
         response = await sync_op(
             cls,
             endpoint=ApiEndpoint(path="/proxy/tripo/v2/openapi/task", method="POST"),
@@ -687,22 +618,9 @@ class TripoConversionNode(IO.ComfyNode):
                 original_model_task_id=original_model_task_id,
                 format=format,
                 quad=quad if quad else None,
-                force_symmetry=force_symmetry if force_symmetry else None,
                 face_limit=face_limit if face_limit != -1 else None,
-                flatten_bottom=flatten_bottom if flatten_bottom else None,
-                flatten_bottom_threshold=flatten_bottom_threshold if flatten_bottom_threshold != 0.0 else None,
                 texture_size=texture_size if texture_size != 4096 else None,
                 texture_format=texture_format if texture_format != "JPEG" else None,
-                pivot_to_center_bottom=pivot_to_center_bottom if pivot_to_center_bottom else None,
-                scale_factor=scale_factor if scale_factor != 1.0 else None,
-                with_animation=with_animation if with_animation else None,
-                pack_uv=pack_uv if pack_uv else None,
-                bake=bake if bake else None,
-                part_names=part_names_list,
-                fbx_preset=fbx_preset if fbx_preset != "blender" else None,
-                export_vertex_colors=export_vertex_colors if export_vertex_colors else None,
-                export_orientation=export_orientation if export_orientation != "default" else None,
-                animate_in_place=animate_in_place if animate_in_place else None,
             ),
         )
         return await poll_until_finished(cls, response, average_duration=30)
```
```diff
@@ -1,5 +1,7 @@
 import re
+from typing import Optional
 
+import torch
 from pydantic import BaseModel, Field
 from typing_extensions import override
 
@@ -19,26 +21,26 @@ from comfy_api_nodes.util import (
 
 class Text2ImageInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: str | None = Field(None)
+    negative_prompt: Optional[str] = Field(None)
 
 
 class Image2ImageInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: str | None = Field(None)
+    negative_prompt: Optional[str] = Field(None)
     images: list[str] = Field(..., min_length=1, max_length=2)
 
 
 class Text2VideoInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: str | None = Field(None)
-    audio_url: str | None = Field(None)
+    negative_prompt: Optional[str] = Field(None)
+    audio_url: Optional[str] = Field(None)
 
 
 class Image2VideoInputField(BaseModel):
     prompt: str = Field(...)
-    negative_prompt: str | None = Field(None)
+    negative_prompt: Optional[str] = Field(None)
     img_url: str = Field(...)
-    audio_url: str | None = Field(None)
+    audio_url: Optional[str] = Field(None)
 
 
 class Txt2ImageParametersField(BaseModel):
@@ -50,7 +52,7 @@ class Txt2ImageParametersField(BaseModel):
 
 
 class Image2ImageParametersField(BaseModel):
-    size: str | None = Field(None)
+    size: Optional[str] = Field(None)
     n: int = Field(1, description="Number of images to generate.")  # we support only value=1
     seed: int = Field(..., ge=0, le=2147483647)
     watermark: bool = Field(True)
@@ -59,21 +61,19 @@ class Image2ImageParametersField(BaseModel):
 class Text2VideoParametersField(BaseModel):
     size: str = Field(...)
     seed: int = Field(..., ge=0, le=2147483647)
-    duration: int = Field(5, ge=5, le=15)
+    duration: int = Field(5, ge=5, le=10)
     prompt_extend: bool = Field(True)
     watermark: bool = Field(True)
-    audio: bool = Field(False, description="Whether to generate audio automatically.")
-    shot_type: str = Field("single")
+    audio: bool = Field(False, description="Should be audio generated automatically")
 
 
 class Image2VideoParametersField(BaseModel):
     resolution: str = Field(...)
     seed: int = Field(..., ge=0, le=2147483647)
-    duration: int = Field(5, ge=5, le=15)
+    duration: int = Field(5, ge=5, le=10)
     prompt_extend: bool = Field(True)
     watermark: bool = Field(True)
-    audio: bool = Field(False, description="Whether to generate audio automatically.")
-    shot_type: str = Field("single")
+    audio: bool = Field(False, description="Should be audio generated automatically")
 
 
 class Text2ImageTaskCreationRequest(BaseModel):
```
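Note: apart from the duration/audio/shot_type changes, these model hunks only swap the spelling of optional fields. `Optional[str]` and `str | None` describe the same type; the `X | None` union syntax requires Python 3.10+ when the annotation is evaluated at runtime, which is presumably why the portable side spells it with `typing.Optional`. A tiny sketch:

```python
from typing import Optional
from pydantic import BaseModel, Field

class InputField(BaseModel):
    prompt: str = Field(...)
    negative_prompt: Optional[str] = Field(None)  # accepts a string or None, same as "str | None"

print(InputField(prompt="a cat"))                            # negative_prompt=None
print(InputField(prompt="a cat", negative_prompt="blurry"))
```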
```diff
@@ -106,39 +106,39 @@ class TaskCreationOutputField(BaseModel):
 
 
 class TaskCreationResponse(BaseModel):
-    output: TaskCreationOutputField | None = Field(None)
+    output: Optional[TaskCreationOutputField] = Field(None)
     request_id: str = Field(...)
-    code: str | None = Field(None, description="Error code for the failed request.")
-    message: str | None = Field(None, description="Details about the failed request.")
+    code: Optional[str] = Field(None, description="The error code of the failed request.")
+    message: Optional[str] = Field(None, description="Details of the failed request.")
 
 
 class TaskResult(BaseModel):
-    url: str | None = Field(None)
-    code: str | None = Field(None)
-    message: str | None = Field(None)
+    url: Optional[str] = Field(None)
+    code: Optional[str] = Field(None)
+    message: Optional[str] = Field(None)
 
 
 class ImageTaskStatusOutputField(TaskCreationOutputField):
     task_id: str = Field(...)
     task_status: str = Field(...)
-    results: list[TaskResult] | None = Field(None)
+    results: Optional[list[TaskResult]] = Field(None)
 
 
 class VideoTaskStatusOutputField(TaskCreationOutputField):
     task_id: str = Field(...)
     task_status: str = Field(...)
-    video_url: str | None = Field(None)
-    code: str | None = Field(None)
-    message: str | None = Field(None)
+    video_url: Optional[str] = Field(None)
+    code: Optional[str] = Field(None)
+    message: Optional[str] = Field(None)
 
 
 class ImageTaskStatusResponse(BaseModel):
-    output: ImageTaskStatusOutputField | None = Field(None)
+    output: Optional[ImageTaskStatusOutputField] = Field(None)
     request_id: str = Field(...)
 
 
 class VideoTaskStatusResponse(BaseModel):
-    output: VideoTaskStatusOutputField | None = Field(None)
+    output: Optional[VideoTaskStatusOutputField] = Field(None)
     request_id: str = Field(...)
 
 
```
```diff
@@ -152,7 +152,7 @@ class WanTextToImageApi(IO.ComfyNode):
             node_id="WanTextToImageApi",
             display_name="Wan Text to Image",
             category="api node/image/Wan",
-            description="Generates an image based on a text prompt.",
+            description="Generates image based on text prompt.",
             inputs=[
                 IO.Combo.Input(
                     "model",
@@ -164,13 +164,13 @@ class WanTextToImageApi(IO.ComfyNode):
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
+                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative prompt describing what to avoid.",
+                    tooltip="Negative text prompt to guide what to avoid.",
                     optional=True,
                 ),
                 IO.Int.Input(
@@ -209,7 +209,7 @@ class WanTextToImageApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip="Whether to add an AI-generated watermark to the result.",
+                    tooltip='Whether to add an "AI generated" watermark to the result.',
                     optional=True,
                 ),
             ],
@@ -252,7 +252,7 @@ class WanTextToImageApi(IO.ComfyNode):
             ),
         )
         if not initial_response.output:
-            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
@@ -272,7 +272,7 @@ class WanImageToImageApi(IO.ComfyNode):
             display_name="Wan Image to Image",
             category="api node/image/Wan",
             description="Generates an image from one or two input images and a text prompt. "
-            "The output image is currently fixed at 1.6 MP, and its aspect ratio matches the input image(s).",
+            "The output image is currently fixed at 1.6 MP; its aspect ratio matches the input image(s).",
             inputs=[
                 IO.Combo.Input(
                     "model",
@@ -282,19 +282,19 @@ class WanImageToImageApi(IO.ComfyNode):
                 ),
                 IO.Image.Input(
                     "image",
-                    tooltip="Single-image editing or multi-image fusion. Maximum 2 images.",
+                    tooltip="Single-image editing or multi-image fusion, maximum 2 images.",
                 ),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
+                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative prompt describing what to avoid.",
+                    tooltip="Negative text prompt to guide what to avoid.",
                     optional=True,
                 ),
                 # redo this later as an optional combo of recommended resolutions
@@ -328,7 +328,7 @@ class WanImageToImageApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip="Whether to add an AI-generated watermark to the result.",
+                    tooltip='Whether to add an "AI generated" watermark to the result.',
                     optional=True,
                 ),
             ],
@@ -347,7 +347,7 @@ class WanImageToImageApi(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        image: Input.Image,
+        image: torch.Tensor,
         prompt: str,
         negative_prompt: str = "",
         # width: int = 1024,
@@ -357,7 +357,7 @@ class WanImageToImageApi(IO.ComfyNode):
     ):
         n_images = get_number_of_images(image)
         if n_images not in (1, 2):
-            raise ValueError(f"Expected 1 or 2 input images, but got {n_images}.")
+            raise ValueError(f"Expected 1 or 2 input images, got {n_images}.")
         images = []
         for i in image:
             images.append("data:image/png;base64," + tensor_to_base64_string(i, total_pixels=4096 * 4096))
@@ -376,7 +376,7 @@ class WanImageToImageApi(IO.ComfyNode):
             ),
         )
         if not initial_response.output:
-            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
```
```diff
@@ -395,25 +395,25 @@ class WanTextToVideoApi(IO.ComfyNode):
             node_id="WanTextToVideoApi",
             display_name="Wan Text to Video",
             category="api node/video/Wan",
-            description="Generates a video based on a text prompt.",
+            description="Generates video based on text prompt.",
             inputs=[
                 IO.Combo.Input(
                     "model",
-                    options=["wan2.5-t2v-preview", "wan2.6-t2v"],
-                    default="wan2.6-t2v",
+                    options=["wan2.5-t2v-preview"],
+                    default="wan2.5-t2v-preview",
                     tooltip="Model to use.",
                 ),
                 IO.String.Input(
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
+                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative prompt describing what to avoid.",
+                    tooltip="Negative text prompt to guide what to avoid.",
                     optional=True,
                 ),
                 IO.Combo.Input(
@@ -433,23 +433,23 @@ class WanTextToVideoApi(IO.ComfyNode):
                         "1080p: 4:3 (1632x1248)",
                         "1080p: 3:4 (1248x1632)",
                     ],
-                    default="720p: 1:1 (960x960)",
+                    default="480p: 1:1 (624x624)",
                     optional=True,
                 ),
                 IO.Int.Input(
                     "duration",
                     default=5,
                     min=5,
-                    max=15,
+                    max=10,
                     step=5,
                     display_mode=IO.NumberDisplay.number,
-                    tooltip="A 15-second duration is available only for the Wan 2.6 model.",
+                    tooltip="Available durations: 5 and 10 seconds",
                     optional=True,
                 ),
                 IO.Audio.Input(
                     "audio",
                     optional=True,
-                    tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
+                    tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
                 ),
                 IO.Int.Input(
                     "seed",
@@ -466,7 +466,7 @@ class WanTextToVideoApi(IO.ComfyNode):
                     "generate_audio",
                     default=False,
                     optional=True,
-                    tooltip="If no audio input is provided, generate audio automatically.",
+                    tooltip="If there is no audio input, generate audio automatically.",
                 ),
                 IO.Boolean.Input(
                     "prompt_extend",
@@ -477,15 +477,7 @@ class WanTextToVideoApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip="Whether to add an AI-generated watermark to the result.",
-                    optional=True,
-                ),
-                IO.Combo.Input(
-                    "shot_type",
-                    options=["single", "multi"],
-                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
-                    "single continuous shot or multiple shots with cuts. "
-                    "This parameter takes effect only when prompt_extend is True.",
+                    tooltip='Whether to add an "AI generated" watermark to the result.',
                     optional=True,
                 ),
             ],
@@ -506,19 +498,14 @@ class WanTextToVideoApi(IO.ComfyNode):
         model: str,
         prompt: str,
         negative_prompt: str = "",
-        size: str = "720p: 1:1 (960x960)",
+        size: str = "480p: 1:1 (624x624)",
         duration: int = 5,
-        audio: Input.Audio | None = None,
+        audio: Optional[Input.Audio] = None,
         seed: int = 0,
         generate_audio: bool = False,
         prompt_extend: bool = True,
         watermark: bool = True,
-        shot_type: str = "single",
     ):
-        if "480p" in size and model == "wan2.6-t2v":
-            raise ValueError("The Wan 2.6 model does not support 480p.")
-        if duration == 15 and model == "wan2.5-t2v-preview":
-            raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.")
         width, height = RES_IN_PARENS.search(size).groups()
         audio_url = None
         if audio is not None:
@@ -539,12 +526,11 @@ class WanTextToVideoApi(IO.ComfyNode):
                     audio=generate_audio,
                     prompt_extend=prompt_extend,
                     watermark=watermark,
-                    shot_type=shot_type,
                 ),
             ),
         )
         if not initial_response.output:
-            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
```
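Note: the execute path parses the chosen size string with `RES_IN_PARENS`, whose definition sits outside the captured hunks. A hypothetical reconstruction consistent with the option strings shown above (for example `"480p: 1:1 (624x624)"`):

```python
import re

RES_IN_PARENS = re.compile(r"\((\d+)x(\d+)\)")  # hypothetical: pull "WxH" out of the parentheses

width, height = RES_IN_PARENS.search("480p: 1:1 (624x624)").groups()
print(width, height)  # 624 624
```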
```diff
@@ -563,12 +549,12 @@ class WanImageToVideoApi(IO.ComfyNode):
             node_id="WanImageToVideoApi",
             display_name="Wan Image to Video",
             category="api node/video/Wan",
-            description="Generates a video from the first frame and a text prompt.",
+            description="Generates video based on the first frame and text prompt.",
             inputs=[
                 IO.Combo.Input(
                     "model",
-                    options=["wan2.5-i2v-preview", "wan2.6-i2v"],
-                    default="wan2.6-i2v",
+                    options=["wan2.5-i2v-preview"],
+                    default="wan2.5-i2v-preview",
                     tooltip="Model to use.",
                 ),
                 IO.Image.Input(
@@ -578,13 +564,13 @@ class WanImageToVideoApi(IO.ComfyNode):
                     "prompt",
                     multiline=True,
                     default="",
-                    tooltip="Prompt describing the elements and visual features. Supports English and Chinese.",
+                    tooltip="Prompt used to describe the elements and visual features, supports English/Chinese.",
                 ),
                 IO.String.Input(
                     "negative_prompt",
                     multiline=True,
                     default="",
-                    tooltip="Negative prompt describing what to avoid.",
+                    tooltip="Negative text prompt to guide what to avoid.",
                     optional=True,
                 ),
                 IO.Combo.Input(
@@ -594,23 +580,23 @@ class WanImageToVideoApi(IO.ComfyNode):
                         "720P",
                         "1080P",
                     ],
-                    default="720P",
+                    default="480P",
                     optional=True,
                 ),
                 IO.Int.Input(
                     "duration",
                     default=5,
                     min=5,
-                    max=15,
+                    max=10,
                     step=5,
                     display_mode=IO.NumberDisplay.number,
-                    tooltip="Duration 15 available only for WAN2.6 model.",
+                    tooltip="Available durations: 5 and 10 seconds",
                     optional=True,
                 ),
                 IO.Audio.Input(
                     "audio",
                     optional=True,
-                    tooltip="Audio must contain a clear, loud voice, without extraneous noise or background music.",
+                    tooltip="Audio must contain a clear, loud voice, without extraneous noise, background music.",
                 ),
                 IO.Int.Input(
                     "seed",
@@ -627,7 +613,7 @@ class WanImageToVideoApi(IO.ComfyNode):
                     "generate_audio",
                     default=False,
                     optional=True,
-                    tooltip="If no audio input is provided, generate audio automatically.",
+                    tooltip="If there is no audio input, generate audio automatically.",
                 ),
                 IO.Boolean.Input(
                     "prompt_extend",
@@ -638,15 +624,7 @@ class WanImageToVideoApi(IO.ComfyNode):
                 IO.Boolean.Input(
                     "watermark",
                     default=True,
-                    tooltip="Whether to add an AI-generated watermark to the result.",
-                    optional=True,
-                ),
-                IO.Combo.Input(
-                    "shot_type",
-                    options=["single", "multi"],
-                    tooltip="Specifies the shot type for the generated video, that is, whether the video is a "
-                    "single continuous shot or multiple shots with cuts. "
-                    "This parameter takes effect only when prompt_extend is True.",
+                    tooltip='Whether to add an "AI generated" watermark to the result.',
                     optional=True,
                 ),
             ],
@@ -665,24 +643,19 @@ class WanImageToVideoApi(IO.ComfyNode):
     async def execute(
         cls,
         model: str,
-        image: Input.Image,
+        image: torch.Tensor,
         prompt: str,
         negative_prompt: str = "",
-        resolution: str = "720P",
+        resolution: str = "480P",
         duration: int = 5,
-        audio: Input.Audio | None = None,
+        audio: Optional[Input.Audio] = None,
         seed: int = 0,
         generate_audio: bool = False,
         prompt_extend: bool = True,
         watermark: bool = True,
-        shot_type: str = "single",
     ):
         if get_number_of_images(image) != 1:
             raise ValueError("Exactly one input image is required.")
-        if "480P" in resolution and model == "wan2.6-i2v":
-            raise ValueError("The Wan 2.6 model does not support 480P.")
-        if duration == 15 and model == "wan2.5-i2v-preview":
-            raise ValueError("A 15-second duration is supported only by the Wan 2.6 model.")
         image_url = "data:image/png;base64," + tensor_to_base64_string(image, total_pixels=2000 * 2000)
         audio_url = None
         if audio is not None:
@@ -704,12 +677,11 @@ class WanImageToVideoApi(IO.ComfyNode):
                     audio=generate_audio,
                     prompt_extend=prompt_extend,
                     watermark=watermark,
-                    shot_type=shot_type,
                 ),
             ),
         )
         if not initial_response.output:
-            raise Exception(f"An unknown error occurred: {initial_response.code} - {initial_response.message}")
+            raise Exception(f"Unknown error occurred: {initial_response.code} - {initial_response.message}")
         response = await poll_op(
             cls,
             ApiEndpoint(path=f"/proxy/wan/api/v1/tasks/{initial_response.output.task_id}"),
```
```diff
@@ -671,16 +671,7 @@ class SamplerSEEDS2(io.ComfyNode):
                 io.Float.Input("s_noise", default=1.0, min=0.0, max=100.0, step=0.01, round=False, tooltip="SDE noise multiplier"),
                 io.Float.Input("r", default=0.5, min=0.01, max=1.0, step=0.01, round=False, tooltip="Relative step size for the intermediate stage (c2 node)"),
             ],
-            outputs=[io.Sampler.Output()],
-            description=(
-                "This sampler node can represent multiple samplers:\n\n"
-                "seeds_2\n"
-                "- default setting\n\n"
-                "exp_heun_2_x0\n"
-                "- solver_type=phi_2, r=1.0, eta=0.0\n\n"
-                "exp_heun_2_x0_sde\n"
-                "- solver_type=phi_2, r=1.0, eta=1.0, s_noise=1.0"
-            )
+            outputs=[io.Sampler.Output()]
         )
 
     @classmethod
```
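Note: the deleted description doubled as a migration table for the samplers removed from `KSAMPLER_NAMES` earlier in this compare. Kept here as plain data for reference, taken verbatim from the removed text:

```python
# SamplerSEEDS2 settings that reproduce the removed named samplers,
# as listed in the deleted description string.
SEEDS2_PRESETS = {
    "seeds_2": {},  # node defaults
    "exp_heun_2_x0": {"solver_type": "phi_2", "r": 1.0, "eta": 0.0},
    "exp_heun_2_x0_sde": {"solver_type": "phi_2", "r": 1.0, "eta": 1.0, "s_noise": 1.0},
}
```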
@@ -248,10 +248,7 @@ class ModelPatchLoader:
config['n_control_layers'] = 15
config['additional_in_dim'] = 17
config['refiner_control'] = True
ref_weight = sd.get("control_noise_refiner.0.after_proj.weight", None)
if ref_weight is not None:
if torch.count_nonzero(ref_weight) == 0:
config['broken'] = True
config['broken'] = True
model = comfy.ldm.lumina.controlnet.ZImage_Control(device=comfy.model_management.unet_offload_device(), dtype=dtype, operations=comfy.ops.manual_cast, **config)

model.load_state_dict(sd)
@@ -313,46 +310,22 @@ class ZImageControlPatch:
self.inpaint_image = inpaint_image
self.mask = mask
self.strength = strength
self.is_inpaint = self.model_patch.model.additional_in_dim > 0

skip_encoding = False
if self.image is not None and self.inpaint_image is not None:
if self.image.shape != self.inpaint_image.shape:
skip_encoding = True

if skip_encoding:
self.encoded_image = None
else:
self.encoded_image = self.encode_latent_cond(self.image, self.inpaint_image)
if self.image is None:
self.encoded_image_size = (self.inpaint_image.shape[1], self.inpaint_image.shape[2])
else:
self.encoded_image_size = (self.image.shape[1], self.image.shape[2])
self.encoded_image = self.encode_latent_cond(image)
self.encoded_image_size = (image.shape[1], image.shape[2])
self.temp_data = None

def encode_latent_cond(self, control_image=None, inpaint_image=None):
latent_image = None
if control_image is not None:
latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))

if self.is_inpaint:
def encode_latent_cond(self, control_image, inpaint_image=None):
latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(control_image))
if self.model_patch.model.additional_in_dim > 0:
if self.mask is None:
mask_ = torch.zeros_like(latent_image)[:, :1]
else:
mask_ = comfy.utils.common_upscale(self.mask.mean(dim=1, keepdim=True), latent_image.shape[-1], latent_image.shape[-2], "bilinear", "none")
if inpaint_image is None:
inpaint_image = torch.ones_like(control_image) * 0.5

if self.mask is not None:
mask_inpaint = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image.shape[-2], inpaint_image.shape[-3], "bilinear", "center")
inpaint_image = ((inpaint_image - 0.5) * mask_inpaint.movedim(1, -1).round()) + 0.5

inpaint_image_latent = comfy.latent_formats.Flux().process_in(self.vae.encode(inpaint_image))

if self.mask is None:
mask_ = torch.zeros_like(inpaint_image_latent)[:, :1]
else:
mask_ = comfy.utils.common_upscale(self.mask.view(self.mask.shape[0], -1, self.mask.shape[-2], self.mask.shape[-1]).mean(dim=1, keepdim=True), inpaint_image_latent.shape[-1], inpaint_image_latent.shape[-2], "nearest", "center")

if latent_image is None:
latent_image = comfy.latent_formats.Flux().process_in(self.vae.encode(torch.ones_like(inpaint_image) * 0.5))

return torch.cat([latent_image, mask_, inpaint_image_latent], dim=1)
else:
return latent_image
@@ -368,18 +341,13 @@ class ZImageControlPatch:
block_type = kwargs.get("block_type", "")
spacial_compression = self.vae.spacial_compression_encode()
if self.encoded_image is None or self.encoded_image_size != (x.shape[-2] * spacial_compression, x.shape[-1] * spacial_compression):
image_scaled = None
if self.image is not None:
image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
self.encoded_image_size = (image_scaled.shape[-3], image_scaled.shape[-2])

image_scaled = comfy.utils.common_upscale(self.image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center")
inpaint_scaled = None
if self.inpaint_image is not None:
inpaint_scaled = comfy.utils.common_upscale(self.inpaint_image.movedim(-1, 1), x.shape[-1] * spacial_compression, x.shape[-2] * spacial_compression, "area", "center").movedim(1, -1)
self.encoded_image_size = (inpaint_scaled.shape[-3], inpaint_scaled.shape[-2])

loaded_models = comfy.model_management.loaded_models(only_currently_used=True)
self.encoded_image = self.encode_latent_cond(image_scaled, inpaint_scaled)
self.encoded_image = self.encode_latent_cond(image_scaled.movedim(1, -1), inpaint_scaled)
self.encoded_image_size = (image_scaled.shape[-2], image_scaled.shape[-1])
comfy.model_management.load_models_gpu(loaded_models)

cnet_blocks = self.model_patch.model.n_control_layers
@@ -420,8 +388,7 @@ class ZImageControlPatch:

def to(self, device_or_dtype):
if isinstance(device_or_dtype, torch.device):
if self.encoded_image is not None:
self.encoded_image = self.encoded_image.to(device_or_dtype)
self.encoded_image = self.encoded_image.to(device_or_dtype)
self.temp_data = None
return self
@@ -444,12 +411,9 @@ class QwenImageDiffsynthControlnet:

CATEGORY = "advanced/loaders/qwen"

def diffsynth_controlnet(self, model, model_patch, vae, image=None, strength=1.0, inpaint_image=None, mask=None):
def diffsynth_controlnet(self, model, model_patch, vae, image, strength, mask=None):
model_patched = model.clone()
if image is not None:
image = image[:, :, :, :3]
if inpaint_image is not None:
inpaint_image = inpaint_image[:, :, :, :3]
image = image[:, :, :, :3]
if mask is not None:
if mask.ndim == 3:
mask = mask.unsqueeze(1)
@@ -458,24 +422,13 @@ class QwenImageDiffsynthControlnet:
mask = 1.0 - mask

if isinstance(model_patch.model, comfy.ldm.lumina.controlnet.ZImage_Control):
patch = ZImageControlPatch(model_patch, vae, image, strength, inpaint_image=inpaint_image, mask=mask)
patch = ZImageControlPatch(model_patch, vae, image, strength, mask=mask)
model_patched.set_model_noise_refiner_patch(patch)
model_patched.set_model_double_block_patch(patch)
else:
model_patched.set_model_double_block_patch(DiffSynthCnetPatch(model_patch, vae, image, strength, mask))
return (model_patched,)

class ZImageFunControlnet(QwenImageDiffsynthControlnet):
@classmethod
def INPUT_TYPES(s):
return {"required": { "model": ("MODEL",),
"model_patch": ("MODEL_PATCH",),
"vae": ("VAE",),
"strength": ("FLOAT", {"default": 1.0, "min": -10.0, "max": 10.0, "step": 0.01}),
},
"optional": {"image": ("IMAGE",), "inpaint_image": ("IMAGE",), "mask": ("MASK",)}}

CATEGORY = "advanced/loaders/zimage"

class UsoStyleProjectorPatch:
def __init__(self, model_patch, encoded_image):
@@ -523,6 +476,5 @@ class USOStyleReference:
NODE_CLASS_MAPPINGS = {
"ModelPatchLoader": ModelPatchLoader,
"QwenImageDiffsynthControlnet": QwenImageDiffsynthControlnet,
"ZImageFunControlnet": ZImageFunControlnet,
"USOStyleReference": USOStyleReference,
}
@@ -1,3 +1,3 @@
# This file is automatically generated by the build process when version is
# updated in pyproject.toml.
__version__ = "0.5.0"
__version__ = "0.4.0"
@@ -1,6 +1,6 @@
[project]
name = "ComfyUI"
version = "0.5.0"
version = "0.4.0"
readme = "README.md"
license = { file = "LICENSE" }
requires-python = ">=3.9"
@@ -1,4 +1,4 @@
comfyui-frontend-package==1.34.9
comfyui-frontend-package==1.34.8
comfyui-workflow-templates==0.7.59
comfyui-embedded-docs==0.3.1
torch