Compare commits

..

1 Commits

Author SHA1 Message Date
Terry Jia
6e831a1ab3 feat: add proxyWidgetSelector to subgraph blueprints 2026-04-06 18:04:22 -04:00
44 changed files with 92 additions and 48965 deletions

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,322 +1 @@
{
"revision": 0,
"last_node_id": 29,
"last_link_id": 0,
"nodes": [
{
"id": 29,
"type": "4c9d6ea4-b912-40e5-8766-6793a9758c53",
"pos": [
1970,
-230
],
"size": [
180,
86
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"label": "image",
"localized_name": "images.image0",
"name": "images.image0",
"type": "IMAGE",
"link": null
}
],
"outputs": [
{
"label": "R",
"localized_name": "IMAGE0",
"name": "IMAGE0",
"type": "IMAGE",
"links": []
},
{
"label": "G",
"localized_name": "IMAGE1",
"name": "IMAGE1",
"type": "IMAGE",
"links": []
},
{
"label": "B",
"localized_name": "IMAGE2",
"name": "IMAGE2",
"type": "IMAGE",
"links": []
},
{
"label": "A",
"localized_name": "IMAGE3",
"name": "IMAGE3",
"type": "IMAGE",
"links": []
}
],
"title": "Image Channels",
"properties": {
"proxyWidgets": []
},
"widgets_values": []
}
],
"links": [],
"version": 0.4,
"definitions": {
"subgraphs": [
{
"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53",
"version": 1,
"state": {
"lastGroupId": 0,
"lastNodeId": 28,
"lastLinkId": 39,
"lastRerouteId": 0
},
"revision": 0,
"config": {},
"name": "Image Channels",
"inputNode": {
"id": -10,
"bounding": [
1820,
-185,
120,
60
]
},
"outputNode": {
"id": -20,
"bounding": [
2460,
-215,
120,
120
]
},
"inputs": [
{
"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe",
"name": "images.image0",
"type": "IMAGE",
"linkIds": [
39
],
"localized_name": "images.image0",
"label": "image",
"pos": [
1920,
-165
]
}
],
"outputs": [
{
"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b",
"name": "IMAGE0",
"type": "IMAGE",
"linkIds": [
26
],
"localized_name": "IMAGE0",
"label": "R",
"pos": [
2480,
-195
]
},
{
"id": "fb44a77e-0522-43e9-9527-82e7465b3596",
"name": "IMAGE1",
"type": "IMAGE",
"linkIds": [
27
],
"localized_name": "IMAGE1",
"label": "G",
"pos": [
2480,
-175
]
},
{
"id": "81460ee6-0131-402a-874f-6bf3001fc4ff",
"name": "IMAGE2",
"type": "IMAGE",
"linkIds": [
28
],
"localized_name": "IMAGE2",
"label": "B",
"pos": [
2480,
-155
]
},
{
"id": "ae690246-80d4-4951-b1d9-9306d8a77417",
"name": "IMAGE3",
"type": "IMAGE",
"linkIds": [
29
],
"localized_name": "IMAGE3",
"label": "A",
"pos": [
2480,
-135
]
}
],
"widgets": [],
"nodes": [
{
"id": 23,
"type": "GLSLShader",
"pos": [
2000,
-330
],
"size": [
400,
172
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"label": "image",
"localized_name": "images.image0",
"name": "images.image0",
"type": "IMAGE",
"link": 39
},
{
"localized_name": "fragment_shader",
"name": "fragment_shader",
"type": "STRING",
"widget": {
"name": "fragment_shader"
},
"link": null
},
{
"localized_name": "size_mode",
"name": "size_mode",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "size_mode"
},
"link": null
},
{
"label": "image1",
"localized_name": "images.image1",
"name": "images.image1",
"shape": 7,
"type": "IMAGE",
"link": null
}
],
"outputs": [
{
"label": "R",
"localized_name": "IMAGE0",
"name": "IMAGE0",
"type": "IMAGE",
"links": [
26
]
},
{
"label": "G",
"localized_name": "IMAGE1",
"name": "IMAGE1",
"type": "IMAGE",
"links": [
27
]
},
{
"label": "B",
"localized_name": "IMAGE2",
"name": "IMAGE2",
"type": "IMAGE",
"links": [
28
]
},
{
"label": "A",
"localized_name": "IMAGE3",
"name": "IMAGE3",
"type": "IMAGE",
"links": [
29
]
}
],
"properties": {
"Node name for S&R": "GLSLShader"
},
"widgets_values": [
"#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n",
"from_input"
]
}
],
"groups": [],
"links": [
{
"id": 39,
"origin_id": -10,
"origin_slot": 0,
"target_id": 23,
"target_slot": 0,
"type": "IMAGE"
},
{
"id": 26,
"origin_id": 23,
"origin_slot": 0,
"target_id": -20,
"target_slot": 0,
"type": "IMAGE"
},
{
"id": 27,
"origin_id": 23,
"origin_slot": 1,
"target_id": -20,
"target_slot": 1,
"type": "IMAGE"
},
{
"id": 28,
"origin_id": 23,
"origin_slot": 2,
"target_id": -20,
"target_slot": 2,
"type": "IMAGE"
},
{
"id": 29,
"origin_id": 23,
"origin_slot": 3,
"target_id": -20,
"target_slot": 3,
"type": "IMAGE"
}
],
"extra": {
"workflowRendererVersion": "LG"
},
"category": "Image Tools/Color adjust"
}
]
}
}
{"revision": 0, "last_node_id": 29, "last_link_id": 0, "nodes": [{"id": 29, "type": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "pos": [1970, -230], "size": [180, 86], "flags": {}, "order": 5, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": []}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": []}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": []}], "title": "Image Channels", "properties": {"proxyWidgets": []}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "4c9d6ea4-b912-40e5-8766-6793a9758c53", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 28, "lastLinkId": 39, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Image Channels", "inputNode": {"id": -10, "bounding": [1820, -185, 120, 60]}, "outputNode": {"id": -20, "bounding": [2460, -215, 120, 120]}, "inputs": [{"id": "3522932b-2d86-4a1f-a02a-cb29f3a9d7fe", "name": "images.image0", "type": "IMAGE", "linkIds": [39], "localized_name": "images.image0", "label": "image", "pos": [1920, -165]}], "outputs": [{"id": "605cb9c3-b065-4d9b-81d2-3ec331889b2b", "name": "IMAGE0", "type": "IMAGE", "linkIds": [26], "localized_name": "IMAGE0", "label": "R", "pos": [2480, -195]}, {"id": "fb44a77e-0522-43e9-9527-82e7465b3596", "name": "IMAGE1", "type": "IMAGE", "linkIds": [27], "localized_name": "IMAGE1", "label": "G", "pos": [2480, -175]}, {"id": "81460ee6-0131-402a-874f-6bf3001fc4ff", "name": "IMAGE2", "type": "IMAGE", "linkIds": [28], "localized_name": "IMAGE2", "label": "B", "pos": [2480, -155]}, {"id": "ae690246-80d4-4951-b1d9-9306d8a77417", "name": "IMAGE3", "type": "IMAGE", "linkIds": [29], "localized_name": "IMAGE3", "label": "A", "pos": [2480, -135]}], "widgets": [], "nodes": [{"id": 23, "type": "GLSLShader", "pos": [2000, -330], "size": [400, 172], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 39}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}], "outputs": [{"label": "R", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [26]}, {"label": "G", "localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": [27]}, {"label": "B", "localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": [28]}, {"label": "A", "localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": [29]}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\nlayout(location = 1) out vec4 fragColor1;\nlayout(location = 2) out vec4 fragColor2;\nlayout(location = 3) out vec4 fragColor3;\n\nvoid main() {\n vec4 color = texture(u_image0, v_texCoord);\n // Output each channel as grayscale to separate render targets\n fragColor0 = vec4(vec3(color.r), 1.0); // Red channel\n fragColor1 = vec4(vec3(color.g), 1.0); // Green channel\n fragColor2 = vec4(vec3(color.b), 1.0); // Blue channel\n fragColor3 = vec4(vec3(color.a), 1.0); // Alpha channel\n}\n", "from_input"]}], "groups": [], "links": [{"id": 39, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 26, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}, {"id": 27, "origin_id": 23, "origin_slot": 1, "target_id": -20, "target_slot": 1, "type": "IMAGE"}, {"id": 28, "origin_id": 23, "origin_slot": 2, "target_id": -20, "target_slot": 2, "type": "IMAGE"}, {"id": 29, "origin_id": 23, "origin_slot": 3, "target_id": -20, "target_slot": 3, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Color adjust"}]}}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,278 +1 @@
{
"revision": 0,
"last_node_id": 15,
"last_link_id": 0,
"nodes": [
{
"id": 15,
"type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471",
"pos": [
-1490,
2040
],
"size": [
400,
260
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"name": "prompt",
"type": "STRING",
"widget": {
"name": "prompt"
},
"link": null
},
{
"label": "reference images",
"name": "images",
"type": "IMAGE",
"link": null
}
],
"outputs": [
{
"name": "STRING",
"type": "STRING",
"links": null
}
],
"title": "Prompt Enhance",
"properties": {
"proxyWidgets": [
[
"-1",
"prompt"
]
],
"cnr_id": "comfy-core",
"ver": "0.14.1"
},
"widgets_values": [
""
]
}
],
"links": [],
"version": 0.4,
"definitions": {
"subgraphs": [
{
"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471",
"version": 1,
"state": {
"lastGroupId": 0,
"lastNodeId": 15,
"lastLinkId": 14,
"lastRerouteId": 0
},
"revision": 0,
"config": {},
"name": "Prompt Enhance",
"inputNode": {
"id": -10,
"bounding": [
-2170,
2110,
138.876953125,
80
]
},
"outputNode": {
"id": -20,
"bounding": [
-640,
2110,
120,
60
]
},
"inputs": [
{
"id": "aeab7216-00e0-4528-a09b-bba50845c5a6",
"name": "prompt",
"type": "STRING",
"linkIds": [
11
],
"pos": [
-2051.123046875,
2130
]
},
{
"id": "7b73fd36-aa31-4771-9066-f6c83879994b",
"name": "images",
"type": "IMAGE",
"linkIds": [
14
],
"label": "reference images",
"pos": [
-2051.123046875,
2150
]
}
],
"outputs": [
{
"id": "c7b0d930-68a1-48d1-b496-0519e5837064",
"name": "STRING",
"type": "STRING",
"linkIds": [
13
],
"pos": [
-620,
2130
]
}
],
"widgets": [],
"nodes": [
{
"id": 11,
"type": "GeminiNode",
"pos": [
-1560,
1990
],
"size": [
470,
470
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"localized_name": "images",
"name": "images",
"shape": 7,
"type": "IMAGE",
"link": 14
},
{
"localized_name": "audio",
"name": "audio",
"shape": 7,
"type": "AUDIO",
"link": null
},
{
"localized_name": "video",
"name": "video",
"shape": 7,
"type": "VIDEO",
"link": null
},
{
"localized_name": "files",
"name": "files",
"shape": 7,
"type": "GEMINI_INPUT_FILES",
"link": null
},
{
"localized_name": "prompt",
"name": "prompt",
"type": "STRING",
"widget": {
"name": "prompt"
},
"link": 11
},
{
"localized_name": "model",
"name": "model",
"type": "COMBO",
"widget": {
"name": "model"
},
"link": null
},
{
"localized_name": "seed",
"name": "seed",
"type": "INT",
"widget": {
"name": "seed"
},
"link": null
},
{
"localized_name": "system_prompt",
"name": "system_prompt",
"shape": 7,
"type": "STRING",
"widget": {
"name": "system_prompt"
},
"link": null
}
],
"outputs": [
{
"localized_name": "STRING",
"name": "STRING",
"type": "STRING",
"links": [
13
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.14.1",
"Node name for S&R": "GeminiNode"
},
"widgets_values": [
"",
"gemini-3-pro-preview",
42,
"randomize",
"You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"
],
"color": "#432",
"bgcolor": "#653"
}
],
"groups": [],
"links": [
{
"id": 11,
"origin_id": -10,
"origin_slot": 0,
"target_id": 11,
"target_slot": 4,
"type": "STRING"
},
{
"id": 13,
"origin_id": 11,
"origin_slot": 0,
"target_id": -20,
"target_slot": 0,
"type": "STRING"
},
{
"id": 14,
"origin_id": -10,
"origin_slot": 1,
"target_id": 11,
"target_slot": 0,
"type": "IMAGE"
}
],
"extra": {
"workflowRendererVersion": "LG"
},
"category": "Text generation/Prompt enhance"
}
]
},
"extra": {}
}
{"revision": 0, "last_node_id": 15, "last_link_id": 0, "nodes": [{"id": 15, "type": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "pos": [-1490, 2040], "size": [400, 260], "flags": {}, "order": 0, "mode": 0, "inputs": [{"name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": null}, {"label": "reference images", "name": "images", "type": "IMAGE", "link": null}], "outputs": [{"name": "STRING", "type": "STRING", "links": null}], "title": "Prompt Enhance", "properties": {"proxyWidgets": [["-1", "prompt"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": [""]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "24d8bbfd-39d4-4774-bff0-3de40cc7a471", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 15, "lastLinkId": 14, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Prompt Enhance", "inputNode": {"id": -10, "bounding": [-2170, 2110, 138.876953125, 80]}, "outputNode": {"id": -20, "bounding": [-640, 2110, 120, 60]}, "inputs": [{"id": "aeab7216-00e0-4528-a09b-bba50845c5a6", "name": "prompt", "type": "STRING", "linkIds": [11], "pos": [-2051.123046875, 2130]}, {"id": "7b73fd36-aa31-4771-9066-f6c83879994b", "name": "images", "type": "IMAGE", "linkIds": [14], "label": "reference images", "pos": [-2051.123046875, 2150]}], "outputs": [{"id": "c7b0d930-68a1-48d1-b496-0519e5837064", "name": "STRING", "type": "STRING", "linkIds": [13], "pos": [-620, 2130]}], "widgets": [], "nodes": [{"id": 11, "type": "GeminiNode", "pos": [-1560, 1990], "size": [470, 470], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "shape": 7, "type": "IMAGE", "link": 14}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": null}, {"localized_name": "video", "name": "video", "shape": 7, "type": "VIDEO", "link": null}, {"localized_name": "files", "name": "files", "shape": 7, "type": "GEMINI_INPUT_FILES", "link": null}, {"localized_name": "prompt", "name": "prompt", "type": "STRING", "widget": {"name": "prompt"}, "link": 11}, {"localized_name": "model", "name": "model", "type": "COMBO", "widget": {"name": "model"}, "link": null}, {"localized_name": "seed", "name": "seed", "type": "INT", "widget": {"name": "seed"}, "link": null}, {"localized_name": "system_prompt", "name": "system_prompt", "shape": 7, "type": "STRING", "widget": {"name": "system_prompt"}, "link": null}], "outputs": [{"localized_name": "STRING", "name": "STRING", "type": "STRING", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.14.1", "Node name for S&R": "GeminiNode"}, "widgets_values": ["", "gemini-3-pro-preview", 42, "randomize", "You are an expert in prompt writing.\nBased on the input, rewrite the user's input into a detailed prompt.\nincluding camera settings, lighting, composition, and style.\nReturn the prompt only"], "color": "#432", "bgcolor": "#653"}], "groups": [], "links": [{"id": 11, "origin_id": -10, "origin_slot": 0, "target_id": 11, "target_slot": 4, "type": "STRING"}, {"id": 13, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "STRING"}, {"id": 14, "origin_id": -10, "origin_slot": 1, "target_id": 11, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Text generation/Prompt enhance"}]}, "extra": {}}

View File

@@ -1,309 +1 @@
{
"revision": 0,
"last_node_id": 25,
"last_link_id": 0,
"nodes": [
{
"id": 25,
"type": "621ba4e2-22a8-482d-a369-023753198b7b",
"pos": [
4610,
-790
],
"size": [
230,
58
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"label": "image",
"localized_name": "images.image0",
"name": "images.image0",
"type": "IMAGE",
"link": null
}
],
"outputs": [
{
"label": "IMAGE",
"localized_name": "IMAGE0",
"name": "IMAGE0",
"type": "IMAGE",
"links": []
}
],
"title": "Sharpen",
"properties": {
"proxyWidgets": [
[
"24",
"value"
]
]
},
"widgets_values": []
}
],
"links": [],
"version": 0.4,
"definitions": {
"subgraphs": [
{
"id": "621ba4e2-22a8-482d-a369-023753198b7b",
"version": 1,
"state": {
"lastGroupId": 0,
"lastNodeId": 24,
"lastLinkId": 36,
"lastRerouteId": 0
},
"revision": 0,
"config": {},
"name": "Sharpen",
"inputNode": {
"id": -10,
"bounding": [
4090,
-825,
120,
60
]
},
"outputNode": {
"id": -20,
"bounding": [
5150,
-825,
120,
60
]
},
"inputs": [
{
"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7",
"name": "images.image0",
"type": "IMAGE",
"linkIds": [
34
],
"localized_name": "images.image0",
"label": "image",
"pos": [
4190,
-805
]
}
],
"outputs": [
{
"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9",
"name": "IMAGE0",
"type": "IMAGE",
"linkIds": [
35
],
"localized_name": "IMAGE0",
"label": "IMAGE",
"pos": [
5170,
-805
]
}
],
"widgets": [],
"nodes": [
{
"id": 24,
"type": "PrimitiveFloat",
"pos": [
4280,
-1240
],
"size": [
270,
58
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"label": "strength",
"localized_name": "value",
"name": "value",
"type": "FLOAT",
"widget": {
"name": "value"
},
"link": null
}
],
"outputs": [
{
"localized_name": "FLOAT",
"name": "FLOAT",
"type": "FLOAT",
"links": [
36
]
}
],
"properties": {
"Node name for S&R": "PrimitiveFloat",
"min": 0,
"max": 3,
"precision": 2,
"step": 0.05
},
"widgets_values": [
0.5
]
},
{
"id": 23,
"type": "GLSLShader",
"pos": [
4570,
-1240
],
"size": [
370,
192
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [
{
"label": "image0",
"localized_name": "images.image0",
"name": "images.image0",
"type": "IMAGE",
"link": 34
},
{
"label": "image1",
"localized_name": "images.image1",
"name": "images.image1",
"shape": 7,
"type": "IMAGE",
"link": null
},
{
"label": "u_float0",
"localized_name": "floats.u_float0",
"name": "floats.u_float0",
"shape": 7,
"type": "FLOAT",
"link": 36
},
{
"label": "u_float1",
"localized_name": "floats.u_float1",
"name": "floats.u_float1",
"shape": 7,
"type": "FLOAT",
"link": null
},
{
"label": "u_int0",
"localized_name": "ints.u_int0",
"name": "ints.u_int0",
"shape": 7,
"type": "INT",
"link": null
},
{
"localized_name": "fragment_shader",
"name": "fragment_shader",
"type": "STRING",
"widget": {
"name": "fragment_shader"
},
"link": null
},
{
"localized_name": "size_mode",
"name": "size_mode",
"type": "COMFY_DYNAMICCOMBO_V3",
"widget": {
"name": "size_mode"
},
"link": null
}
],
"outputs": [
{
"localized_name": "IMAGE0",
"name": "IMAGE0",
"type": "IMAGE",
"links": [
35
]
},
{
"localized_name": "IMAGE1",
"name": "IMAGE1",
"type": "IMAGE",
"links": null
},
{
"localized_name": "IMAGE2",
"name": "IMAGE2",
"type": "IMAGE",
"links": null
},
{
"localized_name": "IMAGE3",
"name": "IMAGE3",
"type": "IMAGE",
"links": null
}
],
"properties": {
"Node name for S&R": "GLSLShader"
},
"widgets_values": [
"#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 2.0] typical: 0.31.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}",
"from_input"
]
}
],
"groups": [],
"links": [
{
"id": 36,
"origin_id": 24,
"origin_slot": 0,
"target_id": 23,
"target_slot": 2,
"type": "FLOAT"
},
{
"id": 34,
"origin_id": -10,
"origin_slot": 0,
"target_id": 23,
"target_slot": 0,
"type": "IMAGE"
},
{
"id": 35,
"origin_id": 23,
"origin_slot": 0,
"target_id": -20,
"target_slot": 0,
"type": "IMAGE"
}
],
"extra": {
"workflowRendererVersion": "LG"
},
"category": "Image Tools/Sharpen"
}
]
}
}
{"revision": 0, "last_node_id": 25, "last_link_id": 0, "nodes": [{"id": 25, "type": "621ba4e2-22a8-482d-a369-023753198b7b", "pos": [4610, -790], "size": [230, 58], "flags": {}, "order": 4, "mode": 0, "inputs": [{"label": "image", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": null}], "outputs": [{"label": "IMAGE", "localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": []}], "title": "Sharpen", "properties": {"proxyWidgets": [["24", "value"]]}, "widgets_values": []}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "621ba4e2-22a8-482d-a369-023753198b7b", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 24, "lastLinkId": 36, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Sharpen", "inputNode": {"id": -10, "bounding": [4090, -825, 120, 60]}, "outputNode": {"id": -20, "bounding": [5150, -825, 120, 60]}, "inputs": [{"id": "37011fb7-14b7-4e0e-b1a0-6a02e8da1fd7", "name": "images.image0", "type": "IMAGE", "linkIds": [34], "localized_name": "images.image0", "label": "image", "pos": [4190, -805]}], "outputs": [{"id": "e9182b3f-635c-4cd4-a152-4b4be17ae4b9", "name": "IMAGE0", "type": "IMAGE", "linkIds": [35], "localized_name": "IMAGE0", "label": "IMAGE", "pos": [5170, -805]}], "widgets": [], "nodes": [{"id": 24, "type": "PrimitiveFloat", "pos": [4280, -1240], "size": [270, 58], "flags": {}, "order": 0, "mode": 0, "inputs": [{"label": "strength", "localized_name": "value", "name": "value", "type": "FLOAT", "widget": {"name": "value"}, "link": null}], "outputs": [{"localized_name": "FLOAT", "name": "FLOAT", "type": "FLOAT", "links": [36]}], "properties": {"Node name for S&R": "PrimitiveFloat", "min": 0, "max": 3, "precision": 2, "step": 0.05}, "widgets_values": [0.5]}, {"id": 23, "type": "GLSLShader", "pos": [4570, -1240], "size": [370, 192], "flags": {}, "order": 1, "mode": 0, "inputs": [{"label": "image0", "localized_name": "images.image0", "name": "images.image0", "type": "IMAGE", "link": 34}, {"label": "image1", "localized_name": "images.image1", "name": "images.image1", "shape": 7, "type": "IMAGE", "link": null}, {"label": "u_float0", "localized_name": "floats.u_float0", "name": "floats.u_float0", "shape": 7, "type": "FLOAT", "link": 36}, {"label": "u_float1", "localized_name": "floats.u_float1", "name": "floats.u_float1", "shape": 7, "type": "FLOAT", "link": null}, {"label": "u_int0", "localized_name": "ints.u_int0", "name": "ints.u_int0", "shape": 7, "type": "INT", "link": null}, {"localized_name": "fragment_shader", "name": "fragment_shader", "type": "STRING", "widget": {"name": "fragment_shader"}, "link": null}, {"localized_name": "size_mode", "name": "size_mode", "type": "COMFY_DYNAMICCOMBO_V3", "widget": {"name": "size_mode"}, "link": null}], "outputs": [{"localized_name": "IMAGE0", "name": "IMAGE0", "type": "IMAGE", "links": [35]}, {"localized_name": "IMAGE1", "name": "IMAGE1", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE2", "name": "IMAGE2", "type": "IMAGE", "links": null}, {"localized_name": "IMAGE3", "name": "IMAGE3", "type": "IMAGE", "links": null}], "properties": {"Node name for S&R": "GLSLShader"}, "widgets_values": ["#version 300 es\nprecision highp float;\n\nuniform sampler2D u_image0;\nuniform vec2 u_resolution;\nuniform float u_float0; // strength [0.0 2.0] typical: 0.31.0\n\nin vec2 v_texCoord;\nlayout(location = 0) out vec4 fragColor0;\n\nvoid main() {\n vec2 texel = 1.0 / u_resolution;\n \n // Sample center and neighbors\n vec4 center = texture(u_image0, v_texCoord);\n vec4 top = texture(u_image0, v_texCoord + vec2( 0.0, -texel.y));\n vec4 bottom = texture(u_image0, v_texCoord + vec2( 0.0, texel.y));\n vec4 left = texture(u_image0, v_texCoord + vec2(-texel.x, 0.0));\n vec4 right = texture(u_image0, v_texCoord + vec2( texel.x, 0.0));\n \n // Edge enhancement (Laplacian)\n vec4 edges = center * 4.0 - top - bottom - left - right;\n \n // Add edges back scaled by strength\n vec4 sharpened = center + edges * u_float0;\n \n fragColor0 = vec4(clamp(sharpened.rgb, 0.0, 1.0), center.a);\n}", "from_input"]}], "groups": [], "links": [{"id": 36, "origin_id": 24, "origin_slot": 0, "target_id": 23, "target_slot": 2, "type": "FLOAT"}, {"id": 34, "origin_id": -10, "origin_slot": 0, "target_id": 23, "target_slot": 0, "type": "IMAGE"}, {"id": 35, "origin_id": 23, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "IMAGE"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Image Tools/Sharpen"}]}}

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -1,420 +1 @@
{
"revision": 0,
"last_node_id": 13,
"last_link_id": 0,
"nodes": [
{
"id": 13,
"type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1",
"pos": [
1120,
330
],
"size": [
240,
58
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"localized_name": "video",
"name": "video",
"type": "VIDEO",
"link": null
},
{
"name": "model_name",
"type": "COMBO",
"widget": {
"name": "model_name"
},
"link": null
}
],
"outputs": [
{
"localized_name": "VIDEO",
"name": "VIDEO",
"type": "VIDEO",
"links": []
}
],
"title": "Video Upscale(GAN x4)",
"properties": {
"proxyWidgets": [
[
"-1",
"model_name"
]
],
"cnr_id": "comfy-core",
"ver": "0.14.1"
},
"widgets_values": [
"RealESRGAN_x4plus.safetensors"
]
}
],
"links": [],
"version": 0.4,
"definitions": {
"subgraphs": [
{
"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1",
"version": 1,
"state": {
"lastGroupId": 0,
"lastNodeId": 13,
"lastLinkId": 19,
"lastRerouteId": 0
},
"revision": 0,
"config": {},
"name": "Video Upscale(GAN x4)",
"inputNode": {
"id": -10,
"bounding": [
550,
460,
120,
80
]
},
"outputNode": {
"id": -20,
"bounding": [
1490,
460,
120,
60
]
},
"inputs": [
{
"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6",
"name": "video",
"type": "VIDEO",
"linkIds": [
10
],
"localized_name": "video",
"pos": [
650,
480
]
},
{
"id": "2e23a087-caa8-4d65-99e6-662761aa905a",
"name": "model_name",
"type": "COMBO",
"linkIds": [
19
],
"pos": [
650,
500
]
}
],
"outputs": [
{
"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70",
"name": "VIDEO",
"type": "VIDEO",
"linkIds": [
15
],
"localized_name": "VIDEO",
"pos": [
1510,
480
]
}
],
"widgets": [],
"nodes": [
{
"id": 2,
"type": "ImageUpscaleWithModel",
"pos": [
1110,
450
],
"size": [
320,
46
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [
{
"localized_name": "upscale_model",
"name": "upscale_model",
"type": "UPSCALE_MODEL",
"link": 1
},
{
"localized_name": "image",
"name": "image",
"type": "IMAGE",
"link": 14
}
],
"outputs": [
{
"localized_name": "IMAGE",
"name": "IMAGE",
"type": "IMAGE",
"links": [
13
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.10.0",
"Node name for S&R": "ImageUpscaleWithModel"
}
},
{
"id": 11,
"type": "CreateVideo",
"pos": [
1110,
550
],
"size": [
320,
78
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [
{
"localized_name": "images",
"name": "images",
"type": "IMAGE",
"link": 13
},
{
"localized_name": "audio",
"name": "audio",
"shape": 7,
"type": "AUDIO",
"link": 16
},
{
"localized_name": "fps",
"name": "fps",
"type": "FLOAT",
"widget": {
"name": "fps"
},
"link": 12
}
],
"outputs": [
{
"localized_name": "VIDEO",
"name": "VIDEO",
"type": "VIDEO",
"links": [
15
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.10.0",
"Node name for S&R": "CreateVideo"
},
"widgets_values": [
30
]
},
{
"id": 10,
"type": "GetVideoComponents",
"pos": [
1110,
330
],
"size": [
320,
70
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [
{
"localized_name": "video",
"name": "video",
"type": "VIDEO",
"link": 10
}
],
"outputs": [
{
"localized_name": "images",
"name": "images",
"type": "IMAGE",
"links": [
14
]
},
{
"localized_name": "audio",
"name": "audio",
"type": "AUDIO",
"links": [
16
]
},
{
"localized_name": "fps",
"name": "fps",
"type": "FLOAT",
"links": [
12
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.10.0",
"Node name for S&R": "GetVideoComponents"
}
},
{
"id": 1,
"type": "UpscaleModelLoader",
"pos": [
750,
450
],
"size": [
280,
60
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [
{
"localized_name": "model_name",
"name": "model_name",
"type": "COMBO",
"widget": {
"name": "model_name"
},
"link": 19
}
],
"outputs": [
{
"localized_name": "UPSCALE_MODEL",
"name": "UPSCALE_MODEL",
"type": "UPSCALE_MODEL",
"links": [
1
]
}
],
"properties": {
"cnr_id": "comfy-core",
"ver": "0.10.0",
"Node name for S&R": "UpscaleModelLoader",
"models": [
{
"name": "RealESRGAN_x4plus.safetensors",
"url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors",
"directory": "upscale_models"
}
]
},
"widgets_values": [
"RealESRGAN_x4plus.safetensors"
]
}
],
"groups": [],
"links": [
{
"id": 1,
"origin_id": 1,
"origin_slot": 0,
"target_id": 2,
"target_slot": 0,
"type": "UPSCALE_MODEL"
},
{
"id": 14,
"origin_id": 10,
"origin_slot": 0,
"target_id": 2,
"target_slot": 1,
"type": "IMAGE"
},
{
"id": 13,
"origin_id": 2,
"origin_slot": 0,
"target_id": 11,
"target_slot": 0,
"type": "IMAGE"
},
{
"id": 16,
"origin_id": 10,
"origin_slot": 1,
"target_id": 11,
"target_slot": 1,
"type": "AUDIO"
},
{
"id": 12,
"origin_id": 10,
"origin_slot": 2,
"target_id": 11,
"target_slot": 2,
"type": "FLOAT"
},
{
"id": 10,
"origin_id": -10,
"origin_slot": 0,
"target_id": 10,
"target_slot": 0,
"type": "VIDEO"
},
{
"id": 15,
"origin_id": 11,
"origin_slot": 0,
"target_id": -20,
"target_slot": 0,
"type": "VIDEO"
},
{
"id": 19,
"origin_id": -10,
"origin_slot": 1,
"target_id": 1,
"target_slot": 0,
"type": "COMBO"
}
],
"extra": {
"workflowRendererVersion": "LG"
},
"category": "Video generation and editing/Enhance video"
}
]
},
"extra": {}
}
{"revision": 0, "last_node_id": 13, "last_link_id": 0, "nodes": [{"id": 13, "type": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "pos": [1120, 330], "size": [240, 58], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": null}, {"name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": null}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": []}], "title": "Video Upscale(GAN x4)", "properties": {"proxyWidgets": [["-1", "model_name"]], "cnr_id": "comfy-core", "ver": "0.14.1"}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "links": [], "version": 0.4, "definitions": {"subgraphs": [{"id": "cf95b747-3e17-46cb-8097-cac60ff9b2e1", "version": 1, "state": {"lastGroupId": 0, "lastNodeId": 13, "lastLinkId": 19, "lastRerouteId": 0}, "revision": 0, "config": {}, "name": "Video Upscale(GAN x4)", "inputNode": {"id": -10, "bounding": [550, 460, 120, 80]}, "outputNode": {"id": -20, "bounding": [1490, 460, 120, 60]}, "inputs": [{"id": "666d633e-93e7-42dc-8d11-2b7b99b0f2a6", "name": "video", "type": "VIDEO", "linkIds": [10], "localized_name": "video", "pos": [650, 480]}, {"id": "2e23a087-caa8-4d65-99e6-662761aa905a", "name": "model_name", "type": "COMBO", "linkIds": [19], "pos": [650, 500]}], "outputs": [{"id": "0c1768ea-3ec2-412f-9af6-8e0fa36dae70", "name": "VIDEO", "type": "VIDEO", "linkIds": [15], "localized_name": "VIDEO", "pos": [1510, 480]}], "widgets": [], "nodes": [{"id": 2, "type": "ImageUpscaleWithModel", "pos": [1110, 450], "size": [320, 46], "flags": {}, "order": 1, "mode": 0, "inputs": [{"localized_name": "upscale_model", "name": "upscale_model", "type": "UPSCALE_MODEL", "link": 1}, {"localized_name": "image", "name": "image", "type": "IMAGE", "link": 14}], "outputs": [{"localized_name": "IMAGE", "name": "IMAGE", "type": "IMAGE", "links": [13]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "ImageUpscaleWithModel"}}, {"id": 11, "type": "CreateVideo", "pos": [1110, 550], "size": [320, 78], "flags": {}, "order": 3, "mode": 0, "inputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "link": 13}, {"localized_name": "audio", "name": "audio", "shape": 7, "type": "AUDIO", "link": 16}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "widget": {"name": "fps"}, "link": 12}], "outputs": [{"localized_name": "VIDEO", "name": "VIDEO", "type": "VIDEO", "links": [15]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "CreateVideo"}, "widgets_values": [30]}, {"id": 10, "type": "GetVideoComponents", "pos": [1110, 330], "size": [320, 70], "flags": {}, "order": 2, "mode": 0, "inputs": [{"localized_name": "video", "name": "video", "type": "VIDEO", "link": 10}], "outputs": [{"localized_name": "images", "name": "images", "type": "IMAGE", "links": [14]}, {"localized_name": "audio", "name": "audio", "type": "AUDIO", "links": [16]}, {"localized_name": "fps", "name": "fps", "type": "FLOAT", "links": [12]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "GetVideoComponents"}}, {"id": 1, "type": "UpscaleModelLoader", "pos": [750, 450], "size": [280, 60], "flags": {}, "order": 0, "mode": 0, "inputs": [{"localized_name": "model_name", "name": "model_name", "type": "COMBO", "widget": {"name": "model_name"}, "link": 19}], "outputs": [{"localized_name": "UPSCALE_MODEL", "name": "UPSCALE_MODEL", "type": "UPSCALE_MODEL", "links": [1]}], "properties": {"cnr_id": "comfy-core", "ver": "0.10.0", "Node name for S&R": "UpscaleModelLoader", "models": [{"name": "RealESRGAN_x4plus.safetensors", "url": "https://huggingface.co/Comfy-Org/Real-ESRGAN_repackaged/resolve/main/RealESRGAN_x4plus.safetensors", "directory": "upscale_models"}]}, "widgets_values": ["RealESRGAN_x4plus.safetensors"]}], "groups": [], "links": [{"id": 1, "origin_id": 1, "origin_slot": 0, "target_id": 2, "target_slot": 0, "type": "UPSCALE_MODEL"}, {"id": 14, "origin_id": 10, "origin_slot": 0, "target_id": 2, "target_slot": 1, "type": "IMAGE"}, {"id": 13, "origin_id": 2, "origin_slot": 0, "target_id": 11, "target_slot": 0, "type": "IMAGE"}, {"id": 16, "origin_id": 10, "origin_slot": 1, "target_id": 11, "target_slot": 1, "type": "AUDIO"}, {"id": 12, "origin_id": 10, "origin_slot": 2, "target_id": 11, "target_slot": 2, "type": "FLOAT"}, {"id": 10, "origin_id": -10, "origin_slot": 0, "target_id": 10, "target_slot": 0, "type": "VIDEO"}, {"id": 15, "origin_id": 11, "origin_slot": 0, "target_id": -20, "target_slot": 0, "type": "VIDEO"}, {"id": 19, "origin_id": -10, "origin_slot": 1, "target_id": 1, "target_slot": 0, "type": "COMBO"}], "extra": {"workflowRendererVersion": "LG"}, "category": "Video generation and editing/Enhance video"}]}, "extra": {}}

View File

@@ -611,7 +611,6 @@ class AceStepDiTModel(nn.Module):
intermediate_size,
patch_size,
audio_acoustic_hidden_dim,
condition_dim=None,
layer_types=None,
sliding_window=128,
rms_norm_eps=1e-6,
@@ -641,7 +640,7 @@ class AceStepDiTModel(nn.Module):
self.time_embed = TimestepEmbedding(256, hidden_size, dtype=dtype, device=device, operations=operations)
self.time_embed_r = TimestepEmbedding(256, hidden_size, dtype=dtype, device=device, operations=operations)
self.condition_embedder = Linear(condition_dim, hidden_size, dtype=dtype, device=device)
self.condition_embedder = Linear(hidden_size, hidden_size, dtype=dtype, device=device)
if layer_types is None:
layer_types = ["full_attention"] * num_layers
@@ -1036,9 +1035,6 @@ class AceStepConditionGenerationModel(nn.Module):
fsq_dim=2048,
fsq_levels=[8, 8, 8, 5, 5, 5],
fsq_input_num_quantizers=1,
encoder_hidden_size=2048,
encoder_intermediate_size=6144,
encoder_num_heads=16,
audio_model=None,
dtype=None,
device=None,
@@ -1058,24 +1054,24 @@ class AceStepConditionGenerationModel(nn.Module):
self.decoder = AceStepDiTModel(
in_channels, hidden_size, num_dit_layers, num_heads, num_kv_heads, head_dim,
intermediate_size, patch_size, audio_acoustic_hidden_dim, condition_dim=encoder_hidden_size,
intermediate_size, patch_size, audio_acoustic_hidden_dim,
layer_types=layer_types, sliding_window=sliding_window, rms_norm_eps=rms_norm_eps,
dtype=dtype, device=device, operations=operations
)
self.encoder = AceStepConditionEncoder(
text_hidden_dim, timbre_hidden_dim, encoder_hidden_size, num_lyric_layers, num_timbre_layers,
encoder_num_heads, num_kv_heads, head_dim, encoder_intermediate_size, rms_norm_eps,
text_hidden_dim, timbre_hidden_dim, hidden_size, num_lyric_layers, num_timbre_layers,
num_heads, num_kv_heads, head_dim, intermediate_size, rms_norm_eps,
dtype=dtype, device=device, operations=operations
)
self.tokenizer = AceStepAudioTokenizer(
audio_acoustic_hidden_dim, encoder_hidden_size, pool_window_size, fsq_dim=fsq_dim, fsq_levels=fsq_levels, fsq_input_num_quantizers=fsq_input_num_quantizers, num_layers=num_tokenizer_layers, head_dim=head_dim, rms_norm_eps=rms_norm_eps,
audio_acoustic_hidden_dim, hidden_size, pool_window_size, fsq_dim=fsq_dim, fsq_levels=fsq_levels, fsq_input_num_quantizers=fsq_input_num_quantizers, num_layers=num_tokenizer_layers, head_dim=head_dim, rms_norm_eps=rms_norm_eps,
dtype=dtype, device=device, operations=operations
)
self.detokenizer = AudioTokenDetokenizer(
encoder_hidden_size, pool_window_size, audio_acoustic_hidden_dim, num_layers=2, head_dim=head_dim,
hidden_size, pool_window_size, audio_acoustic_hidden_dim, num_layers=2, head_dim=head_dim,
dtype=dtype, device=device, operations=operations
)
self.null_condition_emb = nn.Parameter(torch.empty(1, 1, encoder_hidden_size, dtype=dtype, device=device))
self.null_condition_emb = nn.Parameter(torch.empty(1, 1, hidden_size, dtype=dtype, device=device))
def prepare_condition(
self,

View File

@@ -155,7 +155,6 @@ class AutoencodingEngineLegacy(AutoencodingEngine):
def __init__(self, embed_dim: int, **kwargs):
self.max_batch_size = kwargs.pop("max_batch_size", None)
ddconfig = kwargs.pop("ddconfig")
decoder_ddconfig = kwargs.pop("decoder_ddconfig", ddconfig)
super().__init__(
encoder_config={
"target": "comfy.ldm.modules.diffusionmodules.model.Encoder",
@@ -163,7 +162,7 @@ class AutoencodingEngineLegacy(AutoencodingEngine):
},
decoder_config={
"target": "comfy.ldm.modules.diffusionmodules.model.Decoder",
"params": decoder_ddconfig,
"params": ddconfig,
},
**kwargs,
)

View File

@@ -696,15 +696,6 @@ def detect_unet_config(state_dict, key_prefix, metadata=None):
if '{}encoder.lyric_encoder.layers.0.input_layernorm.weight'.format(key_prefix) in state_dict_keys:
dit_config = {}
dit_config["audio_model"] = "ace1.5"
head_dim = 128
dit_config["hidden_size"] = state_dict['{}decoder.layers.0.self_attn_norm.weight'.format(key_prefix)].shape[0]
dit_config["intermediate_size"] = state_dict['{}decoder.layers.0.mlp.gate_proj.weight'.format(key_prefix)].shape[0]
dit_config["num_heads"] = state_dict['{}decoder.layers.0.self_attn.q_proj.weight'.format(key_prefix)].shape[0] // head_dim
dit_config["encoder_hidden_size"] = state_dict['{}encoder.lyric_encoder.layers.0.input_layernorm.weight'.format(key_prefix)].shape[0]
dit_config["encoder_num_heads"] = state_dict['{}encoder.lyric_encoder.layers.0.self_attn.q_proj.weight'.format(key_prefix)].shape[0] // head_dim
dit_config["encoder_intermediate_size"] = state_dict['{}encoder.lyric_encoder.layers.0.mlp.gate_proj.weight'.format(key_prefix)].shape[0]
dit_config["num_dit_layers"] = count_blocks(state_dict_keys, '{}decoder.layers.'.format(key_prefix) + '{}.')
return dit_config
if '{}encoder.pan_blocks.1.cv4.conv.weight'.format(key_prefix) in state_dict_keys: # RT-DETR_v4

View File

@@ -556,19 +556,12 @@ class VAE:
old_memory_used_decode = self.memory_used_decode
self.memory_used_decode = lambda shape, dtype: old_memory_used_decode(shape, dtype) * 4.0
decoder_ch = sd['decoder.conv_in.weight'].shape[0] // ddconfig['ch_mult'][-1]
if decoder_ch != ddconfig['ch']:
decoder_ddconfig = ddconfig.copy()
decoder_ddconfig['ch'] = decoder_ch
else:
decoder_ddconfig = None
if 'post_quant_conv.weight' in sd:
self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1], **({"decoder_ddconfig": decoder_ddconfig} if decoder_ddconfig is not None else {}))
self.first_stage_model = AutoencoderKL(ddconfig=ddconfig, embed_dim=sd['post_quant_conv.weight'].shape[1])
else:
self.first_stage_model = AutoencodingEngine(regularizer_config={'target': "comfy.ldm.models.autoencoder.DiagonalGaussianRegularizer"},
encoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Encoder", 'params': ddconfig},
decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': decoder_ddconfig if decoder_ddconfig is not None else ddconfig})
decoder_config={'target': "comfy.ldm.modules.diffusionmodules.model.Decoder", 'params': ddconfig})
elif "decoder.layers.1.layers.0.beta" in sd:
config = {}
param_key = None

View File

@@ -24,9 +24,8 @@ from comfy_api_nodes.util import (
AVERAGE_DURATION_VIDEO_GEN = 32
MODELS_MAP = {
"veo-2.0-generate-001": "veo-2.0-generate-001",
"veo-3.1-generate": "veo-3.1-generate-001",
"veo-3.1-fast-generate": "veo-3.1-fast-generate-001",
"veo-3.1-lite": "veo-3.1-lite-generate-001",
"veo-3.1-generate": "veo-3.1-generate-preview",
"veo-3.1-fast-generate": "veo-3.1-fast-generate-preview",
"veo-3.0-generate-001": "veo-3.0-generate-001",
"veo-3.0-fast-generate-001": "veo-3.0-fast-generate-001",
}
@@ -248,8 +247,17 @@ class VeoVideoGenerationNode(IO.ComfyNode):
raise Exception("Video generation completed but no video was returned")
class Veo3VideoGenerationNode(IO.ComfyNode):
"""Generates videos from text prompts using Google's Veo 3 API."""
class Veo3VideoGenerationNode(VeoVideoGenerationNode):
"""
Generates videos from text prompts using Google's Veo 3 API.
Supported models:
- veo-3.0-generate-001
- veo-3.0-fast-generate-001
This node extends the base Veo node with Veo 3 specific features including
audio generation and fixed 8-second duration.
"""
@classmethod
def define_schema(cls):
@@ -271,13 +279,6 @@ class Veo3VideoGenerationNode(IO.ComfyNode):
default="16:9",
tooltip="Aspect ratio of the output video",
),
IO.Combo.Input(
"resolution",
options=["720p", "1080p", "4k"],
default="720p",
tooltip="Output video resolution. 4K is not available for veo-3.1-lite and veo-3.0 models.",
optional=True,
),
IO.String.Input(
"negative_prompt",
multiline=True,
@@ -288,11 +289,11 @@ class Veo3VideoGenerationNode(IO.ComfyNode):
IO.Int.Input(
"duration_seconds",
default=8,
min=4,
min=8,
max=8,
step=2,
step=1,
display_mode=IO.NumberDisplay.number,
tooltip="Duration of the output video in seconds",
tooltip="Duration of the output video in seconds (Veo 3 only supports 8 seconds)",
optional=True,
),
IO.Boolean.Input(
@@ -331,10 +332,10 @@ class Veo3VideoGenerationNode(IO.ComfyNode):
options=[
"veo-3.1-generate",
"veo-3.1-fast-generate",
"veo-3.1-lite",
"veo-3.0-generate-001",
"veo-3.0-fast-generate-001",
],
default="veo-3.0-generate-001",
tooltip="Veo 3 model to use for video generation",
optional=True,
),
@@ -355,111 +356,21 @@ class Veo3VideoGenerationNode(IO.ComfyNode):
],
is_api_node=True,
price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio", "resolution", "duration_seconds"]),
depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio"]),
expr="""
(
$m := widgets.model;
$r := widgets.resolution;
$a := widgets.generate_audio;
$seconds := widgets.duration_seconds;
$pps :=
$contains($m, "lite")
? ($r = "1080p" ? ($a ? 0.08 : 0.05) : ($a ? 0.05 : 0.03))
: $contains($m, "3.1-fast")
? ($r = "4k" ? ($a ? 0.30 : 0.25) : $r = "1080p" ? ($a ? 0.12 : 0.10) : ($a ? 0.10 : 0.08))
: $contains($m, "3.1-generate")
? ($r = "4k" ? ($a ? 0.60 : 0.40) : ($a ? 0.40 : 0.20))
: $contains($m, "3.0-fast")
? ($a ? 0.15 : 0.10)
: ($a ? 0.40 : 0.20);
{"type":"usd","usd": $pps * $seconds}
($contains($m,"veo-3.0-fast-generate-001") or $contains($m,"veo-3.1-fast-generate"))
? {"type":"usd","usd": ($a ? 1.2 : 0.8)}
: ($contains($m,"veo-3.0-generate-001") or $contains($m,"veo-3.1-generate"))
? {"type":"usd","usd": ($a ? 3.2 : 1.6)}
: {"type":"range_usd","min_usd":0.8,"max_usd":3.2}
)
""",
),
)
@classmethod
async def execute(
cls,
prompt,
aspect_ratio="16:9",
resolution="720p",
negative_prompt="",
duration_seconds=8,
enhance_prompt=True,
person_generation="ALLOW",
seed=0,
image=None,
model="veo-3.0-generate-001",
generate_audio=False,
):
if "lite" in model and resolution == "4k":
raise Exception("4K resolution is not supported by the veo-3.1-lite model.")
model = MODELS_MAP[model]
instances = [{"prompt": prompt}]
if image is not None:
image_base64 = tensor_to_base64_string(image)
if image_base64:
instances[0]["image"] = {"bytesBase64Encoded": image_base64, "mimeType": "image/png"}
parameters = {
"aspectRatio": aspect_ratio,
"personGeneration": person_generation,
"durationSeconds": duration_seconds,
"enhancePrompt": True,
"generateAudio": generate_audio,
}
if negative_prompt:
parameters["negativePrompt"] = negative_prompt
if seed > 0:
parameters["seed"] = seed
if "veo-3.1" in model:
parameters["resolution"] = resolution
initial_response = await sync_op(
cls,
ApiEndpoint(path=f"/proxy/veo/{model}/generate", method="POST"),
response_model=VeoGenVidResponse,
data=VeoGenVidRequest(
instances=instances,
parameters=parameters,
),
)
poll_response = await poll_op(
cls,
ApiEndpoint(path=f"/proxy/veo/{model}/poll", method="POST"),
response_model=VeoGenVidPollResponse,
status_extractor=lambda r: "completed" if r.done else "pending",
data=VeoGenVidPollRequest(operationName=initial_response.name),
poll_interval=5.0,
estimated_duration=AVERAGE_DURATION_VIDEO_GEN,
)
if poll_response.error:
raise Exception(f"Veo API error: {poll_response.error.message} (code: {poll_response.error.code})")
response = poll_response.response
filtered_count = response.raiMediaFilteredCount
if filtered_count:
reasons = response.raiMediaFilteredReasons or []
reason_part = f": {reasons[0]}" if reasons else ""
raise Exception(
f"Content blocked by Google's Responsible AI filters{reason_part} "
f"({filtered_count} video{'s' if filtered_count != 1 else ''} filtered)."
)
if response.videos:
video = response.videos[0]
if video.bytesBase64Encoded:
return IO.NodeOutput(InputImpl.VideoFromFile(BytesIO(base64.b64decode(video.bytesBase64Encoded))))
if video.gcsUri:
return IO.NodeOutput(await download_url_to_video_output(video.gcsUri))
raise Exception("Video returned but no data or URL was provided")
raise Exception("Video generation completed but no video was returned")
class Veo3FirstLastFrameNode(IO.ComfyNode):
@@ -483,7 +394,7 @@ class Veo3FirstLastFrameNode(IO.ComfyNode):
default="",
tooltip="Negative text prompt to guide what to avoid in the video",
),
IO.Combo.Input("resolution", options=["720p", "1080p", "4k"]),
IO.Combo.Input("resolution", options=["720p", "1080p"]),
IO.Combo.Input(
"aspect_ratio",
options=["16:9", "9:16"],
@@ -513,7 +424,8 @@ class Veo3FirstLastFrameNode(IO.ComfyNode):
IO.Image.Input("last_frame", tooltip="End frame"),
IO.Combo.Input(
"model",
options=["veo-3.1-generate", "veo-3.1-fast-generate", "veo-3.1-lite"],
options=["veo-3.1-generate", "veo-3.1-fast-generate"],
default="veo-3.1-fast-generate",
),
IO.Boolean.Input(
"generate_audio",
@@ -531,20 +443,26 @@ class Veo3FirstLastFrameNode(IO.ComfyNode):
],
is_api_node=True,
price_badge=IO.PriceBadge(
depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio", "duration", "resolution"]),
depends_on=IO.PriceBadgeDepends(widgets=["model", "generate_audio", "duration"]),
expr="""
(
$prices := {
"veo-3.1-fast-generate": { "audio": 0.15, "no_audio": 0.10 },
"veo-3.1-generate": { "audio": 0.40, "no_audio": 0.20 }
};
$m := widgets.model;
$r := widgets.resolution;
$ga := widgets.generate_audio;
$ga := (widgets.generate_audio = "true");
$seconds := widgets.duration;
$pps :=
$contains($m, "lite")
? ($r = "1080p" ? ($ga ? 0.08 : 0.05) : ($ga ? 0.05 : 0.03))
: $contains($m, "fast")
? ($r = "4k" ? ($ga ? 0.30 : 0.25) : $r = "1080p" ? ($ga ? 0.12 : 0.10) : ($ga ? 0.10 : 0.08))
: ($r = "4k" ? ($ga ? 0.60 : 0.40) : ($ga ? 0.40 : 0.20));
{"type":"usd","usd": $pps * $seconds}
$modelKey :=
$contains($m, "veo-3.1-fast-generate") ? "veo-3.1-fast-generate" :
$contains($m, "veo-3.1-generate") ? "veo-3.1-generate" :
"";
$audioKey := $ga ? "audio" : "no_audio";
$modelPrices := $lookup($prices, $modelKey);
$pps := $lookup($modelPrices, $audioKey);
($pps != null)
? {"type":"usd","usd": $pps * $seconds}
: {"type":"range_usd","min_usd": 0.4, "max_usd": 3.2}
)
""",
),
@@ -564,9 +482,6 @@ class Veo3FirstLastFrameNode(IO.ComfyNode):
model: str,
generate_audio: bool,
):
if "lite" in model and resolution == "4k":
raise Exception("4K resolution is not supported by the veo-3.1-lite model.")
model = MODELS_MAP[model]
initial_response = await sync_op(
cls,

View File

@@ -80,7 +80,7 @@ class EmptyAceStepLatentAudio(io.ComfyNode):
@classmethod
def execute(cls, seconds, batch_size) -> io.NodeOutput:
length = int(seconds * 44100 / 512 / 8)
latent = torch.zeros([batch_size, 8, 16, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
latent = torch.zeros([batch_size, 8, 16, length], device=comfy.model_management.intermediate_device())
return io.NodeOutput({"samples": latent, "type": "audio"})
@@ -103,7 +103,7 @@ class EmptyAceStep15LatentAudio(io.ComfyNode):
@classmethod
def execute(cls, seconds, batch_size) -> io.NodeOutput:
length = round((seconds * 48000 / 1920))
latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device(), dtype=comfy.model_management.intermediate_dtype())
latent = torch.zeros([batch_size, 64, length], device=comfy.model_management.intermediate_device())
return io.NodeOutput({"samples": latent, "type": "audio"})
class ReferenceAudio(io.ComfyNode):