mirror of
https://github.com/wildminder/ComfyUI-VibeVoice.git
synced 2026-01-26 14:39:45 +00:00
model path update, fixes
This commit is contained in:
@@ -1,8 +1,8 @@
|
|||||||
{
|
{
|
||||||
"id": "b91265e5-1b03-4b63-8dc3-4abd9a030e08",
|
"id": "b91265e5-1b03-4b63-8dc3-4abd9a030e08",
|
||||||
"revision": 0,
|
"revision": 0,
|
||||||
"last_node_id": 10,
|
"last_node_id": 11,
|
||||||
"last_link_id": 24,
|
"last_link_id": 29,
|
||||||
"nodes": [
|
"nodes": [
|
||||||
{
|
{
|
||||||
"id": 4,
|
"id": 4,
|
||||||
@@ -24,14 +24,14 @@
|
|||||||
"name": "AUDIO",
|
"name": "AUDIO",
|
||||||
"type": "AUDIO",
|
"type": "AUDIO",
|
||||||
"links": [
|
"links": [
|
||||||
21
|
28
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"Node name for S&R": "LoadAudio",
|
|
||||||
"cnr_id": "comfy-core",
|
"cnr_id": "comfy-core",
|
||||||
"ver": "0.3.52",
|
"ver": "0.3.52",
|
||||||
|
"Node name for S&R": "LoadAudio",
|
||||||
"ue_properties": {
|
"ue_properties": {
|
||||||
"widget_ue_connectable": {
|
"widget_ue_connectable": {
|
||||||
"audio": true,
|
"audio": true,
|
||||||
@@ -47,6 +47,77 @@
|
|||||||
null
|
null
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"type": "VibeVoiceTTS",
|
||||||
|
"pos": [
|
||||||
|
-1570,
|
||||||
|
-1130
|
||||||
|
],
|
||||||
|
"size": [
|
||||||
|
460,
|
||||||
|
510
|
||||||
|
],
|
||||||
|
"flags": {},
|
||||||
|
"order": 3,
|
||||||
|
"mode": 0,
|
||||||
|
"inputs": [
|
||||||
|
{
|
||||||
|
"name": "speaker_1_voice",
|
||||||
|
"shape": 7,
|
||||||
|
"type": "AUDIO",
|
||||||
|
"link": 28
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "speaker_2_voice",
|
||||||
|
"shape": 7,
|
||||||
|
"type": "AUDIO",
|
||||||
|
"link": 29
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "speaker_3_voice",
|
||||||
|
"shape": 7,
|
||||||
|
"type": "AUDIO",
|
||||||
|
"link": null
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "speaker_4_voice",
|
||||||
|
"shape": 7,
|
||||||
|
"type": "AUDIO",
|
||||||
|
"link": null
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "AUDIO",
|
||||||
|
"type": "AUDIO",
|
||||||
|
"links": [
|
||||||
|
27
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"properties": {
|
||||||
|
"cnr_id": "ComfyUI-VibeVoice",
|
||||||
|
"ver": "37803a884fb8f9b43c38286f6d654c7f97181a73",
|
||||||
|
"Node name for S&R": "VibeVoiceTTS"
|
||||||
|
},
|
||||||
|
"widgets_values": [
|
||||||
|
"VibeVoice-1.5B",
|
||||||
|
"Speaker 1: I can't believe you did it again. I waited for two hours. Two hours! Not a single call, not a text. Do you have any idea how embarrassing that was, just sitting there alone?\nSpeaker 2: Look, I know, I'm sorry, alright? Work was a complete nightmare. My boss dropped a critical deadline on me at the last minute. I didn't even have a second to breathe, let alone check my phone.\nSpeaker 1: A nightmare? That's the same excuse you used last time. I'm starting to think you just don't care. It's easier to say 'work was crazy' than to just admit that I'm not a priority for you anymore.",
|
||||||
|
false,
|
||||||
|
"sdpa",
|
||||||
|
1.3,
|
||||||
|
10,
|
||||||
|
56109085141530,
|
||||||
|
"randomize",
|
||||||
|
true,
|
||||||
|
0.95,
|
||||||
|
0.95,
|
||||||
|
0
|
||||||
|
],
|
||||||
|
"color": "#232",
|
||||||
|
"bgcolor": "#353"
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"id": 8,
|
"id": 8,
|
||||||
"type": "LoadAudio",
|
"type": "LoadAudio",
|
||||||
@@ -67,14 +138,14 @@
|
|||||||
"name": "AUDIO",
|
"name": "AUDIO",
|
||||||
"type": "AUDIO",
|
"type": "AUDIO",
|
||||||
"links": [
|
"links": [
|
||||||
24
|
29
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"properties": {
|
"properties": {
|
||||||
"Node name for S&R": "LoadAudio",
|
|
||||||
"cnr_id": "comfy-core",
|
"cnr_id": "comfy-core",
|
||||||
"ver": "0.3.52",
|
"ver": "0.3.52",
|
||||||
|
"Node name for S&R": "LoadAudio",
|
||||||
"ue_properties": {
|
"ue_properties": {
|
||||||
"widget_ue_connectable": {
|
"widget_ue_connectable": {
|
||||||
"audio": true,
|
"audio": true,
|
||||||
@@ -90,44 +161,6 @@
|
|||||||
null
|
null
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"id": 3,
|
|
||||||
"type": "SaveAudio",
|
|
||||||
"pos": [
|
|
||||||
-1040,
|
|
||||||
-1130
|
|
||||||
],
|
|
||||||
"size": [
|
|
||||||
270,
|
|
||||||
112
|
|
||||||
],
|
|
||||||
"flags": {},
|
|
||||||
"order": 4,
|
|
||||||
"mode": 0,
|
|
||||||
"inputs": [
|
|
||||||
{
|
|
||||||
"name": "audio",
|
|
||||||
"type": "AUDIO",
|
|
||||||
"link": 23
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"outputs": [],
|
|
||||||
"properties": {
|
|
||||||
"Node name for S&R": "SaveAudio",
|
|
||||||
"cnr_id": "comfy-core",
|
|
||||||
"ver": "0.3.52",
|
|
||||||
"ue_properties": {
|
|
||||||
"widget_ue_connectable": {
|
|
||||||
"filename_prefix": true,
|
|
||||||
"audioUI": true
|
|
||||||
},
|
|
||||||
"version": "7.0.1"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"widgets_values": [
|
|
||||||
"audio/VibeVoice"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"id": 10,
|
"id": 10,
|
||||||
"type": "MarkdownNote",
|
"type": "MarkdownNote",
|
||||||
@@ -145,105 +178,80 @@
|
|||||||
"inputs": [],
|
"inputs": [],
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"title": "Notes",
|
"title": "Notes",
|
||||||
"properties": {},
|
"properties": {
|
||||||
|
"ue_properties": {
|
||||||
|
"widget_ue_connectable": {},
|
||||||
|
"version": "7.0.1"
|
||||||
|
}
|
||||||
|
},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"## Models\n\nWill be downloaded on the first run, or download them manually and place them into the directory: /models/tts/VibeVoice\n\n| Model | Context Length | Generation Length | Weight |\n|-------|----------------|----------|----------|\n| VibeVoice-0.5B-Streaming | - | - | On the way |\n| VibeVoice-1.5B | 64K | ~90 min | [HF link](https://huggingface.co/microsoft/VibeVoice-1.5B) |\n| VibeVoice-7B-Preview| 32K | ~45 min | [HF link](https://huggingface.co/WestZhang/VibeVoice-Large-pt) |"
|
"## Models\n\nWill be downloaded on the first run, or download them manually and place them into the directory: /models/tts/VibeVoice\n\n| Model | Context Length | Generation Length | Weight |\n|-------|----------------|----------|----------|\n| VibeVoice-0.5B-Streaming | - | - | On the way |\n| VibeVoice-1.5B | 64K | ~90 min | [HF link](https://huggingface.co/microsoft/VibeVoice-1.5B) |\n| VibeVoice-Large| 32K | ~45 min | [HF link](https://huggingface.co/microsoft/VibeVoice-Large) |"
|
||||||
],
|
],
|
||||||
"color": "#432",
|
"color": "#432",
|
||||||
"bgcolor": "#653"
|
"bgcolor": "#653"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"id": 9,
|
"id": 3,
|
||||||
"type": "VibeVoiceTTS",
|
"type": "SaveAudio",
|
||||||
"pos": [
|
"pos": [
|
||||||
-1570,
|
-1040,
|
||||||
-1130
|
-1130
|
||||||
],
|
],
|
||||||
"size": [
|
"size": [
|
||||||
480,
|
270,
|
||||||
490
|
112
|
||||||
],
|
],
|
||||||
"flags": {},
|
"flags": {},
|
||||||
"order": 3,
|
"order": 4,
|
||||||
"mode": 0,
|
"mode": 0,
|
||||||
"inputs": [
|
"inputs": [
|
||||||
{
|
{
|
||||||
"name": "speaker_1_voice",
|
"name": "audio",
|
||||||
"shape": 7,
|
|
||||||
"type": "AUDIO",
|
"type": "AUDIO",
|
||||||
"link": 24
|
"link": 27
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "speaker_2_voice",
|
|
||||||
"shape": 7,
|
|
||||||
"type": "AUDIO",
|
|
||||||
"link": 21
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "speaker_3_voice",
|
|
||||||
"shape": 7,
|
|
||||||
"type": "AUDIO",
|
|
||||||
"link": null
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "speaker_4_voice",
|
|
||||||
"shape": 7,
|
|
||||||
"type": "AUDIO",
|
|
||||||
"link": null
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"name": "AUDIO",
|
|
||||||
"type": "AUDIO",
|
|
||||||
"links": [
|
|
||||||
23
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
|
"outputs": [],
|
||||||
"properties": {
|
"properties": {
|
||||||
"Node name for S&R": "VibeVoiceTTS"
|
"cnr_id": "comfy-core",
|
||||||
|
"ver": "0.3.52",
|
||||||
|
"Node name for S&R": "SaveAudio",
|
||||||
|
"ue_properties": {
|
||||||
|
"widget_ue_connectable": {
|
||||||
|
"filename_prefix": true,
|
||||||
|
"audioUI": true
|
||||||
|
},
|
||||||
|
"version": "7.0.1"
|
||||||
|
}
|
||||||
},
|
},
|
||||||
"widgets_values": [
|
"widgets_values": [
|
||||||
"VibeVoice-1.5B",
|
"audio/VibeVoice"
|
||||||
"Speaker 1: I can't believe you did it again. I waited for two hours. Two hours! Not a single call, not a text. Do you have any idea how embarrassing that was, just sitting there alone?\nSpeaker 2: Look, I know, I'm sorry, alright? Work was a complete nightmare. My boss dropped a critical deadline on me at the last minute. I didn't even have a second to breathe, let alone check my phone.\nSpeaker 1: A nightmare? That's the same excuse you used last time. I'm starting to think you just don't care. It's easier to say 'work was crazy' than to just admit that I'm not a priority for you anymore.",
|
]
|
||||||
"flash_attention_2",
|
|
||||||
1.3,
|
|
||||||
30,
|
|
||||||
309317081412002,
|
|
||||||
"randomize",
|
|
||||||
true,
|
|
||||||
0.95,
|
|
||||||
0.95,
|
|
||||||
0
|
|
||||||
],
|
|
||||||
"color": "#232",
|
|
||||||
"bgcolor": "#353"
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"links": [
|
"links": [
|
||||||
[
|
[
|
||||||
21,
|
27,
|
||||||
4,
|
11,
|
||||||
0,
|
|
||||||
9,
|
|
||||||
1,
|
|
||||||
"AUDIO"
|
|
||||||
],
|
|
||||||
[
|
|
||||||
23,
|
|
||||||
9,
|
|
||||||
0,
|
0,
|
||||||
3,
|
3,
|
||||||
0,
|
0,
|
||||||
"AUDIO"
|
"AUDIO"
|
||||||
],
|
],
|
||||||
[
|
[
|
||||||
24,
|
28,
|
||||||
|
4,
|
||||||
|
0,
|
||||||
|
11,
|
||||||
|
0,
|
||||||
|
"AUDIO"
|
||||||
|
],
|
||||||
|
[
|
||||||
|
29,
|
||||||
8,
|
8,
|
||||||
0,
|
0,
|
||||||
9,
|
11,
|
||||||
0,
|
1,
|
||||||
"AUDIO"
|
"AUDIO"
|
||||||
]
|
]
|
||||||
],
|
],
|
||||||
@@ -253,13 +261,13 @@
|
|||||||
"ue_links": [],
|
"ue_links": [],
|
||||||
"links_added_by_ue": [],
|
"links_added_by_ue": [],
|
||||||
"ds": {
|
"ds": {
|
||||||
"scale": 1.0834705943388634,
|
"scale": 1.2100000000000004,
|
||||||
"offset": [
|
"offset": [
|
||||||
2057.223518869778,
|
2024.7933884297524,
|
||||||
1246.6132796718712
|
1252.3140495867776
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
"frontendVersion": "1.25.10",
|
"frontendVersion": "1.25.11",
|
||||||
"VHS_latentpreview": false,
|
"VHS_latentpreview": false,
|
||||||
"VHS_latentpreviewrate": 0,
|
"VHS_latentpreviewrate": 0,
|
||||||
"VHS_MetadataImage": true,
|
"VHS_MetadataImage": true,
|
||||||
|
|||||||
Binary file not shown.
|
Before Width: | Height: | Size: 134 KiB After Width: | Height: | Size: 138 KiB |
@@ -37,9 +37,9 @@ MODEL_CONFIGS = {
|
|||||||
"size_gb": 3.0,
|
"size_gb": 3.0,
|
||||||
"tokenizer_repo": "Qwen/Qwen2.5-1.5B"
|
"tokenizer_repo": "Qwen/Qwen2.5-1.5B"
|
||||||
},
|
},
|
||||||
"VibeVoice-Large-pt": {
|
"VibeVoice-Large": {
|
||||||
"repo_id": "WestZhang/VibeVoice-Large-pt",
|
"repo_id": "microsoft/VibeVoice-Large",
|
||||||
"size_gb": 14.0,
|
"size_gb": 17.4,
|
||||||
"tokenizer_repo": "Qwen/Qwen2.5-7B"
|
"tokenizer_repo": "Qwen/Qwen2.5-7B"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -281,14 +281,14 @@ class VibeVoiceLoader:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Failed to load model with {final_attention_mode} attention: {e}")
|
logger.error(f"Failed to load model with {final_attention_mode} attention: {e}")
|
||||||
|
|
||||||
# Progressive fallback: flash -> sdpa -> eager
|
# Progressive fallback: flash -> sdpa -> eager
|
||||||
if final_attention_mode == "flash_attention_2":
|
if final_attention_mode == "flash_attention_2":
|
||||||
logger.info("Attempting fallback to SDPA...")
|
logger.info("Attempting fallback to SDPA...")
|
||||||
return VibeVoiceLoader.load_model(model_name, "sdpa")
|
return VibeVoiceLoader.load_model(model_name, device, "sdpa")
|
||||||
elif final_attention_mode == "sdpa":
|
elif final_attention_mode == "sdpa":
|
||||||
logger.info("Attempting fallback to eager...")
|
logger.info("Attempting fallback to eager...")
|
||||||
return VibeVoiceLoader.load_model(model_name, "eager")
|
return VibeVoiceLoader.load_model(model_name, device, "eager")
|
||||||
else:
|
else:
|
||||||
# If eager fails, something is seriously wrong
|
# If eager fails, something is seriously wrong
|
||||||
raise RuntimeError(f"Failed to load model even with eager attention: {e}")
|
raise RuntimeError(f"Failed to load model even with eager attention: {e}")
|
||||||
|
|||||||
Reference in New Issue
Block a user