Update large model to microsoft/VibeVoice-Large; pass device in attention fallback; refresh example workflow

2026-04-25 09:19:22 +00:00 · 2025-09-01 11:57:35 +03:00
parent 37803a884f
commit 64fdb94e16
3 changed files with 128 additions and 120 deletions
--- a/example_workflows/VibeVoice_example.json
+++ b/example_workflows/VibeVoice_example.json
@@ -1,8 +1,8 @@
 {
  "id": "b91265e5-1b03-4b63-8dc3-4abd9a030e08",
  "revision": 0,
-  "last_node_id": 10,
-  "last_link_id": 24,
+  "last_node_id": 11,
+  "last_link_id": 29,
  "nodes": [
    {
      "id": 4,
@@ -24,14 +24,14 @@
          "name": "AUDIO",
          "type": "AUDIO",
          "links": [
-            21
+            28
          ]
        }
      ],
      "properties": {
-        "Node name for S&R": "LoadAudio",
        "cnr_id": "comfy-core",
        "ver": "0.3.52",
+        "Node name for S&R": "LoadAudio",
        "ue_properties": {
          "widget_ue_connectable": {
            "audio": true,
@@ -47,6 +47,77 @@
        null
      ]
    },
+    {
+      "id": 11,
+      "type": "VibeVoiceTTS",
+      "pos": [
+        -1570,
+        -1130
+      ],
+      "size": [
+        460,
+        510
+      ],
+      "flags": {},
+      "order": 3,
+      "mode": 0,
+      "inputs": [
+        {
+          "name": "speaker_1_voice",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": 28
+        },
+        {
+          "name": "speaker_2_voice",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": 29
+        },
+        {
+          "name": "speaker_3_voice",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": null
+        },
+        {
+          "name": "speaker_4_voice",
+          "shape": 7,
+          "type": "AUDIO",
+          "link": null
+        }
+      ],
+      "outputs": [
+        {
+          "name": "AUDIO",
+          "type": "AUDIO",
+          "links": [
+            27
+          ]
+        }
+      ],
+      "properties": {
+        "cnr_id": "ComfyUI-VibeVoice",
+        "ver": "37803a884fb8f9b43c38286f6d654c7f97181a73",
+        "Node name for S&R": "VibeVoiceTTS"
+      },
+      "widgets_values": [
+        "VibeVoice-1.5B",
+        "Speaker 1: I can't believe you did it again. I waited for two hours. Two hours! Not a single call, not a text. Do you have any idea how embarrassing that was, just sitting there alone?\nSpeaker 2: Look, I know, I'm sorry, alright? Work was a complete nightmare. My boss dropped a critical deadline on me at the last minute. I didn't even have a second to breathe, let alone check my phone.\nSpeaker 1: A nightmare? That's the same excuse you used last time. I'm starting to think you just don't care. It's easier to say 'work was crazy' than to just admit that I'm not a priority for you anymore.",
+        false,
+        "sdpa",
+        1.3,
+        10,
+        56109085141530,
+        "randomize",
+        true,
+        0.95,
+        0.95,
+        0
+      ],
+      "color": "#232",
+      "bgcolor": "#353"
+    },
    {
      "id": 8,
      "type": "LoadAudio",
@@ -67,14 +138,14 @@
          "name": "AUDIO",
          "type": "AUDIO",
          "links": [
-            24
+            29
          ]
        }
      ],
      "properties": {
-        "Node name for S&R": "LoadAudio",
        "cnr_id": "comfy-core",
        "ver": "0.3.52",
+        "Node name for S&R": "LoadAudio",
        "ue_properties": {
          "widget_ue_connectable": {
            "audio": true,
@@ -90,44 +161,6 @@
        null
      ]
    },
-    {
-      "id": 3,
-      "type": "SaveAudio",
-      "pos": [
-        -1040,
-        -1130
-      ],
-      "size": [
-        270,
-        112
-      ],
-      "flags": {},
-      "order": 4,
-      "mode": 0,
-      "inputs": [
-        {
-          "name": "audio",
-          "type": "AUDIO",
-          "link": 23
-        }
-      ],
-      "outputs": [],
-      "properties": {
-        "Node name for S&R": "SaveAudio",
-        "cnr_id": "comfy-core",
-        "ver": "0.3.52",
-        "ue_properties": {
-          "widget_ue_connectable": {
-            "filename_prefix": true,
-            "audioUI": true
-          },
-          "version": "7.0.1"
-        }
-      },
-      "widgets_values": [
-        "audio/VibeVoice"
-      ]
-    },
    {
      "id": 10,
      "type": "MarkdownNote",
@@ -145,105 +178,80 @@
      "inputs": [],
      "outputs": [],
      "title": "Notes",
-      "properties": {},
+      "properties": {
+        "ue_properties": {
+          "widget_ue_connectable": {},
+          "version": "7.0.1"
+        }
+      },
      "widgets_values": [
-        "## Models\n\nWill be downloaded on the first run, or download them manually and place them into the directory: /models/tts/VibeVoice\n\n| Model | Context Length | Generation Length |  Weight |\n|-------|----------------|----------|----------|\n| VibeVoice-0.5B-Streaming | - | - | On the way |\n| VibeVoice-1.5B | 64K | ~90 min | [HF link](https://huggingface.co/microsoft/VibeVoice-1.5B) |\n| VibeVoice-7B-Preview| 32K | ~45 min | [HF link](https://huggingface.co/WestZhang/VibeVoice-Large-pt) |"
+        "## Models\n\nWill be downloaded on the first run, or download them manually and place them into the directory: /models/tts/VibeVoice\n\n| Model | Context Length | Generation Length |  Weight |\n|-------|----------------|----------|----------|\n| VibeVoice-0.5B-Streaming | - | - | On the way |\n| VibeVoice-1.5B | 64K | ~90 min | [HF link](https://huggingface.co/microsoft/VibeVoice-1.5B) |\n| VibeVoice-Large| 32K | ~45 min | [HF link](https://huggingface.co/microsoft/VibeVoice-Large) |"
      ],
      "color": "#432",
      "bgcolor": "#653"
    },
    {
-      "id": 9,
-      "type": "VibeVoiceTTS",
+      "id": 3,
+      "type": "SaveAudio",
      "pos": [
-        -1570,
+        -1040,
        -1130
      ],
      "size": [
-        480,
-        490
+        270,
+        112
      ],
      "flags": {},
-      "order": 3,
+      "order": 4,
      "mode": 0,
      "inputs": [
        {
-          "name": "speaker_1_voice",
-          "shape": 7,
+          "name": "audio",
          "type": "AUDIO",
-          "link": 24
-        },
-        {
-          "name": "speaker_2_voice",
-          "shape": 7,
-          "type": "AUDIO",
-          "link": 21
-        },
-        {
-          "name": "speaker_3_voice",
-          "shape": 7,
-          "type": "AUDIO",
-          "link": null
-        },
-        {
-          "name": "speaker_4_voice",
-          "shape": 7,
-          "type": "AUDIO",
-          "link": null
-        }
-      ],
-      "outputs": [
-        {
-          "name": "AUDIO",
-          "type": "AUDIO",
-          "links": [
-            23
-          ]
+          "link": 27
        }
      ],
+      "outputs": [],
      "properties": {
-        "Node name for S&R": "VibeVoiceTTS"
+        "cnr_id": "comfy-core",
+        "ver": "0.3.52",
+        "Node name for S&R": "SaveAudio",
+        "ue_properties": {
+          "widget_ue_connectable": {
+            "filename_prefix": true,
+            "audioUI": true
+          },
+          "version": "7.0.1"
+        }
      },
      "widgets_values": [
-        "VibeVoice-1.5B",
-        "Speaker 1: I can't believe you did it again. I waited for two hours. Two hours! Not a single call, not a text. Do you have any idea how embarrassing that was, just sitting there alone?\nSpeaker 2: Look, I know, I'm sorry, alright? Work was a complete nightmare. My boss dropped a critical deadline on me at the last minute. I didn't even have a second to breathe, let alone check my phone.\nSpeaker 1: A nightmare? That's the same excuse you used last time. I'm starting to think you just don't care. It's easier to say 'work was crazy' than to just admit that I'm not a priority for you anymore.",
-        "flash_attention_2",
-        1.3,
-        30,
-        309317081412002,
-        "randomize",
-        true,
-        0.95,
-        0.95,
-        0
-      ],
-      "color": "#232",
-      "bgcolor": "#353"
+        "audio/VibeVoice"
+      ]
    }
  ],
  "links": [
    [
-      21,
-      4,
-      0,
-      9,
-      1,
-      "AUDIO"
-    ],
-    [
-      23,
-      9,
+      27,
+      11,
      0,
      3,
      0,
      "AUDIO"
    ],
    [
-      24,
+      28,
+      4,
+      0,
+      11,
+      0,
+      "AUDIO"
+    ],
+    [
+      29,
      8,
      0,
-      9,
-      0,
+      11,
+      1,
      "AUDIO"
    ]
  ],
@@ -253,13 +261,13 @@
    "ue_links": [],
    "links_added_by_ue": [],
    "ds": {
-      "scale": 1.0834705943388634,
+      "scale": 1.2100000000000004,
      "offset": [
-        2057.223518869778,
-        1246.6132796718712
+        2024.7933884297524,
+        1252.3140495867776
      ]
    },
-    "frontendVersion": "1.25.10",
+    "frontendVersion": "1.25.11",
    "VHS_latentpreview": false,
    "VHS_latentpreviewrate": 0,
    "VHS_MetadataImage": true,
--- a/example_workflows/VibeVoice_example.png
+++ b/example_workflows/VibeVoice_example.png
--- a/vibevoice_nodes.py
+++ b/vibevoice_nodes.py
@@ -37,9 +37,9 @@ MODEL_CONFIGS = {
        "size_gb": 3.0,
        "tokenizer_repo": "Qwen/Qwen2.5-1.5B"
    },
-    "VibeVoice-Large-pt": {
-        "repo_id": "WestZhang/VibeVoice-Large-pt",
-        "size_gb": 14.0,
+    "VibeVoice-Large": {
+        "repo_id": "microsoft/VibeVoice-Large",
+        "size_gb": 17.4,
        "tokenizer_repo": "Qwen/Qwen2.5-7B" 
    }
 }
@@ -281,14 +281,14 @@ class VibeVoiceLoader:
            
        except Exception as e:
            logger.error(f"Failed to load model with {final_attention_mode} attention: {e}")
-            
+
            # Progressive fallback: flash -> sdpa -> eager
            if final_attention_mode == "flash_attention_2":
                logger.info("Attempting fallback to SDPA...")
-                return VibeVoiceLoader.load_model(model_name, "sdpa")
+                return VibeVoiceLoader.load_model(model_name, device, "sdpa")
            elif final_attention_mode == "sdpa":
                logger.info("Attempting fallback to eager...")
-                return VibeVoiceLoader.load_model(model_name, "eager")
+                return VibeVoiceLoader.load_model(model_name, device, "eager")
            else:
                # If eager fails, something is seriously wrong
                raise RuntimeError(f"Failed to load model even with eager attention: {e}")