diff --git a/__init__.py b/__init__.py
index d9526d4..d48be41 100644
--- a/__init__.py
+++ b/__init__.py
@@ -11,14 +11,15 @@ import folder_paths
 
 from .vibevoice_nodes import NODE_CLASS_MAPPINGS, NODE_DISPLAY_NAME_MAPPINGS
 
-# logger
+# Configure a logger for the entire custom node package
 logger = logging.getLogger(__name__)
+logger.setLevel(logging.WARNING)
+
 if not logger.hasHandlers():
     handler = logging.StreamHandler(sys.stdout)
     formatter = logging.Formatter(f"[ComfyUI-VibeVoice] %(message)s")
     handler.setFormatter(formatter)
     logger.addHandler(handler)
-    logger.setLevel(logging.INFO)
 
 
 VIBEVOICE_MODEL_SUBDIR = os.path.join("tts", "VibeVoice")
diff --git a/example_workflows/VibeVoice_example.json b/example_workflows/VibeVoice_example.json
index 1a496bc..ed12a3a 100644
--- a/example_workflows/VibeVoice_example.json
+++ b/example_workflows/VibeVoice_example.json
@@ -1,11 +1,11 @@
 {
   "id": "b91265e5-1b03-4b63-8dc3-4abd9a030e08",
   "revision": 0,
-  "last_node_id": 5,
-  "last_link_id": 16,
+  "last_node_id": 10,
+  "last_link_id": 24,
   "nodes": [
     {
-      "id": 2,
+      "id": 4,
       "type": "LoadAudio",
       "pos": [
         -1900,
@@ -24,14 +24,14 @@
           "name": "AUDIO",
           "type": "AUDIO",
           "links": [
-            15
+            21
           ]
         }
       ],
       "properties": {
+        "Node name for S&R": "LoadAudio",
         "cnr_id": "comfy-core",
         "ver": "0.3.52",
-        "Node name for S&R": "LoadAudio",
         "ue_properties": {
           "widget_ue_connectable": {
             "audio": true,
@@ -42,13 +42,13 @@
         }
       },
       "widgets_values": [
-        "male_petergriffin.wav",
+        "male_rickmorty.mp3",
         null,
         null
       ]
     },
     {
-      "id": 4,
+      "id": 8,
       "type": "LoadAudio",
       "pos": [
         -1900,
@@ -67,14 +67,14 @@
           "name": "AUDIO",
           "type": "AUDIO",
           "links": [
-            16
+            24
           ]
         }
       ],
       "properties": {
+        "Node name for S&R": "LoadAudio",
         "cnr_id": "comfy-core",
         "ver": "0.3.52",
-        "Node name for S&R": "LoadAudio",
         "ue_properties": {
           "widget_ue_connectable": {
             "audio": true,
@@ -85,7 +85,7 @@
         }
       },
       "widgets_values": [
-        "male_rickmorty.mp3",
+        "male_stewie.mp3",
         null,
         null
       ]
@@ -102,20 +102,20 @@
         112
       ],
       "flags": {},
-      "order": 3,
+      "order": 4,
       "mode": 0,
       "inputs": [
         {
           "name": "audio",
           "type": "AUDIO",
-          "link": 13
+          "link": 23
         }
       ],
       "outputs": [],
       "properties": {
+        "Node name for S&R": "SaveAudio",
         "cnr_id": "comfy-core",
         "ver": "0.3.52",
-        "Node name for S&R": "SaveAudio",
         "ue_properties": {
           "widget_ue_connectable": {
             "filename_prefix": true,
@@ -129,31 +129,55 @@
       ]
     },
     {
-      "id": 5,
+      "id": 10,
+      "type": "MarkdownNote",
+      "pos": [
+        -1030,
+        -960
+      ],
+      "size": [
+        420,
+        210
+      ],
+      "flags": {},
+      "order": 2,
+      "mode": 0,
+      "inputs": [],
+      "outputs": [],
+      "title": "Notes",
+      "properties": {},
+      "widgets_values": [
+        "## Models\n\nWill be downloaded on the first run, or download them manually and place them into the directory: /models/tts/VibeVoice\n\n| Model | Context Length | Generation Length |  Weight |\n|-------|----------------|----------|----------|\n| VibeVoice-0.5B-Streaming | - | - | On the way |\n| VibeVoice-1.5B | 64K | ~90 min | [HF link](https://huggingface.co/microsoft/VibeVoice-1.5B) |\n| VibeVoice-7B-Preview| 32K | ~45 min | [HF link](https://huggingface.co/WestZhang/VibeVoice-Large-pt) |"
+      ],
+      "color": "#432",
+      "bgcolor": "#653"
+    },
+    {
+      "id": 9,
       "type": "VibeVoiceTTS",
       "pos": [
         -1570,
         -1130
       ],
       "size": [
-        460,
-        460
+        480,
+        490
       ],
       "flags": {},
-      "order": 2,
+      "order": 3,
       "mode": 0,
       "inputs": [
         {
           "name": "speaker_1_voice",
           "shape": 7,
           "type": "AUDIO",
-          "link": 15
+          "link": 24
         },
         {
           "name": "speaker_2_voice",
           "shape": 7,
           "type": "AUDIO",
-          "link": 16
+          "link": 21
         },
         {
           "name": "speaker_3_voice",
@@ -173,7 +197,7 @@
           "name": "AUDIO",
           "type": "AUDIO",
           "links": [
-            13
+            23
           ]
         }
       ],
@@ -182,11 +206,12 @@
       },
       "widgets_values": [
         "VibeVoice-1.5B",
-        "Speaker 1: Hey, remember \"See You Again\"?\nSpeaker 2: Yeah… from Furious 7, right? That song always hits deep.\nSpeaker 1: Let me try to sing a part of it for you. \"It's been a long day… without you, my friend. And I'll tell you all about it when I see you again…\"\nSpeaker 2: Wow… that line. Every time.\nSpeaker 1: Yeah, and then this part always makes me think of the people I've lost. \"We've come a long way… from where we began. Oh, I'll tell you all about it when I see you again…\"\nSpeaker 2: It's beautiful, really. It's not just sad—it's like… hopeful.\nSpeaker 1: Right? Like no matter how far apart we are, there's still that promise.",
+        "Speaker 1: I can't believe you did it again. I waited for two hours. Two hours! Not a single call, not a text. Do you have any idea how embarrassing that was, just sitting there alone?\nSpeaker 2: Look, I know, I'm sorry, alright? Work was a complete nightmare. My boss dropped a critical deadline on me at the last minute. I didn't even have a second to breathe, let alone check my phone.\nSpeaker 1: A nightmare? That's the same excuse you used last time. I'm starting to think you just don't care. It's easier to say 'work was crazy' than to just admit that I'm not a priority for you anymore.",
+        "flash_attention_2",
         1.3,
-        50,
-        42,
-        "fixed",
+        30,
+        309317081412002,
+        "randomize",
         true,
         0.95,
         0.95,
@@ -198,28 +223,28 @@
   ],
   "links": [
     [
-      13,
-      5,
+      21,
+      4,
+      0,
+      9,
+      1,
+      "AUDIO"
+    ],
+    [
+      23,
+      9,
       0,
       3,
       0,
       "AUDIO"
     ],
     [
-      15,
-      2,
+      24,
+      8,
       0,
-      5,
+      9,
       0,
       "AUDIO"
-    ],
-    [
-      16,
-      4,
-      0,
-      5,
-      1,
-      "AUDIO"
     ]
   ],
   "groups": [],
@@ -228,10 +253,10 @@
     "ue_links": [],
     "links_added_by_ue": [],
     "ds": {
-      "scale": 1.310999419150025,
+      "scale": 1.0834705943388634,
       "offset": [
-        2000,
-        1230
+        2057.223518869778,
+        1246.6132796718712
       ]
     },
     "frontendVersion": "1.25.10",
diff --git a/example_workflows/VibeVoice_example.png b/example_workflows/VibeVoice_example.png
index 5f353db..bec95db 100644
Binary files a/example_workflows/VibeVoice_example.png and b/example_workflows/VibeVoice_example.png differ
diff --git a/vibevoice_nodes.py b/vibevoice_nodes.py
index 29cbbb3..1b75e2d 100644
--- a/vibevoice_nodes.py
+++ b/vibevoice_nodes.py
@@ -3,22 +3,30 @@ import re
 import torch
 import numpy as np
 import random
-from huggingface_hub import snapshot_download
+from huggingface_hub import hf_hub_download, snapshot_download
 import logging
-import librosa
+
 import gc
 
 import folder_paths
 import comfy.model_management as model_management
 import comfy.model_patcher
 from comfy.utils import ProgressBar
+from comfy.model_management import throw_exception_if_processing_interrupted
 
-
-from transformers import set_seed
+from transformers import set_seed, AutoTokenizer
 from .vibevoice.modular.modeling_vibevoice_inference import VibeVoiceForConditionalGenerationInference
 from .vibevoice.processor.vibevoice_processor import VibeVoiceProcessor
+from .vibevoice.processor.vibevoice_tokenizer_processor import VibeVoiceTokenizerProcessor
+from .vibevoice.modular.modular_vibevoice_text_tokenizer import VibeVoiceTextTokenizerFast
 
-logger = logging.getLogger("comfyui_vibevoice")
+try:
+    import librosa
+except ImportError:
+    print("VibeVoice Node: `librosa` is not installed. Resampling of reference audio will not be available.")
+    librosa = None
+
+logger = logging.getLogger(__name__)
 
 LOADED_MODELS = {}
 VIBEVOICE_PATCHER_CACHE = {}
@@ -27,10 +35,12 @@ MODEL_CONFIGS = {
     "VibeVoice-1.5B": {
         "repo_id": "microsoft/VibeVoice-1.5B",
         "size_gb": 3.0,
+        "tokenizer_repo": "Qwen/Qwen2.5-1.5B"
     },
     "VibeVoice-Large-pt": {
         "repo_id": "WestZhang/VibeVoice-Large-pt",
         "size_gb": 14.0,
+        "tokenizer_repo": "Qwen/Qwen2.5-7B" 
     }
 }
 
@@ -80,7 +90,7 @@ class VibeVoiceModelHandler(torch.nn.Module):
         self.size = int(MODEL_CONFIGS[model_pack_name].get("size_gb", 4.0) * (1024**3))
 
     def load_model(self, device, attention_mode="eager"):
-        self.model, self.processor = VibeVoiceLoader.load_model(self.model_pack_name, attention_mode)
+        self.model, self.processor = VibeVoiceLoader.load_model(self.model_pack_name, device , attention_mode)
         self.model.to(device)
 
 class VibeVoicePatcher(comfy.model_patcher.ModelPatcher):
@@ -170,7 +180,7 @@ class VibeVoiceLoader:
         return attention_mode
 
     @staticmethod
-    def load_model(model_name: str, attention_mode: str = "eager"):
+    def load_model(model_name: str, device, attention_mode: str = "eager"):
         # Validate attention mode
         if attention_mode not in ATTENTION_MODES:
             logger.warning(f"Unknown attention mode '{attention_mode}', falling back to eager")
@@ -185,10 +195,19 @@ class VibeVoiceLoader:
 
         model_path = VibeVoiceLoader.get_model_path(model_name)
         
-        print(f"Loading VibeVoice model components from: {model_path}")
-        processor = VibeVoiceProcessor.from_pretrained(model_path)
+        logger.info(f"Loading VibeVoice model components from: {model_path}")
+
         
-        torch_dtype = model_management.text_encoder_dtype(model_management.get_torch_device())
+        tokenizer_repo = MODEL_CONFIGS[model_name].get("tokenizer_repo")
+        try:
+            tokenizer_file_path = hf_hub_download(repo_id=tokenizer_repo, filename="tokenizer.json")
+        except Exception as e:
+            raise RuntimeError(f"Could not download tokenizer.json for {tokenizer_repo}. Error: {e}")
+
+        vibevoice_tokenizer = VibeVoiceTextTokenizerFast(tokenizer_file=tokenizer_file_path)
+        audio_processor = VibeVoiceTokenizerProcessor()
+        processor = VibeVoiceProcessor(tokenizer=vibevoice_tokenizer, audio_processor=audio_processor)
+        torch_dtype = model_management.text_encoder_dtype(device)
         device_name = torch.cuda.get_device_name() if torch.cuda.is_available() else ""
         
         # Check compatibility and potentially fall back to safer mode
@@ -196,15 +215,15 @@ class VibeVoiceLoader:
             attention_mode, torch_dtype, device_name
         )
         
-        print(f"Requested attention mode: {attention_mode}")
+        logger.info(f"Requested attention mode: {attention_mode}")
         if final_attention_mode != attention_mode:
-            print(f"Using attention mode: {final_attention_mode} (automatic fallback)")
+            logger.info(f"Using attention mode: {final_attention_mode} (automatic fallback)")
             # Update cache key to reflect actual mode used
             cache_key = f"{model_name}_attn_{final_attention_mode}"
             if cache_key in LOADED_MODELS:
                 return LOADED_MODELS[cache_key]
         else:
-            print(f"Using attention mode: {final_attention_mode}")
+            logger.info(f"Using attention mode: {final_attention_mode}")
         
         logger.info(f"Final attention implementation: {final_attention_mode}")
 
@@ -236,6 +255,7 @@ class VibeVoiceLoader:
                 model_path,
                 torch_dtype=torch_dtype,
                 attn_implementation=final_attention_mode,
+                device_map=device
             )
             model.eval()
             
@@ -329,6 +349,8 @@ def preprocess_comfy_audio(audio_dict: dict, target_sr: int = 24000) -> np.ndarr
         waveform = waveform / max_val
 
     if original_sr != target_sr:
+        if librosa is None:
+            raise ImportError("`librosa` package is required for audio resampling. Please install it with `pip install librosa`.")
         logger.warning(f"Resampling reference audio from {original_sr}Hz to {target_sr}Hz.")
         waveform = librosa.resample(y=waveform, orig_sr=original_sr, target_sr=target_sr)
     
@@ -339,6 +361,24 @@ def preprocess_comfy_audio(audio_dict: dict, target_sr: int = 24000) -> np.ndarr
         
     return waveform.astype(np.float32)
 
+def check_for_interrupt():
+    """Report whether ComfyUI has signalled that processing should stop.
+
+    Passed to ``model.generate`` as ``stop_check_fn``, so the interrupt is
+    turned into a boolean instead of being raised from inside this hook.
+
+    Returns:
+        bool: True if an interrupt was signalled, False otherwise.
+    """
+    try:
+        throw_exception_if_processing_interrupted()
+    except model_management.InterruptProcessingException:
+        # Catch only the interrupt signal: a bare `except:` would also report
+        # unrelated errors, KeyboardInterrupt and SystemExit as a user cancel.
+        # NOTE(review): ComfyUI appears to reset its interrupt flag when it
+        # raises -- confirm the caller still aborts after generate() returns.
+        return True
+    return False
 
 class VibeVoiceTTSNode:
     @classmethod
@@ -508,7 +536,7 @@ class VibeVoiceTTSNode:
                     outputs = model.generate(
                         **inputs, max_new_tokens=None, cfg_scale=cfg_scale,
                         tokenizer=processor.tokenizer, generation_config=generation_config,
-                        verbose=False
+                        verbose=False, stop_check_fn=check_for_interrupt
                     )
                     # Note: The model.generate method doesn't support progress callbacks in the current VibeVoice implementation
                     # But we check for interruption at the start and end of generation