save audio - embedded player

This commit is contained in:
snicolast
2025-10-13 20:15:52 +13:00
parent 7f426f5033
commit 5b33e02bc0
5 changed files with 223 additions and 24 deletions

View File

@@ -8,6 +8,7 @@ Original repo: https://github.com/index-tts/index-tts
![ComfyUI-IndexTTS2 nodes](images/overview.png)
## Updates
- 2025-10-13: Save Audio node now acts as an output node with an embedded player overlay for instant preview inside the graph (no need for downstream preview nodes).
- 2025-10-08: Default FP32 with optional FP16 toggle, output gain control, and a Save Audio helper node (wav/mp3 + quality parameters).
- 2025-09-22: Added IndexTTS2 Advanced node exposing sampling, speed, seed, and other generation controls.
@@ -27,7 +28,7 @@ Original repo: https://github.com/index-tts/index-tts
- **IndexTTS2 Advanced** - Simple inputs plus overrides for sampling, speech speed, pauses, CFG, seed, FP16 toggle, and output gain.
- **IndexTTS2 Emotion Vector** - eight sliders (0.0-1.4, sum <= 1.5) producing an emotion vector.
- **IndexTTS2 Emotion From Text** requires ModelScope and local QwenEmotion; turns short text into an emotion vector + summary.
- **IndexTTS2 Save Audio** - saves generated audio tensors to disk with wav/mp3 options.
- **IndexTTS2 Save Audio** - saves generated audio tensors to disk with wav/mp3 options and surfaces an inline player directly on the node after execution.
## Examples
- Speaker audio -> IndexTTS2 Simple -> Preview/Save Audio

View File

@@ -20,3 +20,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
"IndexTTS2Simple": "IndexTTS2 Simple",
}
WEB_DIRECTORY = "./web"
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS", "WEB_DIRECTORY"]

Binary file not shown.

Before

Width:  |  Height:  |  Size: 233 KiB

After

Width:  |  Height:  |  Size: 293 KiB

View File

@@ -4,6 +4,9 @@ import numpy as np
import folder_paths
class IndexTTS2SaveAudio:
def __init__(self):
self._ui_type = "output"
@classmethod
def INPUT_TYPES(cls):
return {
@@ -21,9 +24,9 @@ class IndexTTS2SaveAudio:
},
}
RETURN_TYPES = ("AUDIO", "STRING")
RETURN_NAMES = ("audio", "saved_path")
RETURN_TYPES: tuple = ()
FUNCTION = "save"
OUTPUT_NODE = True
CATEGORY = "Audio/IndexTTS"
def _normalize(self, mono: np.ndarray):
@@ -55,21 +58,29 @@ class IndexTTS2SaveAudio:
wf.writeframes(pcm16.T.tobytes())
return True
def _compose_paths(self, name_prefix: str, batch_count: int) -> List[str]:
def _compose_paths(self, name_prefix: str, batch_count: int, extension: str):
output_dir = folder_paths.get_output_directory()
# Use Comfy's helper to build prefix and a counter
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(
f"audio/{name_prefix}", output_dir
)
paths = []
entries = []
normalized_subfolder = subfolder or ""
for b in range(batch_count):
filename_with_batch = filename.replace("%batch_num%", str(b))
file = f"{filename_with_batch}_{counter:05}_"
paths.append(os.path.join(full_output_folder, file))
file = f"{filename_with_batch}_{counter:05}_.{extension}"
entries.append(
{
"abs_path": os.path.join(full_output_folder, file),
"filename": file,
"subfolder": normalized_subfolder,
"type": self._ui_type,
}
)
counter += 1
return paths
return entries
def _save_with_av(self, fmt: str, audio, filename_prefix: str, quality: str = "320k") -> List[str]:
def _save_with_av(self, fmt: str, audio, filename_prefix: str, quality: str = "320k") -> List[dict]:
try:
from comfy_extras import nodes_audio as ce_audio # type: ignore
except Exception as e:
@@ -82,12 +93,14 @@ class IndexTTS2SaveAudio:
raise ValueError(f"Unsupported format for AV saver (mp3 only): {fmt}")
results = ui.get("ui", {}).get("audio", [])
base = folder_paths.get_output_directory()
out: List[str] = []
for item in results:
sub = item.get("subfolder") or ""
out.append(os.path.join(base, sub, item.get("filename", "")))
return out
return [
{
"filename": item.get("filename", ""),
"subfolder": item.get("subfolder") or "",
"type": item.get("type") or self._ui_type,
}
for item in results
]
def save(self, audio, name: str, format: str,
normalize_peak: bool = False,
@@ -124,23 +137,27 @@ class IndexTTS2SaveAudio:
batch.append(np_w)
name_prefix = (name or "tts2").strip() or "tts2"
paths: List[str] = []
ui_results: List[dict] = []
if format == "wav":
base_paths = self._compose_paths(name_prefix, len(batch))
for np_w, base in zip(batch, base_paths):
out_path = base + ".wav"
entries = self._compose_paths(name_prefix, len(batch), "wav")
for np_w, entry in zip(batch, entries):
out_path = entry["abs_path"]
os.makedirs(os.path.dirname(out_path), exist_ok=True)
self._save_wav(out_path, np_w, sr, wav_pcm)
paths.append(out_path)
ui_results.append(
{
"filename": entry["filename"],
"subfolder": entry["subfolder"],
"type": entry["type"],
}
)
elif format == "mp3":
paths = self._save_with_av("mp3", audio, filename_prefix=f"audio/{name_prefix}", quality=mp3_bitrate)
ui_results = self._save_with_av("mp3", audio, filename_prefix=f"audio/{name_prefix}", quality=mp3_bitrate)
else:
raise ValueError(f"Unsupported format: {format}")
saved = "\n".join(paths)
# passthrough audio so the graph can continue if needed
return (audio, saved)
return {"ui": {"audio": ui_results}}

177
web/js/save_audio_player.js Normal file
View File

@@ -0,0 +1,177 @@
import { app } from '../../../scripts/app.js';
import { api } from '../../../scripts/api.js';
// Gap (px) between the node's border and the floating player overlay.
const MARGIN = 6;
// Module-level guard so the shared stylesheet is added to <head> only once.
let stylesInjected = false;
// Inject the overlay's CSS into the document head. Idempotent: repeat
// calls are no-ops thanks to the stylesInjected flag.
function ensureStyles() {
  if (stylesInjected) return;
  stylesInjected = true;
  const style = document.createElement('style');
  style.textContent = `
  .indextts2-audio-player {
    position: absolute;
    z-index: 12;
    pointer-events: auto;
    display: flex;
    flex-direction: column;
    gap: 4px;
    padding: 6px;
    background: var(--bg-color, var(--comfy-menu-bg, #2a2a2a));
    border: 1px solid var(--border-color, #444);
    border-radius: 8px;
    box-shadow: 0 2px 8px rgba(0, 0, 0, 0.35);
    min-width: 180px;
    transition: opacity 0.2s ease, filter 0.2s ease;
  }
  .indextts2-audio-player[data-state="inactive"] {
    opacity: 0.9;
  }
  .indextts2-audio-player[data-state="inactive"] audio {
    pointer-events: none;
    filter: grayscale(0.8);
    opacity: 0.42;
  }
  .indextts2-audio-player__title {
    font-size: 11px;
    font-weight: 600;
    color: var(--descrip-text, #c7c7c7);
  }
  `;
  document.head.appendChild(style);
}
// Compute inline CSS that pins the DOM overlay onto the node's widget row,
// tracking the graph canvas's current pan/zoom.
// `ctx` is the LiteGraph canvas 2D context, `y` the widget's vertical offset
// within the node. `widgetWidth` is currently unused here (width is set by
// the caller in draw()).
function buildTransformStyle(ctx, widgetWidth, y) {
  const { canvas } = ctx;
  const rect = canvas.getBoundingClientRect();
  // Compose: backing-store -> CSS pixel scale, then the canvas's own
  // transform (pan/zoom), then a shift into the widget area. The resulting
  // matrix is applied as a CSS transform on the absolutely positioned div.
  const matrix = new DOMMatrix()
    .scaleSelf(rect.width / canvas.width, rect.height / canvas.height)
    .multiplySelf(ctx.getTransform())
    .translateSelf(MARGIN, y + MARGIN);
  return {
    transformOrigin: '0 0',
    transform: matrix.toString(),
    // Anchor at the canvas element's document-space position so the
    // transform above operates in canvas-local coordinates.
    left: `${rect.left + window.scrollX}px`,
    top: `${rect.top + window.scrollY}px`,
  };
}
// Build the backend /view URL for one saved-audio UI entry
// ({filename, subfolder, type}). Returns null when the entry has no
// filename. Appends the frontend's cache-busting parameter when available.
function buildAudioUrl(item) {
  if (!(item && item.filename)) {
    return null;
  }
  const query = new URLSearchParams({
    filename: item.filename,
    type: item.type || 'output',
  });
  if (item.subfolder) {
    query.set('subfolder', item.subfolder);
  }
  const base = api.apiURL(`/view?${query.toString()}`);
  // app.getRandParam (when present) returns a query-string suffix that
  // defeats browser caching of the audio file.
  const bust = typeof app.getRandParam === 'function' ? app.getRandParam() : '';
  return base + bust;
}
// Sync the player DOM with the given clip. With a clip ({url, title?}) the
// player becomes active and (re)loads the source only when the URL changed;
// with no clip the player is cleared and greyed out via data-state.
function setState(container, audioEl, titleEl, clip) {
  const url = clip?.url;
  if (!url) {
    // Empty state: stop playback, drop the source, show the placeholder.
    audioEl.pause();
    audioEl.removeAttribute('src');
    audioEl.load();
    titleEl.textContent = 'No audio yet';
    container.dataset.state = 'inactive';
    return;
  }
  if (audioEl.src !== url) {
    audioEl.src = url;
    audioEl.load();
  }
  titleEl.textContent = clip.title ?? 'Audio preview';
  container.dataset.state = 'active';
}
// Frontend extension: overlays an HTML <audio> player on the
// IndexTTS2SaveAudio node and feeds it the files the backend reports via
// its {"ui": {"audio": [...]}} execution result.
app.registerExtension({
  name: 'IndexTTS2.SaveAudioPlayer',
  beforeRegisterNodeDef(nodeType, nodeData) {
    // Decorate only the Save Audio node type.
    if (nodeData?.name !== 'IndexTTS2SaveAudio') {
      return;
    }
    const originalOnNodeCreated = nodeType.prototype.onNodeCreated;
    nodeType.prototype.onNodeCreated = function (...args) {
      originalOnNodeCreated?.apply(this, args);
      ensureStyles();
      // Build the floating player DOM once per node instance; it lives in
      // document.body and is positioned over the node every draw.
      const container = document.createElement('div');
      container.className = 'indextts2-audio-player';
      const title = document.createElement('div');
      title.className = 'indextts2-audio-player__title';
      title.textContent = 'No audio yet';
      const audio = document.createElement('audio');
      audio.controls = true;
      audio.style.width = '100%';
      container.appendChild(title);
      container.appendChild(audio);
      document.body.appendChild(container);
      // Custom LiteGraph widget whose draw() callback repositions the DOM
      // overlay to follow the canvas pan/zoom each frame.
      const widget = {
        name: 'indextts2_audio_widget',
        type: 'indextts2_audio_widget',
        draw(ctx, node, widgetWidth, y) {
          // Re-attach if something removed the element from the document.
          if (!container.isConnected) {
            document.body.appendChild(container);
          }
          const baseWidth = Math.max(160, (node.size?.[0] ?? widgetWidth) - MARGIN * 2);
          container.style.width = `${baseWidth}px`;
          const style = buildTransformStyle(ctx, widgetWidth, y);
          Object.assign(container.style, style);
        },
        serialize: false, // UI-only widget; never saved with the graph
        computeSize() {
          return [220, 90]; // reserve node space for the overlay
        },
      };
      this.addCustomWidget(widget);
      this.size = [this.size[0], Math.max(this.size[1], 120)];
      // Remove the detached DOM overlay when the node is deleted.
      const originalOnRemoved = this.onRemoved;
      this.onRemoved = function () {
        container.remove();
        originalOnRemoved?.apply(this, arguments);
      };
      setState(container, audio, title, null);
      // After each execution, load the most recently saved clip into the player.
      const originalOnExecuted = this.onExecuted;
      this.onExecuted = function (message) {
        originalOnExecuted?.apply(this, arguments);
        // NOTE(review): the payload arrives either as message.ui.audio or as
        // message.audio depending on the frontend version — both are accepted;
        // confirm against the target ComfyUI release.
        const audioResults =
          message?.ui?.audio ||
          message?.audio ||
          [];
        if (Array.isArray(audioResults) && audioResults.length > 0) {
          const latest = audioResults[audioResults.length - 1];
          const url = buildAudioUrl(latest);
          if (url) {
            setState(container, audio, title, {
              url,
              title: latest.filename || 'Audio preview',
            });
            return;
          }
        }
        // No usable result: reset to the inactive placeholder state.
        setState(container, audio, title, null);
      };
    };
  },
});