diff --git a/README.md b/README.md index 9840e27..400f752 100644 --- a/README.md +++ b/README.md @@ -8,6 +8,7 @@ Original repo: https://github.com/index-tts/index-tts ![ComfyUI-IndexTTS2 nodes](images/overview.png) ## Updates +- 2025-10-13: Save Audio node now acts as an output node with an embedded player overlay for instant preview inside the graph (no need for downstream preview nodes). Breaking change: the node no longer exposes the `audio` / `saved_path` outputs, so workflows that wired them downstream must be rewired. - 2025-10-08: Default FP32 with optional FP16 toggle, output gain control, and a Save Audio helper node (wav/mp3 + quality parameters). - 2025-09-22: Added IndexTTS2 Advanced node exposing sampling, speed, seed, and other generation controls. @@ -27,7 +28,7 @@ Original repo: https://github.com/index-tts/index-tts - **IndexTTS2 Advanced** - Simple inputs plus overrides for sampling, speech speed, pauses, CFG, seed, FP16 toggle, and output gain. - **IndexTTS2 Emotion Vector** – eight sliders (0.0–1.4, sum <= 1.5) producing an emotion vector. - **IndexTTS2 Emotion From Text** – requires ModelScope and local QwenEmotion; turns short text into an emotion vector + summary. -- **IndexTTS2 Save Audio** - saves generated audio tensors to disk with wav/mp3 options. +- **IndexTTS2 Save Audio** - saves generated audio tensors to disk with wav/mp3 options and surfaces an inline player directly on the node after execution. 
## Examples - Speaker audio -> IndexTTS2 Simple -> Preview/Save Audio diff --git a/__init__.py b/__init__.py index 0b9a2b8..39240bf 100644 --- a/__init__.py +++ b/__init__.py @@ -20,3 +20,7 @@ NODE_DISPLAY_NAME_MAPPINGS = { "IndexTTS2Simple": "IndexTTS2 Simple", } +WEB_DIRECTORY = "./web" + +__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS", "WEB_DIRECTORY"] + diff --git a/images/overview.png b/images/overview.png index caf988a..85ec386 100644 Binary files a/images/overview.png and b/images/overview.png differ diff --git a/nodes/indextts2_save_audio.py b/nodes/indextts2_save_audio.py index e2164bd..41d9369 100644 --- a/nodes/indextts2_save_audio.py +++ b/nodes/indextts2_save_audio.py @@ -4,6 +4,9 @@ import numpy as np import folder_paths class IndexTTS2SaveAudio: + def __init__(self): + self._ui_type = "output" + @classmethod def INPUT_TYPES(cls): return { @@ -21,9 +24,9 @@ class IndexTTS2SaveAudio: }, } - RETURN_TYPES = ("AUDIO", "STRING") - RETURN_NAMES = ("audio", "saved_path") + RETURN_TYPES: tuple = () FUNCTION = "save" + OUTPUT_NODE = True CATEGORY = "Audio/IndexTTS" def _normalize(self, mono: np.ndarray): @@ -55,21 +58,29 @@ class IndexTTS2SaveAudio: wf.writeframes(pcm16.T.tobytes()) return True - def _compose_paths(self, name_prefix: str, batch_count: int) -> List[str]: + def _compose_paths(self, name_prefix: str, batch_count: int, extension: str): output_dir = folder_paths.get_output_directory() # Use Comfy's helper to build prefix and a counter full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path( f"audio/{name_prefix}", output_dir ) - paths = [] + entries = [] + normalized_subfolder = subfolder or "" for b in range(batch_count): filename_with_batch = filename.replace("%batch_num%", str(b)) - file = f"{filename_with_batch}_{counter:05}_" - paths.append(os.path.join(full_output_folder, file)) + file = f"{filename_with_batch}_{counter:05}_.{extension}" + entries.append( + { + "abs_path": 
os.path.join(full_output_folder, file), + "filename": file, + "subfolder": normalized_subfolder, + "type": self._ui_type, + } + ) counter += 1 - return paths + return entries - def _save_with_av(self, fmt: str, audio, filename_prefix: str, quality: str = "320k") -> List[str]: + def _save_with_av(self, fmt: str, audio, filename_prefix: str, quality: str = "320k") -> List[dict]: try: from comfy_extras import nodes_audio as ce_audio # type: ignore except Exception as e: @@ -82,12 +93,14 @@ class IndexTTS2SaveAudio: raise ValueError(f"Unsupported format for AV saver (mp3 only): {fmt}") results = ui.get("ui", {}).get("audio", []) - base = folder_paths.get_output_directory() - out: List[str] = [] - for item in results: - sub = item.get("subfolder") or "" - out.append(os.path.join(base, sub, item.get("filename", ""))) - return out + return [ + { + "filename": item.get("filename", ""), + "subfolder": item.get("subfolder") or "", + "type": item.get("type") or self._ui_type, + } + for item in results + ] def save(self, audio, name: str, format: str, normalize_peak: bool = False, @@ -124,23 +137,27 @@ class IndexTTS2SaveAudio: batch.append(np_w) name_prefix = (name or "tts2").strip() or "tts2" - paths: List[str] = [] + ui_results: List[dict] = [] if format == "wav": - base_paths = self._compose_paths(name_prefix, len(batch)) - for np_w, base in zip(batch, base_paths): - out_path = base + ".wav" + entries = self._compose_paths(name_prefix, len(batch), "wav") + for np_w, entry in zip(batch, entries): + out_path = entry["abs_path"] os.makedirs(os.path.dirname(out_path), exist_ok=True) self._save_wav(out_path, np_w, sr, wav_pcm) - paths.append(out_path) + ui_results.append( + { + "filename": entry["filename"], + "subfolder": entry["subfolder"], + "type": entry["type"], + } + ) elif format == "mp3": - paths = self._save_with_av("mp3", audio, filename_prefix=f"audio/{name_prefix}", quality=mp3_bitrate) + ui_results = self._save_with_av("mp3", audio, 
filename_prefix=f"audio/{name_prefix}", quality=mp3_bitrate) else: raise ValueError(f"Unsupported format: {format}") - saved = "\n".join(paths) - # passthrough audio so the graph can continue if needed - return (audio, saved) + return {"ui": {"audio": ui_results}} diff --git a/web/js/save_audio_player.js b/web/js/save_audio_player.js new file mode 100644 index 0000000..7d86a2c --- /dev/null +++ b/web/js/save_audio_player.js @@ -0,0 +1,177 @@ +import { app } from '../../../scripts/app.js'; +import { api } from '../../../scripts/api.js'; + +const MARGIN = 6; +let stylesInjected = false; + +function ensureStyles() { + if (stylesInjected) return; + stylesInjected = true; + const style = document.createElement('style'); + style.textContent = ` +.indextts2-audio-player { + position: absolute; + z-index: 12; + pointer-events: auto; + display: flex; + flex-direction: column; + gap: 4px; + padding: 6px; + background: var(--bg-color, var(--comfy-menu-bg, #2a2a2a)); + border: 1px solid var(--border-color, #444); + border-radius: 8px; + box-shadow: 0 2px 8px rgba(0, 0, 0, 0.35); + min-width: 180px; + transition: opacity 0.2s ease, filter 0.2s ease; +} +.indextts2-audio-player[data-state="inactive"] { + opacity: 0.9; +} +.indextts2-audio-player[data-state="inactive"] audio { + pointer-events: none; + filter: grayscale(0.8); + opacity: 0.42; +} +.indextts2-audio-player__title { + font-size: 11px; + font-weight: 600; + color: var(--descrip-text, #c7c7c7); +} +`; + document.head.appendChild(style); +} + +function buildTransformStyle(ctx, widgetWidth, y) { + const { canvas } = ctx; + const rect = canvas.getBoundingClientRect(); + const matrix = new DOMMatrix() + .scaleSelf(rect.width / canvas.width, rect.height / canvas.height) + .multiplySelf(ctx.getTransform()) + .translateSelf(MARGIN, y + MARGIN); + return { + transformOrigin: '0 0', + transform: matrix.toString(), + left: `${rect.left + window.scrollX}px`, + top: `${rect.top + window.scrollY}px`, + }; +} + +function 
buildAudioUrl(item) { + if (!item || !item.filename) { + return null; + } + + const params = new URLSearchParams({ + filename: item.filename, + type: item.type || 'output', + }); + + if (item.subfolder) { + params.set('subfolder', item.subfolder); + } + + let url = api.apiURL(`/view?${params.toString()}`); + if (typeof app.getRandParam === 'function') { + url += app.getRandParam(); + } + return url; +} + +function setState(container, audioEl, titleEl, clip) { + if (clip?.url) { + if (audioEl.src !== clip.url) { + audioEl.src = clip.url; + audioEl.load(); + } + titleEl.textContent = clip.title ?? 'Audio preview'; + container.dataset.state = 'active'; + } else { + audioEl.pause(); + audioEl.removeAttribute('src'); + audioEl.load(); + titleEl.textContent = 'No audio yet'; + container.dataset.state = 'inactive'; + } +} + +app.registerExtension({ + name: 'IndexTTS2.SaveAudioPlayer', + beforeRegisterNodeDef(nodeType, nodeData) { + if (nodeData?.name !== 'IndexTTS2SaveAudio') { + return; + } + + const originalOnNodeCreated = nodeType.prototype.onNodeCreated; + nodeType.prototype.onNodeCreated = function (...args) { + originalOnNodeCreated?.apply(this, args); + + ensureStyles(); + + const container = document.createElement('div'); + container.className = 'indextts2-audio-player'; + + const title = document.createElement('div'); + title.className = 'indextts2-audio-player__title'; + title.textContent = 'No audio yet'; + + const audio = document.createElement('audio'); + audio.controls = true; + audio.style.width = '100%'; + + container.appendChild(title); + container.appendChild(audio); + document.body.appendChild(container); + + const widget = { + name: 'indextts2_audio_widget', + type: 'indextts2_audio_widget', + draw(ctx, node, widgetWidth, y) { + if (!container.isConnected) { + document.body.appendChild(container); + } + const baseWidth = Math.max(160, (node.size?.[0] ?? 
widgetWidth) - MARGIN * 2); + container.style.width = `${baseWidth}px`; + const style = buildTransformStyle(ctx, widgetWidth, y); + Object.assign(container.style, style); + }, + serialize: false, + computeSize() { + return [220, 90]; + }, + }; + + this.addCustomWidget(widget); + this.size = [this.size[0], Math.max(this.size[1], 120)]; + + const originalOnRemoved = this.onRemoved; + this.onRemoved = function () { + container.remove(); + originalOnRemoved?.apply(this, arguments); + }; + setState(container, audio, title, null); + + const originalOnExecuted = this.onExecuted; + this.onExecuted = function (message) { + originalOnExecuted?.apply(this, arguments); + const audioResults = + message?.ui?.audio || + message?.audio || + []; + + if (Array.isArray(audioResults) && audioResults.length > 0) { + const latest = audioResults[audioResults.length - 1]; + const url = buildAudioUrl(latest); + if (url) { + setState(container, audio, title, { + url, + title: latest.filename || 'Audio preview', + }); + return; + } + } + + setState(container, audio, title, null); + }; + }; + }, +});