mirror of
https://github.com/snicolast/ComfyUI-IndexTTS2.git
synced 2026-01-26 06:29:45 +00:00
save audio - embedded player
This commit is contained in:
@@ -8,6 +8,7 @@ Original repo: https://github.com/index-tts/index-tts
|
||||

|
||||
|
||||
## Updates
|
||||
- 2025-10-13: Save Audio node now acts as an output node with an embedded player overlay for instant preview inside the graph (no need for downstream preview nodes).
|
||||
- 2025-10-08: Default FP32 with optional FP16 toggle, output gain control, and a Save Audio helper node (wav/mp3 + quality parameters).
|
||||
- 2025-09-22: Added IndexTTS2 Advanced node exposing sampling, speed, seed, and other generation controls.
|
||||
|
||||
@@ -27,7 +28,7 @@ Original repo: https://github.com/index-tts/index-tts
|
||||
- **IndexTTS2 Advanced** - Simple inputs plus overrides for sampling, speech speed, pauses, CFG, seed, FP16 toggle, and output gain.
|
||||
- **IndexTTS2 Emotion Vector** - eight sliders (0.0–1.4, sum <= 1.5) that produce an emotion vector.
|
||||
- **IndexTTS2 Emotion From Text** - requires ModelScope and local QwenEmotion; turns short text into an emotion vector plus a summary.
|
||||
- **IndexTTS2 Save Audio** - saves generated audio tensors to disk with wav/mp3 options.
|
||||
- **IndexTTS2 Save Audio** - saves generated audio tensors to disk with wav/mp3 options and surfaces an inline player directly on the node after execution.
|
||||
|
||||
## Examples
|
||||
- Speaker audio -> IndexTTS2 Simple -> Preview/Save Audio
|
||||
|
||||
@@ -20,3 +20,7 @@ NODE_DISPLAY_NAME_MAPPINGS = {
|
||||
"IndexTTS2Simple": "IndexTTS2 Simple",
|
||||
}
|
||||
|
||||
WEB_DIRECTORY = "./web"
|
||||
|
||||
__all__ = ["NODE_CLASS_MAPPINGS", "NODE_DISPLAY_NAME_MAPPINGS", "WEB_DIRECTORY"]
|
||||
|
||||
|
||||
Binary file not shown.
|
Before Width: | Height: | Size: 233 KiB After Width: | Height: | Size: 293 KiB |
@@ -4,6 +4,9 @@ import numpy as np
|
||||
import folder_paths
|
||||
|
||||
class IndexTTS2SaveAudio:
|
||||
def __init__(self):
|
||||
self._ui_type = "output"
|
||||
|
||||
@classmethod
|
||||
def INPUT_TYPES(cls):
|
||||
return {
|
||||
@@ -21,9 +24,9 @@ class IndexTTS2SaveAudio:
|
||||
},
|
||||
}
|
||||
|
||||
RETURN_TYPES = ("AUDIO", "STRING")
|
||||
RETURN_NAMES = ("audio", "saved_path")
|
||||
RETURN_TYPES: tuple = ()
|
||||
FUNCTION = "save"
|
||||
OUTPUT_NODE = True
|
||||
CATEGORY = "Audio/IndexTTS"
|
||||
|
||||
def _normalize(self, mono: np.ndarray):
|
||||
@@ -55,21 +58,29 @@ class IndexTTS2SaveAudio:
|
||||
wf.writeframes(pcm16.T.tobytes())
|
||||
return True
|
||||
|
||||
def _compose_paths(self, name_prefix: str, batch_count: int) -> List[str]:
|
||||
def _compose_paths(self, name_prefix: str, batch_count: int, extension: str):
|
||||
output_dir = folder_paths.get_output_directory()
|
||||
# Use Comfy's helper to build prefix and a counter
|
||||
full_output_folder, filename, counter, subfolder, filename_prefix = folder_paths.get_save_image_path(
|
||||
f"audio/{name_prefix}", output_dir
|
||||
)
|
||||
paths = []
|
||||
entries = []
|
||||
normalized_subfolder = subfolder or ""
|
||||
for b in range(batch_count):
|
||||
filename_with_batch = filename.replace("%batch_num%", str(b))
|
||||
file = f"{filename_with_batch}_{counter:05}_"
|
||||
paths.append(os.path.join(full_output_folder, file))
|
||||
file = f"{filename_with_batch}_{counter:05}_.{extension}"
|
||||
entries.append(
|
||||
{
|
||||
"abs_path": os.path.join(full_output_folder, file),
|
||||
"filename": file,
|
||||
"subfolder": normalized_subfolder,
|
||||
"type": self._ui_type,
|
||||
}
|
||||
)
|
||||
counter += 1
|
||||
return paths
|
||||
return entries
|
||||
|
||||
def _save_with_av(self, fmt: str, audio, filename_prefix: str, quality: str = "320k") -> List[str]:
|
||||
def _save_with_av(self, fmt: str, audio, filename_prefix: str, quality: str = "320k") -> List[dict]:
|
||||
try:
|
||||
from comfy_extras import nodes_audio as ce_audio # type: ignore
|
||||
except Exception as e:
|
||||
@@ -82,12 +93,14 @@ class IndexTTS2SaveAudio:
|
||||
raise ValueError(f"Unsupported format for AV saver (mp3 only): {fmt}")
|
||||
|
||||
results = ui.get("ui", {}).get("audio", [])
|
||||
base = folder_paths.get_output_directory()
|
||||
out: List[str] = []
|
||||
for item in results:
|
||||
sub = item.get("subfolder") or ""
|
||||
out.append(os.path.join(base, sub, item.get("filename", "")))
|
||||
return out
|
||||
return [
|
||||
{
|
||||
"filename": item.get("filename", ""),
|
||||
"subfolder": item.get("subfolder") or "",
|
||||
"type": item.get("type") or self._ui_type,
|
||||
}
|
||||
for item in results
|
||||
]
|
||||
|
||||
def save(self, audio, name: str, format: str,
|
||||
normalize_peak: bool = False,
|
||||
@@ -124,23 +137,27 @@ class IndexTTS2SaveAudio:
|
||||
batch.append(np_w)
|
||||
|
||||
name_prefix = (name or "tts2").strip() or "tts2"
|
||||
paths: List[str] = []
|
||||
ui_results: List[dict] = []
|
||||
|
||||
if format == "wav":
|
||||
base_paths = self._compose_paths(name_prefix, len(batch))
|
||||
for np_w, base in zip(batch, base_paths):
|
||||
out_path = base + ".wav"
|
||||
entries = self._compose_paths(name_prefix, len(batch), "wav")
|
||||
for np_w, entry in zip(batch, entries):
|
||||
out_path = entry["abs_path"]
|
||||
os.makedirs(os.path.dirname(out_path), exist_ok=True)
|
||||
self._save_wav(out_path, np_w, sr, wav_pcm)
|
||||
paths.append(out_path)
|
||||
ui_results.append(
|
||||
{
|
||||
"filename": entry["filename"],
|
||||
"subfolder": entry["subfolder"],
|
||||
"type": entry["type"],
|
||||
}
|
||||
)
|
||||
elif format == "mp3":
|
||||
paths = self._save_with_av("mp3", audio, filename_prefix=f"audio/{name_prefix}", quality=mp3_bitrate)
|
||||
ui_results = self._save_with_av("mp3", audio, filename_prefix=f"audio/{name_prefix}", quality=mp3_bitrate)
|
||||
else:
|
||||
raise ValueError(f"Unsupported format: {format}")
|
||||
|
||||
saved = "\n".join(paths)
|
||||
# passthrough audio so the graph can continue if needed
|
||||
return (audio, saved)
|
||||
return {"ui": {"audio": ui_results}}
|
||||
|
||||
|
||||
|
||||
|
||||
177
web/js/save_audio_player.js
Normal file
177
web/js/save_audio_player.js
Normal file
@@ -0,0 +1,177 @@
|
||||
import { app } from '../../../scripts/app.js';
|
||||
import { api } from '../../../scripts/api.js';
|
||||
|
||||
/**
 * Inset used when placing the player overlay: added to the left/top offset
 * and subtracted twice from the node width when sizing the overlay.
 */
const MARGIN = 6;

/** Becomes true the first time `ensureStyles()` runs in this module. */
let stylesInjected = false;
|
||||
|
||||
/**
 * Injects the stylesheet for the inline audio player overlay, at most once.
 *
 * The module-level `stylesInjected` flag is the fast path. The `<style>`
 * element is additionally tagged with an id so that a second evaluation of
 * this script (which would reset the flag) does not append a duplicate sheet.
 */
function ensureStyles() {
  if (stylesInjected) return;
  stylesInjected = true;

  const styleId = 'indextts2-audio-player-styles';
  if (document.getElementById(styleId)) return;

  const style = document.createElement('style');
  style.id = styleId;
  style.textContent = `
    .indextts2-audio-player {
      position: absolute;
      z-index: 12;
      pointer-events: auto;
      display: flex;
      flex-direction: column;
      gap: 4px;
      padding: 6px;
      background: var(--bg-color, var(--comfy-menu-bg, #2a2a2a));
      border: 1px solid var(--border-color, #444);
      border-radius: 8px;
      box-shadow: 0 2px 8px rgba(0, 0, 0, 0.35);
      min-width: 180px;
      transition: opacity 0.2s ease, filter 0.2s ease;
    }
    .indextts2-audio-player[data-state="inactive"] {
      opacity: 0.9;
    }
    .indextts2-audio-player[data-state="inactive"] audio {
      pointer-events: none;
      filter: grayscale(0.8);
      opacity: 0.42;
    }
    .indextts2-audio-player__title {
      font-size: 11px;
      font-weight: 600;
      color: var(--descrip-text, #c7c7c7);
    }
  `;
  document.head.appendChild(style);
}
|
||||
|
||||
/**
 * Computes the inline style that pins the overlay on top of the widget area.
 *
 * The matrix is built as: CSS-pixel scale of the canvas element, times the
 * context's current transform, times a translation of (MARGIN, y + MARGIN).
 *
 * @param {CanvasRenderingContext2D} ctx - Context passed to the widget's draw callback.
 * @param {number} widgetWidth - Unused; kept so the signature matches the draw arguments.
 * @param {number} y - Vertical offset of the widget passed to the draw callback.
 * @returns {{transformOrigin: string, transform: string, left: string, top: string}}
 *   Style properties intended for `Object.assign(element.style, ...)`.
 */
function buildTransformStyle(ctx, widgetWidth, y) {
  const { canvas } = ctx;
  const rect = canvas.getBoundingClientRect();

  // A canvas whose backing store is 0 wide/high would produce an Infinity or
  // NaN scale factor, and stringifying a DOMMatrix with non-finite entries
  // throws. Fall back to a neutral scale so the draw callback never throws.
  const scaleX = canvas.width > 0 ? rect.width / canvas.width : 1;
  const scaleY = canvas.height > 0 ? rect.height / canvas.height : 1;

  const matrix = new DOMMatrix()
    .scaleSelf(scaleX, scaleY)
    .multiplySelf(ctx.getTransform())
    .translateSelf(MARGIN, y + MARGIN);

  return {
    transformOrigin: '0 0',
    transform: matrix.toString(),
    left: `${rect.left + window.scrollX}px`,
    top: `${rect.top + window.scrollY}px`,
  };
}
|
||||
|
||||
/**
 * Builds the `/view` URL for one audio result entry.
 *
 * @param {{filename?: string, subfolder?: string, type?: string}|null|undefined} item
 *   Result entry; `type` defaults to 'output' and `subfolder` is only sent when truthy.
 * @returns {string|null} The URL produced by `api.apiURL`, with the value of
 *   `app.getRandParam()` appended when that function exists, or `null` when
 *   the entry is missing or has no filename.
 */
function buildAudioUrl(item) {
  if (!item?.filename) {
    return null;
  }

  const query = new URLSearchParams();
  query.set('filename', item.filename);
  query.set('type', item.type || 'output');
  if (item.subfolder) {
    query.set('subfolder', item.subfolder);
  }

  const viewUrl = api.apiURL(`/view?${query.toString()}`);
  // NOTE(review): getRandParam presumably returns a cache-busting query suffix — confirm.
  const suffix = typeof app.getRandParam === 'function' ? app.getRandParam() : '';
  return `${viewUrl}${suffix}`;
}
|
||||
|
||||
/**
 * Switches the player overlay between its "active" and "inactive" states.
 *
 * @param {HTMLElement} container - Overlay root; its `data-state` value is set
 *   to 'active' or 'inactive'.
 * @param {HTMLAudioElement} audioEl - Audio element inside the overlay.
 * @param {HTMLElement} titleEl - Element whose text shows the clip title.
 * @param {{url?: string, title?: string}|null|undefined} clip - Clip to show.
 *   A nullish clip, or one without a `url`, resets the player.
 */
function setState(container, audioEl, titleEl, clip) {
  if (!clip?.url) {
    audioEl.pause();
    audioEl.removeAttribute('src');
    audioEl.load();
    titleEl.textContent = 'No audio yet';
    container.dataset.state = 'inactive';
    return;
  }

  // Compare against the raw attribute: the `src` property reads back as a
  // resolved absolute URL, so it never equals a relative `clip.url` and the
  // element would be reloaded even when the clip has not changed.
  if (audioEl.getAttribute('src') !== clip.url) {
    audioEl.src = clip.url;
    audioEl.load();
  }
  titleEl.textContent = clip.title ?? 'Audio preview';
  container.dataset.state = 'active';
}
|
||||
|
||||
app.registerExtension({
  name: 'IndexTTS2.SaveAudioPlayer',

  /**
   * Patches the `IndexTTS2SaveAudio` node type so every created node gets an
   * inline audio player overlay. Other node types are left untouched.
   *
   * Per node, the patched `onNodeCreated`:
   *  - creates a DOM overlay (title + `<audio controls>`) appended to `document.body`;
   *  - registers a non-serialized custom widget whose `draw` re-positions the overlay;
   *  - wraps `onRemoved` to remove the overlay from the DOM;
   *  - wraps `onExecuted` to point the player at the last audio result.
   *
   * NOTE(review): the overlay is only repositioned from the widget's `draw`
   * callback; if the host stops drawing the widget (collapsed node, other
   * graph shown) the overlay presumably stays at its last position — confirm.
   */
  beforeRegisterNodeDef(nodeType, nodeData) {
    if (nodeData?.name !== 'IndexTTS2SaveAudio') {
      return;
    }

    const originalOnNodeCreated = nodeType.prototype.onNodeCreated;
    nodeType.prototype.onNodeCreated = function (...createdArgs) {
      originalOnNodeCreated?.apply(this, createdArgs);

      ensureStyles();

      // Overlay DOM: a title line above a native audio control.
      const container = document.createElement('div');
      container.className = 'indextts2-audio-player';

      const title = document.createElement('div');
      title.className = 'indextts2-audio-player__title';
      title.textContent = 'No audio yet';

      const audio = document.createElement('audio');
      audio.controls = true;
      audio.style.width = '100%';

      container.appendChild(title);
      container.appendChild(audio);
      document.body.appendChild(container);

      // The widget draws nothing on the canvas itself; it reserves space in
      // the node and uses each draw call to sync the overlay's position/size.
      const widget = {
        name: 'indextts2_audio_widget',
        type: 'indextts2_audio_widget',
        draw(ctx, node, widgetWidth, y) {
          // Re-attach if something detached the overlay from the document.
          if (!container.isConnected) {
            document.body.appendChild(container);
          }
          const nodeWidth = node.size?.[0] ?? widgetWidth;
          const baseWidth = Math.max(160, nodeWidth - MARGIN * 2);
          container.style.width = `${baseWidth}px`;
          Object.assign(container.style, buildTransformStyle(ctx, widgetWidth, y));
        },
        serialize: false,
        computeSize() {
          return [220, 90];
        },
      };

      this.addCustomWidget(widget);
      // Keep the current width; make the node at least 120 tall.
      this.size = [this.size[0], Math.max(this.size[1], 120)];

      const originalOnRemoved = this.onRemoved;
      this.onRemoved = function (...removedArgs) {
        container.remove();
        originalOnRemoved?.apply(this, removedArgs);
      };

      // Start in the "No audio yet" state.
      setState(container, audio, title, null);

      const originalOnExecuted = this.onExecuted;
      this.onExecuted = function (...executedArgs) {
        originalOnExecuted?.apply(this, executedArgs);

        const [message] = executedArgs;
        // Accept the result list either nested under `ui` or directly on the message.
        const audioResults = message?.ui?.audio || message?.audio || [];

        if (Array.isArray(audioResults) && audioResults.length > 0) {
          // Only the last entry of the list is previewed.
          const latest = audioResults[audioResults.length - 1];
          const url = buildAudioUrl(latest);
          if (url) {
            setState(container, audio, title, {
              url,
              title: latest.filename || 'Audio preview',
            });
            return;
          }
        }

        // No usable result: fall back to the inactive state.
        setState(container, audio, title, null);
      };
    };
  },
});
|
||||
Reference in New Issue
Block a user