ComfyUI-IndexTTS2/nodes/indextts2_node_emotext.py

import os
import sys
from typing import Tuple

# Lazily-loaded QwenEmotion instance, shared across node executions.
_QWEN_CACHE = None


def _get_qwen(model_dir: str):
    global _QWEN_CACHE
    if _QWEN_CACHE is not None:
        return _QWEN_CACHE
    try:
        from modelscope import AutoModelForCausalLM  # noqa: F401
    except Exception:
        raise ImportError(
            "modelscope is required for Emotion From Text. Install with: pip install modelscope"
        )
    # Make the extension root importable so `indextts.infer_v2` resolves.
    base_dir = os.path.dirname(os.path.abspath(__file__))
    ext_root = os.path.dirname(base_dir)
    if ext_root not in sys.path:
        sys.path.insert(0, ext_root)
    from indextts.infer_v2 import QwenEmotion

    qwen = QwenEmotion(model_dir)
    _QWEN_CACHE = qwen
    return qwen
class IndexTTS2EmotionFromText:
    """ComfyUI node: derive an 8-dimensional emotion vector from free-form text."""

    @classmethod
    def INPUT_TYPES(cls):
        return {
            "required": {
                "text": ("STRING", {"multiline": True}),
            },
        }

    RETURN_TYPES = ("EMOTION_VECTOR", "STRING")
    FUNCTION = "build"
    CATEGORY = "Audio/IndexTTS"
    def build(self, text: str) -> Tuple[list, str]:
        if not isinstance(text, str) or len(text.strip()) == 0:
            raise ValueError("Text is empty. Please provide descriptive emotion text.")
        base_dir = os.path.dirname(os.path.abspath(__file__))
        qwen_dir = os.path.join(os.path.dirname(base_dir), "checkpoints", "qwen0.6bemo4-merge")
        if not os.path.isdir(qwen_dir):
            raise FileNotFoundError(f"QwenEmotion model not found at: {qwen_dir}")
        qwen = _get_qwen(qwen_dir)
        emo_dict = qwen.inference(text)
        # Fixed emotion order for the output vector; negative scores are floored at 0.
        order = ["happy", "angry", "sad", "afraid", "disgusted", "melancholic", "surprised", "calm"]
        vec = [float(max(0.0, emo_dict.get(k, 0.0))) for k in order]
        # Clamp each component to 1.4 (UI/Gradio baseline).
        vec = [min(1.4, v) for v in vec]
        # Reject vectors whose total intensity exceeds the 1.5 cap.
        cap = 1.5
        total = sum(vec)
        if total > cap:
            raise ValueError(
                f"Emotion vector sum {total:.3f} exceeds maximum {cap}. "
                "Reduce intensities or adjust with the Emotion Vector node."
            )
        info = (
            f"Detected emotion vector (sum={total:.2f}):\n"
            + ", ".join(f"{k}={v:.2f}" for k, v in zip(order, vec))
        )
        return vec, info
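
The registration that makes this class visible to ComfyUI is not part of the excerpt. For reference, a minimal sketch of the standard ComfyUI registration pattern is shown below; the mapping key mirrors the class name and the display name is a hypothetical choice, not taken from the repository.

# Minimal sketch (assumption): standard ComfyUI node registration, normally
# placed in this file or in the extension's __init__.py.
NODE_CLASS_MAPPINGS = {
    "IndexTTS2EmotionFromText": IndexTTS2EmotionFromText,
}

NODE_DISPLAY_NAME_MAPPINGS = {
    "IndexTTS2EmotionFromText": "IndexTTS2 Emotion From Text",  # hypothetical display name
}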