mirror of
https://github.com/1038lab/ComfyUI-QwenTTS.git
synced 2026-05-17 10:59:24 +00:00
535 lines
11 KiB
JSON
535 lines
11 KiB
JSON
{
|
|
"id": "6b2e5d09-d1ea-4fc9-a8bd-8daca49d65ff",
|
|
"revision": 0,
|
|
"last_node_id": 64,
|
|
"last_link_id": 52,
|
|
"nodes": [
|
|
{
|
|
"id": 52,
|
|
"type": "MarkdownNote",
|
|
"pos": [
|
|
-406.07144079046043,
|
|
730.4236067119891
|
|
],
|
|
"size": [
|
|
383.75722406036607,
|
|
599.0649205241322
|
|
],
|
|
"flags": {},
|
|
"order": 0,
|
|
"mode": 0,
|
|
"inputs": [],
|
|
"outputs": [],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"version": "7.5.2",
|
|
"input_ue_unconnectable": {}
|
|
}
|
|
},
|
|
"widgets_values": [
|
|
"# ComfyUI-QwenTTS\n\nA clean, efficient ComfyUI custom node pack for **Qwen3-TTS**. It provides **CustomVoice**, **VoiceDesign**, and **VoiceClone** workflows with strict ComfyUI compatibility and practical controls for quality, speed, and stability.\n\n## Usage\n\n### Custom Voice TTS\nAdd **Qwen3 TTS CustomVoice** and fill `text`, choose `speaker`, optionally add `instruct`.\n\n### Voice Design\nAdd **Qwen3 TTS VoiceDesign**, provide `text` and `instruct`.\n\n### Voice Clone\nAdd **Qwen3 TTS VoiceClone**, connect `reference_audio`, and provide `reference_text`.\n\n## Model Download\n\nModels can be auto-downloaded to:\n```\nComfyUI/models/TTS/Qwen3-TTS/<MODEL_NAME>/\n```\n\nSupported model IDs (Hugging Face):\n- [Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice)\n- [Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice)\n- [Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign)\n- [Qwen/Qwen3-TTS-12Hz-1.7B-Base](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-Base)\n- [Qwen/Qwen3-TTS-12Hz-0.6B-Base](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-Base)\n- [Qwen/Qwen3-TTS-Tokenizer-12Hz](https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz)\n\n\n## FlashAttention (Optional)\n\nIf you have a compatible NVIDIA GPU, FlashAttention 2 can improve speed.\n\n```bash\npip install flash-attn --no-build-isolation\n```"
|
|
],
|
|
"color": "#432",
|
|
"bgcolor": "#653"
|
|
},
|
|
{
|
|
"id": 58,
|
|
"type": "SaveAudioMP3",
|
|
"pos": [
|
|
2.158001494103997,
|
|
1298.862742925313
|
|
],
|
|
"size": [
|
|
270,
|
|
136
|
|
],
|
|
"flags": {},
|
|
"order": 7,
|
|
"mode": 0,
|
|
"inputs": [
|
|
{
|
|
"name": "audio",
|
|
"type": "AUDIO",
|
|
"link": 47
|
|
}
|
|
],
|
|
"outputs": [],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"cnr_id": "comfy-core",
|
|
"ver": "0.11.1",
|
|
"Node name for S&R": "SaveAudioMP3"
|
|
},
|
|
"widgets_values": [
|
|
"audio/ComfyUI",
|
|
"V0"
|
|
]
|
|
},
|
|
{
|
|
"id": 59,
|
|
"type": "AILab_Qwen3TTSVoiceClone",
|
|
"pos": [
|
|
2.1580014941039964,
|
|
947.4393213149233
|
|
],
|
|
"size": [
|
|
270,
|
|
306
|
|
],
|
|
"flags": {},
|
|
"order": 4,
|
|
"mode": 0,
|
|
"inputs": [
|
|
{
|
|
"name": "reference_audio",
|
|
"shape": 7,
|
|
"type": "AUDIO",
|
|
"link": 50
|
|
},
|
|
{
|
|
"name": "voice",
|
|
"shape": 7,
|
|
"type": "VOICE",
|
|
"link": null
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"name": "audio",
|
|
"type": "AUDIO",
|
|
"links": [
|
|
47
|
|
]
|
|
}
|
|
],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"Node name for S&R": "AILab_Qwen3TTSVoiceClone"
|
|
},
|
|
"widgets_values": [
|
|
"Hello, this is a cloned voice.",
|
|
"1.7B",
|
|
"Auto",
|
|
"",
|
|
false,
|
|
true,
|
|
-1,
|
|
"randomize"
|
|
],
|
|
"color": "#4D2600",
|
|
"bgcolor": "#3e5249"
|
|
},
|
|
{
|
|
"id": 62,
|
|
"type": "LoadAudio",
|
|
"pos": [
|
|
2.1580014941039964,
|
|
767.8369715781096
|
|
],
|
|
"size": [
|
|
270,
|
|
136
|
|
],
|
|
"flags": {},
|
|
"order": 1,
|
|
"mode": 0,
|
|
"inputs": [],
|
|
"outputs": [
|
|
{
|
|
"name": "AUDIO",
|
|
"type": "AUDIO",
|
|
"links": [
|
|
50
|
|
]
|
|
}
|
|
],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"cnr_id": "comfy-core",
|
|
"ver": "0.11.1",
|
|
"Node name for S&R": "LoadAudio"
|
|
},
|
|
"widgets_values": [
|
|
"107289363.mp4",
|
|
null,
|
|
""
|
|
]
|
|
},
|
|
{
|
|
"id": 61,
|
|
"type": "AILab_Qwen3TTSCustomVoice",
|
|
"pos": [
|
|
598.6659776089256,
|
|
1303.8139799386574
|
|
],
|
|
"size": [
|
|
270,
|
|
286
|
|
],
|
|
"flags": {},
|
|
"order": 6,
|
|
"mode": 0,
|
|
"inputs": [
|
|
{
|
|
"name": "instruct",
|
|
"shape": 7,
|
|
"type": "STRING",
|
|
"widget": {
|
|
"name": "instruct"
|
|
},
|
|
"link": 52
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"name": "audio",
|
|
"type": "AUDIO",
|
|
"links": [
|
|
49
|
|
]
|
|
}
|
|
],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"Node name for S&R": "AILab_Qwen3TTSCustomVoice"
|
|
},
|
|
"widgets_values": [
|
|
"Hello from Qwen3-TTS.",
|
|
"Ryan",
|
|
"1.7B",
|
|
"Auto",
|
|
"",
|
|
true,
|
|
-1,
|
|
"randomize"
|
|
],
|
|
"color": "#4D2600",
|
|
"bgcolor": "#3e5249"
|
|
},
|
|
{
|
|
"id": 63,
|
|
"type": "AILab_Qwen3TTSVoiceInstruct",
|
|
"pos": [
|
|
304.0686521893615,
|
|
855.8522837949066
|
|
],
|
|
"size": [
|
|
270,
|
|
200
|
|
],
|
|
"flags": {},
|
|
"order": 2,
|
|
"mode": 0,
|
|
"inputs": [],
|
|
"outputs": [
|
|
{
|
|
"name": "VOICE_INSTRUCT",
|
|
"type": "STRING",
|
|
"links": [
|
|
51
|
|
]
|
|
}
|
|
],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"Node name for S&R": "AILab_Qwen3TTSVoiceInstruct"
|
|
},
|
|
"widgets_values": [
|
|
"Female",
|
|
"Warm",
|
|
""
|
|
],
|
|
"color": "#28403f",
|
|
"bgcolor": "#233238"
|
|
},
|
|
{
|
|
"id": 60,
|
|
"type": "AILab_Qwen3TTSVoiceDesign",
|
|
"pos": [
|
|
599.6445169129086,
|
|
773.2291834556061
|
|
],
|
|
"size": [
|
|
270,
|
|
262
|
|
],
|
|
"flags": {},
|
|
"order": 5,
|
|
"mode": 0,
|
|
"inputs": [
|
|
{
|
|
"name": "instruct",
|
|
"type": "STRING",
|
|
"widget": {
|
|
"name": "instruct"
|
|
},
|
|
"link": 51
|
|
}
|
|
],
|
|
"outputs": [
|
|
{
|
|
"name": "audio",
|
|
"type": "AUDIO",
|
|
"links": [
|
|
48
|
|
]
|
|
}
|
|
],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"Node name for S&R": "AILab_Qwen3TTSVoiceDesign"
|
|
},
|
|
"widgets_values": [
|
|
"Hello from Qwen3-TTS VoiceDesign.",
|
|
"A warm, gentle female voice.",
|
|
"1.7B",
|
|
"Auto",
|
|
true,
|
|
-1,
|
|
"randomize"
|
|
],
|
|
"color": "#4D2600",
|
|
"bgcolor": "#3e5249"
|
|
},
|
|
{
|
|
"id": 56,
|
|
"type": "SaveAudioMP3",
|
|
"pos": [
|
|
599.6445169129086,
|
|
1073.9855370124342
|
|
],
|
|
"size": [
|
|
270,
|
|
136
|
|
],
|
|
"flags": {},
|
|
"order": 8,
|
|
"mode": 0,
|
|
"inputs": [
|
|
{
|
|
"name": "audio",
|
|
"type": "AUDIO",
|
|
"link": 48
|
|
}
|
|
],
|
|
"outputs": [],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"cnr_id": "comfy-core",
|
|
"ver": "0.11.1",
|
|
"Node name for S&R": "SaveAudioMP3"
|
|
},
|
|
"widgets_values": [
|
|
"audio/ComfyUI",
|
|
"V0"
|
|
]
|
|
},
|
|
{
|
|
"id": 64,
|
|
"type": "AILab_Qwen3TTSVoiceInstruct",
|
|
"pos": [
|
|
312.5384260678091,
|
|
1456.303587530096
|
|
],
|
|
"size": [
|
|
270,
|
|
200
|
|
],
|
|
"flags": {},
|
|
"order": 3,
|
|
"mode": 0,
|
|
"inputs": [],
|
|
"outputs": [
|
|
{
|
|
"name": "VOICE_INSTRUCT",
|
|
"type": "STRING",
|
|
"links": [
|
|
52
|
|
]
|
|
}
|
|
],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"Node name for S&R": "AILab_Qwen3TTSVoiceInstruct"
|
|
},
|
|
"widgets_values": [
|
|
"Male",
|
|
"Gentle",
|
|
""
|
|
],
|
|
"color": "#28403f",
|
|
"bgcolor": "#233238"
|
|
},
|
|
{
|
|
"id": 57,
|
|
"type": "SaveAudioMP3",
|
|
"pos": [
|
|
598.6659776089256,
|
|
1630.3268205770878
|
|
],
|
|
"size": [
|
|
270,
|
|
136
|
|
],
|
|
"flags": {},
|
|
"order": 9,
|
|
"mode": 0,
|
|
"inputs": [
|
|
{
|
|
"name": "audio",
|
|
"type": "AUDIO",
|
|
"link": 49
|
|
}
|
|
],
|
|
"outputs": [],
|
|
"properties": {
|
|
"ue_properties": {
|
|
"widget_ue_connectable": {},
|
|
"input_ue_unconnectable": {}
|
|
},
|
|
"cnr_id": "comfy-core",
|
|
"ver": "0.11.1",
|
|
"Node name for S&R": "SaveAudioMP3"
|
|
},
|
|
"widgets_values": [
|
|
"audio/ComfyUI",
|
|
"V0"
|
|
]
|
|
}
|
|
],
|
|
"links": [
|
|
[
|
|
47,
|
|
59,
|
|
0,
|
|
58,
|
|
0,
|
|
"AUDIO"
|
|
],
|
|
[
|
|
48,
|
|
60,
|
|
0,
|
|
56,
|
|
0,
|
|
"AUDIO"
|
|
],
|
|
[
|
|
49,
|
|
61,
|
|
0,
|
|
57,
|
|
0,
|
|
"AUDIO"
|
|
],
|
|
[
|
|
50,
|
|
62,
|
|
0,
|
|
59,
|
|
0,
|
|
"AUDIO"
|
|
],
|
|
[
|
|
51,
|
|
63,
|
|
0,
|
|
60,
|
|
0,
|
|
"STRING"
|
|
],
|
|
[
|
|
52,
|
|
64,
|
|
0,
|
|
61,
|
|
0,
|
|
"STRING"
|
|
]
|
|
],
|
|
"groups": [
|
|
{
|
|
"id": 1,
|
|
"title": "Voice Clone",
|
|
"bounding": [
|
|
-7.841998505896003,
|
|
694.2369715781095,
|
|
291.2533417260747,
|
|
746.581911977605
|
|
],
|
|
"color": "#3f789e",
|
|
"font_size": 24,
|
|
"flags": {}
|
|
},
|
|
{
|
|
"id": 2,
|
|
"title": "Custom Voice",
|
|
"bounding": [
|
|
295.1353255271107,
|
|
1230.2139799386573,
|
|
577.8864378990316,
|
|
545.6276515604848
|
|
],
|
|
"color": "#3f789e",
|
|
"font_size": 24,
|
|
"flags": {}
|
|
},
|
|
{
|
|
"id": 3,
|
|
"title": "Voice Design",
|
|
"bounding": [
|
|
293.1106339788069,
|
|
698.282992452134,
|
|
584.879856378544,
|
|
519.9913891997099
|
|
],
|
|
"color": "#3f789e",
|
|
"font_size": 24,
|
|
"flags": {}
|
|
}
|
|
],
|
|
"config": {},
|
|
"extra": {
|
|
"workflowRendererVersion": "LG",
|
|
"ue_links": [],
|
|
"links_added_by_ue": [],
|
|
"ds": {
|
|
"scale": 0.8174702380952426,
|
|
"offset": [
|
|
1193.8852300590352,
|
|
-320.80151692094415
|
|
]
|
|
},
|
|
"frontendVersion": "1.37.11",
|
|
"VHS_latentpreview": false,
|
|
"VHS_latentpreviewrate": 0,
|
|
"VHS_MetadataImage": true,
|
|
"VHS_KeepIntermediate": true
|
|
},
|
|
"version": 0.4
|
|
} |