Files
ComfyUI-QwenTTS/example_workflows/QwenTTS_sample_workflow.json
2026-01-30 15:38:25 -08:00

535 lines
11 KiB
JSON

{
"id": "6b2e5d09-d1ea-4fc9-a8bd-8daca49d65ff",
"revision": 0,
"last_node_id": 64,
"last_link_id": 52,
"nodes": [
{
"id": 52,
"type": "MarkdownNote",
"pos": [
-406.07144079046043,
730.4236067119891
],
"size": [
383.75722406036607,
599.0649205241322
],
"flags": {},
"order": 0,
"mode": 0,
"inputs": [],
"outputs": [],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"version": "7.5.2",
"input_ue_unconnectable": {}
}
},
"widgets_values": [
"# ComfyUI-QwenTTS\n\nA clean, efficient ComfyUI custom node pack for **Qwen3-TTS**. It provides **CustomVoice**, **VoiceDesign**, and **VoiceClone** workflows with strict ComfyUI compatibility and practical controls for quality, speed, and stability.\n\n## Usage\n\n### Custom Voice TTS\nAdd **Qwen3 TTS CustomVoice**, fill in `text`, choose a `speaker`, and optionally add `instruct`.\n\n### Voice Design\nAdd **Qwen3 TTS VoiceDesign**, then provide `text` and `instruct`.\n\n### Voice Clone\nAdd **Qwen3 TTS VoiceClone**, connect `reference_audio`, and provide `reference_text`.\n\n## Model Download\n\nModels can be auto-downloaded to:\n```\nComfyUI/models/TTS/Qwen3-TTS/<MODEL_NAME>/\n```\n\nSupported model IDs (Hugging Face):\n- [Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice)\n- [Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-CustomVoice)\n- [Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-VoiceDesign)\n- [Qwen/Qwen3-TTS-12Hz-1.7B-Base](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-1.7B-Base)\n- [Qwen/Qwen3-TTS-12Hz-0.6B-Base](https://huggingface.co/Qwen/Qwen3-TTS-12Hz-0.6B-Base)\n- [Qwen/Qwen3-TTS-Tokenizer-12Hz](https://huggingface.co/Qwen/Qwen3-TTS-Tokenizer-12Hz)\n\n\n## FlashAttention (Optional)\n\nIf you have a compatible NVIDIA GPU, FlashAttention 2 can improve speed.\n\n```bash\npip install flash-attn --no-build-isolation\n```"
],
"color": "#432",
"bgcolor": "#653"
},
{
"id": 58,
"type": "SaveAudioMP3",
"pos": [
2.158001494103997,
1298.862742925313
],
"size": [
270,
136
],
"flags": {},
"order": 7,
"mode": 0,
"inputs": [
{
"name": "audio",
"type": "AUDIO",
"link": 47
}
],
"outputs": [],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"cnr_id": "comfy-core",
"ver": "0.11.1",
"Node name for S&R": "SaveAudioMP3"
},
"widgets_values": [
"audio/ComfyUI",
"V0"
]
},
{
"id": 59,
"type": "AILab_Qwen3TTSVoiceClone",
"pos": [
2.1580014941039964,
947.4393213149233
],
"size": [
270,
306
],
"flags": {},
"order": 4,
"mode": 0,
"inputs": [
{
"name": "reference_audio",
"shape": 7,
"type": "AUDIO",
"link": 50
},
{
"name": "voice",
"shape": 7,
"type": "VOICE",
"link": null
}
],
"outputs": [
{
"name": "audio",
"type": "AUDIO",
"links": [
47
]
}
],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"Node name for S&R": "AILab_Qwen3TTSVoiceClone"
},
"widgets_values": [
"Hello, this is a cloned voice.",
"1.7B",
"Auto",
"",
false,
true,
-1,
"randomize"
],
"color": "#4D2600",
"bgcolor": "#3e5249"
},
{
"id": 62,
"type": "LoadAudio",
"pos": [
2.1580014941039964,
767.8369715781096
],
"size": [
270,
136
],
"flags": {},
"order": 1,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "AUDIO",
"type": "AUDIO",
"links": [
50
]
}
],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"cnr_id": "comfy-core",
"ver": "0.11.1",
"Node name for S&R": "LoadAudio"
},
"widgets_values": [
"107289363.mp4",
null,
""
]
},
{
"id": 61,
"type": "AILab_Qwen3TTSCustomVoice",
"pos": [
598.6659776089256,
1303.8139799386574
],
"size": [
270,
286
],
"flags": {},
"order": 6,
"mode": 0,
"inputs": [
{
"name": "instruct",
"shape": 7,
"type": "STRING",
"widget": {
"name": "instruct"
},
"link": 52
}
],
"outputs": [
{
"name": "audio",
"type": "AUDIO",
"links": [
49
]
}
],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"Node name for S&R": "AILab_Qwen3TTSCustomVoice"
},
"widgets_values": [
"Hello from Qwen3-TTS.",
"Ryan",
"1.7B",
"Auto",
"",
true,
-1,
"randomize"
],
"color": "#4D2600",
"bgcolor": "#3e5249"
},
{
"id": 63,
"type": "AILab_Qwen3TTSVoiceInstruct",
"pos": [
304.0686521893615,
855.8522837949066
],
"size": [
270,
200
],
"flags": {},
"order": 2,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VOICE_INSTRUCT",
"type": "STRING",
"links": [
51
]
}
],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"Node name for S&R": "AILab_Qwen3TTSVoiceInstruct"
},
"widgets_values": [
"Female",
"Warm",
""
],
"color": "#28403f",
"bgcolor": "#233238"
},
{
"id": 60,
"type": "AILab_Qwen3TTSVoiceDesign",
"pos": [
599.6445169129086,
773.2291834556061
],
"size": [
270,
262
],
"flags": {},
"order": 5,
"mode": 0,
"inputs": [
{
"name": "instruct",
"type": "STRING",
"widget": {
"name": "instruct"
},
"link": 51
}
],
"outputs": [
{
"name": "audio",
"type": "AUDIO",
"links": [
48
]
}
],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"Node name for S&R": "AILab_Qwen3TTSVoiceDesign"
},
"widgets_values": [
"Hello from Qwen3-TTS VoiceDesign.",
"A warm, gentle female voice.",
"1.7B",
"Auto",
true,
-1,
"randomize"
],
"color": "#4D2600",
"bgcolor": "#3e5249"
},
{
"id": 56,
"type": "SaveAudioMP3",
"pos": [
599.6445169129086,
1073.9855370124342
],
"size": [
270,
136
],
"flags": {},
"order": 8,
"mode": 0,
"inputs": [
{
"name": "audio",
"type": "AUDIO",
"link": 48
}
],
"outputs": [],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"cnr_id": "comfy-core",
"ver": "0.11.1",
"Node name for S&R": "SaveAudioMP3"
},
"widgets_values": [
"audio/ComfyUI",
"V0"
]
},
{
"id": 64,
"type": "AILab_Qwen3TTSVoiceInstruct",
"pos": [
312.5384260678091,
1456.303587530096
],
"size": [
270,
200
],
"flags": {},
"order": 3,
"mode": 0,
"inputs": [],
"outputs": [
{
"name": "VOICE_INSTRUCT",
"type": "STRING",
"links": [
52
]
}
],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"Node name for S&R": "AILab_Qwen3TTSVoiceInstruct"
},
"widgets_values": [
"Male",
"Gentle",
""
],
"color": "#28403f",
"bgcolor": "#233238"
},
{
"id": 57,
"type": "SaveAudioMP3",
"pos": [
598.6659776089256,
1630.3268205770878
],
"size": [
270,
136
],
"flags": {},
"order": 9,
"mode": 0,
"inputs": [
{
"name": "audio",
"type": "AUDIO",
"link": 49
}
],
"outputs": [],
"properties": {
"ue_properties": {
"widget_ue_connectable": {},
"input_ue_unconnectable": {}
},
"cnr_id": "comfy-core",
"ver": "0.11.1",
"Node name for S&R": "SaveAudioMP3"
},
"widgets_values": [
"audio/ComfyUI",
"V0"
]
}
],
"links": [
[
47,
59,
0,
58,
0,
"AUDIO"
],
[
48,
60,
0,
56,
0,
"AUDIO"
],
[
49,
61,
0,
57,
0,
"AUDIO"
],
[
50,
62,
0,
59,
0,
"AUDIO"
],
[
51,
63,
0,
60,
0,
"STRING"
],
[
52,
64,
0,
61,
0,
"STRING"
]
],
"groups": [
{
"id": 1,
"title": "Voice Clone",
"bounding": [
-7.841998505896003,
694.2369715781095,
291.2533417260747,
746.581911977605
],
"color": "#3f789e",
"font_size": 24,
"flags": {}
},
{
"id": 2,
"title": "Custom Voice",
"bounding": [
295.1353255271107,
1230.2139799386573,
577.8864378990316,
545.6276515604848
],
"color": "#3f789e",
"font_size": 24,
"flags": {}
},
{
"id": 3,
"title": "Voice Design",
"bounding": [
293.1106339788069,
698.282992452134,
584.879856378544,
519.9913891997099
],
"color": "#3f789e",
"font_size": 24,
"flags": {}
}
],
"config": {},
"extra": {
"workflowRendererVersion": "LG",
"ue_links": [],
"links_added_by_ue": [],
"ds": {
"scale": 0.8174702380952426,
"offset": [
1193.8852300590352,
-320.80151692094415
]
},
"frontendVersion": "1.37.11",
"VHS_latentpreview": false,
"VHS_latentpreviewrate": 0,
"VHS_MetadataImage": true,
"VHS_KeepIntermediate": true
},
"version": 0.4
}