1.40.9 (#9034)

Patch version increment to 1.40.9 **Base branch:** `main` ┆Issue is synchronized with this [Notion page](https://www.notion.so/PR-9034-1-40-9-30e6d73d365081a1b1e4e7a1c0b77629) by [Unito](https://www.unito.io) --------- Co-authored-by: christian-byrne <72887196+christian-byrne@users.noreply.github.com> Co-authored-by: github-actions <github-actions@github.com> Co-authored-by: Alexander Brown <drjkl@comfy.org>
2026-03-12 00:20:15 +00:00 · 2026-02-21 13:16:39 +09:00
parent c1a569211d
commit 5fe902358c
49 changed files with 7888 additions and 49 deletions
--- a/src/locales/en/nodeDefs.json
+++ b/src/locales/en/nodeDefs.json
@@ -2305,6 +2305,304 @@
      }
    }
  },
+  "ElevenLabsAudioIsolation": {
+    "display_name": "ElevenLabs Voice Isolation",
+    "description": "Remove background noise from audio, isolating vocals or speech.",
+    "inputs": {
+      "audio": {
+        "name": "audio",
+        "tooltip": "Audio to process for background noise removal."
+      }
+    },
+    "outputs": {
+      "0": {
+        "tooltip": null
+      }
+    }
+  },
+  "ElevenLabsInstantVoiceClone": {
+    "display_name": "ElevenLabs Instant Voice Clone",
+    "description": "Create a cloned voice from audio samples. Provide 1-8 audio recordings of the voice to clone.",
+    "inputs": {
+      "files": {
+        "name": "files",
+        "tooltip": "Audio recordings for voice cloning."
+      },
+      "remove_background_noise": {
+        "name": "remove_background_noise",
+        "tooltip": "Remove background noise from voice samples using audio isolation."
+      }
+    },
+    "outputs": {
+      "0": {
+        "name": "voice",
+        "tooltip": null
+      }
+    }
+  },
+  "ElevenLabsSpeechToSpeech": {
+    "display_name": "ElevenLabs Speech to Speech",
+    "description": "Transform speech from one voice to another while preserving the original content and emotion.",
+    "inputs": {
+      "voice": {
+        "name": "voice",
+        "tooltip": "Target voice for the transformation. Connect from Voice Selector or Instant Voice Clone."
+      },
+      "audio": {
+        "name": "audio",
+        "tooltip": "Source audio to transform."
+      },
+      "stability": {
+        "name": "stability",
+        "tooltip": "Voice stability. Lower values give broader emotional range, higher values produce more consistent but potentially monotonous speech."
+      },
+      "model": {
+        "name": "model",
+        "tooltip": "Model to use for speech-to-speech transformation."
+      },
+      "output_format": {
+        "name": "output_format",
+        "tooltip": "Audio output format."
+      },
+      "seed": {
+        "name": "seed",
+        "tooltip": "Seed for reproducibility."
+      },
+      "remove_background_noise": {
+        "name": "remove_background_noise",
+        "tooltip": "Remove background noise from input audio using audio isolation."
+      },
+      "control_after_generate": {
+        "name": "control after generate"
+      },
+      "model_similarity_boost": {
+        "name": "similarity_boost"
+      },
+      "model_speed": {
+        "name": "speed"
+      },
+      "model_style": {
+        "name": "style"
+      },
+      "model_use_speaker_boost": {
+        "name": "use_speaker_boost"
+      }
+    },
+    "outputs": {
+      "0": {
+        "tooltip": null
+      }
+    }
+  },
+  "ElevenLabsSpeechToText": {
+    "display_name": "ElevenLabs Speech to Text",
+    "description": "Transcribe audio to text. Supports automatic language detection, speaker diarization, and audio event tagging.",
+    "inputs": {
+      "audio": {
+        "name": "audio",
+        "tooltip": "Audio to transcribe."
+      },
+      "model": {
+        "name": "model",
+        "tooltip": "Model to use for transcription."
+      },
+      "language_code": {
+        "name": "language_code",
+        "tooltip": "ISO 639-1 or ISO 639-3 language code (e.g., 'en', 'es', 'fra'). Leave empty for automatic detection."
+      },
+      "num_speakers": {
+        "name": "num_speakers",
+        "tooltip": "Maximum number of speakers to predict. Set to 0 for automatic detection."
+      },
+      "seed": {
+        "name": "seed",
+        "tooltip": "Seed for reproducibility (determinism not guaranteed)."
+      },
+      "control_after_generate": {
+        "name": "control after generate"
+      },
+      "model_diarization_threshold": {
+        "name": "diarization_threshold"
+      },
+      "model_diarize": {
+        "name": "diarize"
+      },
+      "model_tag_audio_events": {
+        "name": "tag_audio_events"
+      },
+      "model_temperature": {
+        "name": "temperature"
+      },
+      "model_timestamps_granularity": {
+        "name": "timestamps_granularity"
+      }
+    },
+    "outputs": {
+      "0": {
+        "name": "text",
+        "tooltip": null
+      },
+      "1": {
+        "name": "language_code",
+        "tooltip": null
+      },
+      "2": {
+        "name": "words_json",
+        "tooltip": null
+      }
+    }
+  },
+  "ElevenLabsTextToDialogue": {
+    "display_name": "ElevenLabs Text to Dialogue",
+    "description": "Generate multi-speaker dialogue from text. Each dialogue entry has its own text and voice.",
+    "inputs": {
+      "stability": {
+        "name": "stability",
+        "tooltip": "Voice stability. Lower values give broader emotional range, higher values produce more consistent but potentially monotonous speech."
+      },
+      "apply_text_normalization": {
+        "name": "apply_text_normalization",
+        "tooltip": "Text normalization mode. 'auto' lets the system decide, 'on' always applies normalization, 'off' skips it."
+      },
+      "model": {
+        "name": "model",
+        "tooltip": "Model to use for dialogue generation."
+      },
+      "inputs": {
+        "name": "inputs",
+        "tooltip": "Number of dialogue entries."
+      },
+      "language_code": {
+        "name": "language_code",
+        "tooltip": "ISO 639-1 or ISO 639-3 language code (e.g., 'en', 'es', 'fra'). Leave empty for automatic detection."
+      },
+      "seed": {
+        "name": "seed",
+        "tooltip": "Seed for reproducibility."
+      },
+      "output_format": {
+        "name": "output_format",
+        "tooltip": "Audio output format."
+      },
+      "control_after_generate": {
+        "name": "control after generate"
+      },
+      "inputs_text1": {
+        "name": "text1"
+      }
+    },
+    "outputs": {
+      "0": {
+        "tooltip": null
+      }
+    }
+  },
+  "ElevenLabsTextToSoundEffects": {
+    "display_name": "ElevenLabs Text to Sound Effects",
+    "description": "Generate sound effects from text descriptions.",
+    "inputs": {
+      "text": {
+        "name": "text",
+        "tooltip": "Text description of the sound effect to generate."
+      },
+      "model": {
+        "name": "model",
+        "tooltip": "Model to use for sound effect generation."
+      },
+      "output_format": {
+        "name": "output_format",
+        "tooltip": "Audio output format."
+      },
+      "model_duration": {
+        "name": "duration"
+      },
+      "model_loop": {
+        "name": "loop"
+      },
+      "model_prompt_influence": {
+        "name": "prompt_influence"
+      }
+    },
+    "outputs": {
+      "0": {
+        "tooltip": null
+      }
+    }
+  },
+  "ElevenLabsTextToSpeech": {
+    "display_name": "ElevenLabs Text to Speech",
+    "description": "Convert text to speech.",
+    "inputs": {
+      "voice": {
+        "name": "voice",
+        "tooltip": "Voice to use for speech synthesis. Connect from Voice Selector or Instant Voice Clone."
+      },
+      "text": {
+        "name": "text",
+        "tooltip": "The text to convert to speech."
+      },
+      "stability": {
+        "name": "stability",
+        "tooltip": "Voice stability. Lower values give broader emotional range, higher values produce more consistent but potentially monotonous speech."
+      },
+      "apply_text_normalization": {
+        "name": "apply_text_normalization",
+        "tooltip": "Text normalization mode. 'auto' lets the system decide, 'on' always applies normalization, 'off' skips it."
+      },
+      "model": {
+        "name": "model",
+        "tooltip": "Model to use for text-to-speech."
+      },
+      "language_code": {
+        "name": "language_code",
+        "tooltip": "ISO 639-1 or ISO 639-3 language code (e.g., 'en', 'es', 'fra'). Leave empty for automatic detection."
+      },
+      "seed": {
+        "name": "seed",
+        "tooltip": "Seed for reproducibility (determinism not guaranteed)."
+      },
+      "output_format": {
+        "name": "output_format",
+        "tooltip": "Audio output format."
+      },
+      "control_after_generate": {
+        "name": "control after generate"
+      },
+      "model_similarity_boost": {
+        "name": "similarity_boost"
+      },
+      "model_speed": {
+        "name": "speed"
+      },
+      "model_style": {
+        "name": "style"
+      },
+      "model_use_speaker_boost": {
+        "name": "use_speaker_boost"
+      }
+    },
+    "outputs": {
+      "0": {
+        "tooltip": null
+      }
+    }
+  },
+  "ElevenLabsVoiceSelector": {
+    "display_name": "ElevenLabs Voice Selector",
+    "description": "Select a predefined ElevenLabs voice for text-to-speech generation.",
+    "inputs": {
+      "voice": {
+        "name": "voice",
+        "tooltip": "Choose a voice from the predefined ElevenLabs voices."
+      }
+    },
+    "outputs": {
+      "0": {
+        "name": "voice",
+        "tooltip": null
+      }
+    }
+  },
  "EmptyAceStep1_5LatentAudio": {
    "display_name": "Empty Ace Step 1.5 Latent Audio",
    "inputs": {
@@ -3519,6 +3817,50 @@
      }
    }
  },
+  "GLSLShader": {
+    "display_name": "GLSL Shader",
+    "description": "Apply GLSL ES fragment shaders to images. u_resolution (vec2) is always available.",
+    "inputs": {
+      "fragment_shader": {
+        "name": "fragment_shader",
+        "tooltip": "GLSL fragment shader source code (GLSL ES 3.00 / WebGL 2.0 compatible)"
+      },
+      "size_mode": {
+        "name": "size_mode",
+        "tooltip": "Output size: 'from_input' uses the dimensions of the first input image; 'custom' allows a manually specified size"
+      },
+      "images": {
+        "name": "images",
+        "tooltip": "Images are available as u_image0-4 (sampler2D) in the shader code"
+      },
+      "floats": {
+        "name": "floats",
+        "tooltip": "Floats are available as u_float0-4 in the shader code"
+      },
+      "ints": {
+        "name": "ints",
+        "tooltip": "Ints are available as u_int0-4 in the shader code"
+      }
+    },
+    "outputs": {
+      "0": {
+        "name": "IMAGE0",
+        "tooltip": "Available via layout(location = 0) out vec4 fragColor0 in the shader code"
+      },
+      "1": {
+        "name": "IMAGE1",
+        "tooltip": "Available via layout(location = 1) out vec4 fragColor1 in the shader code"
+      },
+      "2": {
+        "name": "IMAGE2",
+        "tooltip": "Available via layout(location = 2) out vec4 fragColor2 in the shader code"
+      },
+      "3": {
+        "name": "IMAGE3",
+        "tooltip": "Available via layout(location = 3) out vec4 fragColor3 in the shader code"
+      }
+    }
+  },
  "GrokImageEditNode": {
    "display_name": "Grok Image Edit",
    "description": "Modify an existing image based on a text prompt",
@@ -14723,6 +15065,94 @@
      }
    }
  },
+  "TextGenerate": {
+    "display_name": "Text Generate",
+    "inputs": {
+      "clip": {
+        "name": "clip"
+      },
+      "prompt": {
+        "name": "prompt"
+      },
+      "max_length": {
+        "name": "max_length"
+      },
+      "sampling_mode": {
+        "name": "sampling_mode"
+      },
+      "image": {
+        "name": "image"
+      },
+      "sampling_mode_min_p": {
+        "name": "min_p"
+      },
+      "sampling_mode_repetition_penalty": {
+        "name": "repetition_penalty"
+      },
+      "sampling_mode_seed": {
+        "name": "seed"
+      },
+      "sampling_mode_temperature": {
+        "name": "temperature"
+      },
+      "sampling_mode_top_k": {
+        "name": "top_k"
+      },
+      "sampling_mode_top_p": {
+        "name": "top_p"
+      }
+    },
+    "outputs": {
+      "0": {
+        "name": "generated_text",
+        "tooltip": null
+      }
+    }
+  },
+  "TextGenerateLTX2Prompt": {
+    "display_name": "Text Generate LTX2 Prompt",
+    "inputs": {
+      "clip": {
+        "name": "clip"
+      },
+      "prompt": {
+        "name": "prompt"
+      },
+      "max_length": {
+        "name": "max_length"
+      },
+      "sampling_mode": {
+        "name": "sampling_mode"
+      },
+      "image": {
+        "name": "image"
+      },
+      "sampling_mode_min_p": {
+        "name": "min_p"
+      },
+      "sampling_mode_repetition_penalty": {
+        "name": "repetition_penalty"
+      },
+      "sampling_mode_seed": {
+        "name": "seed"
+      },
+      "sampling_mode_temperature": {
+        "name": "temperature"
+      },
+      "sampling_mode_top_k": {
+        "name": "top_k"
+      },
+      "sampling_mode_top_p": {
+        "name": "top_p"
+      }
+    },
+    "outputs": {
+      "0": {
+        "name": "generated_text",
+        "tooltip": null
+      }
+    }
+  },
  "TextToLowercase": {
    "display_name": "Text to Lowercase",
    "inputs": {