mirror of
https://github.com/SillyTavern/SillyTavern-Extras.git
synced 2026-04-29 02:41:21 +00:00
Reduced the Coqui API calls to a minimum and added model download support
This commit is contained in:
@@ -12,10 +12,13 @@ References:
|
||||
- Coqui TTS https://tts.readthedocs.io/en/latest/
|
||||
- Audio-webui: https://github.com/gitmylo/audio-webui
|
||||
"""
|
||||
from flask import abort, request, send_file, jsonify
|
||||
import json
|
||||
import os
|
||||
import io
|
||||
import shutil
|
||||
|
||||
from flask import abort, request, send_file, jsonify
|
||||
|
||||
from TTS.api import TTS
|
||||
from TTS.utils.manage import ModelManager
|
||||
|
||||
@@ -23,65 +26,8 @@ from TTS.utils.manage import ModelManager
|
||||
DEBUG_PREFIX = "<Coqui-TTS module>"
|
||||
OUTPUT_PATH = "data/tmp/coqui_output.wav"
|
||||
|
||||
gpu = False
|
||||
|
||||
|
||||
def coqui_get_api_models():
    """
    Return the supported Coqui models as a JSON string keyed
    [language][dataset][name] = TTS_string_id.

    Example:
    {
        "en": {
            "ljspeech": {
                "tacotron2-DDC": "tts_models/en/ljspeech/tacotron2-DDC",
                "glow-tts": "tts_models/en/ljspeech/glow-tts",
                "vits": "tts_models/en/ljspeech/vits"
            },
            "vctk": {
                "vits": "tts_models/en/vctk/vits"
            }
        },
        "ja": {
            "kokoro": {
                "tacotron2-DDC": "tts_models/ja/kokoro/tacotron2-DDC"
            }
        }
    }

    Only a curated subset of model names and languages is exposed.
    Aborts with HTTP 500 if model enumeration fails.
    """
    try:
        models = {}

        # Curated whitelists: only these architectures / languages are offered.
        model_selection = ["your_tts", "vits", "jenny", "glow-tts", "tacotron2-DDC"]
        language_selection = ["multilingual", "en", "fr", "es", "ja"]

        for model_string_id in TTS.list_models():
            tokens = model_string_id.split("/")
            # Expected id format: "<type>/<language>/<dataset>/<name>".
            # Guard against malformed ids, which would otherwise raise IndexError.
            if len(tokens) != 4:
                continue
            _, language, dataset, name = tokens

            if language not in language_selection:
                continue

            if name not in model_selection:
                continue

            # Build the nested [language][dataset] dicts lazily.
            models.setdefault(language, {}).setdefault(dataset, {})[name] = model_string_id

        return json.dumps(models)

    except Exception as e:
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to get list of TTS available")
|
||||
|
||||
# Module-level state shared across the request handlers below.
gpu_mode = False        # True when models should be loaded on the GPU
is_downloading = False  # guards against concurrent model downloads
|
||||
|
||||
def coqui_check_model_state():
|
||||
"""
|
||||
@@ -130,24 +76,62 @@ def coqui_install_model():
|
||||
"""
|
||||
Install requested model is installed on the server machine
|
||||
"""
|
||||
global gpu_mode
|
||||
global is_downloading
|
||||
|
||||
try:
|
||||
model_installed = False
|
||||
request_json = request.get_json()
|
||||
model_id = request_json["model_id"]
|
||||
|
||||
print(DEBUG_PREFIX,"Search for model", model_id)
|
||||
action = request_json["action"]
|
||||
|
||||
print(DEBUG_PREFIX,"Received request",action,"for model",model_id)
|
||||
|
||||
if (is_downloading):
|
||||
print(DEBUG_PREFIX,"Rejected, already downloading a model")
|
||||
return json.dumps({"status":"downloading"})
|
||||
|
||||
coqui_models_folder = ModelManager().output_prefix # models location
|
||||
installed_models = os.listdir(coqui_models_folder)
|
||||
model_path = None
|
||||
|
||||
print(DEBUG_PREFIX,"Found",len(installed_models),"models in",coqui_models_folder)
|
||||
|
||||
for i in installed_models:
|
||||
if model_id == i.replace("--","/"):
|
||||
model_installed = True
|
||||
model_path = os.path.join(coqui_models_folder,i)
|
||||
|
||||
response = json.dumps({"model_installed":model_installed})
|
||||
if model_installed:
|
||||
print(DEBUG_PREFIX,"model found:", model_id)
|
||||
else:
|
||||
print(DEBUG_PREFIX,"model not found")
|
||||
|
||||
if action == "download":
|
||||
if model_installed:
|
||||
abort(500, DEBUG_PREFIX + "Bad request, model already installed.")
|
||||
|
||||
is_downloading = True
|
||||
TTS(model_name=model_id, progress_bar=True, gpu=gpu_mode)
|
||||
is_downloading = False
|
||||
|
||||
if action == "repare":
|
||||
if not model_installed:
|
||||
abort(500, DEBUG_PREFIX + " bad request: requesting repare of model not installed")
|
||||
|
||||
|
||||
print(DEBUG_PREFIX,"Deleting corrupted model folder:",model_path)
|
||||
shutil.rmtree(model_path, ignore_errors=True)
|
||||
|
||||
is_downloading = True
|
||||
TTS(model_name=model_id, progress_bar=True, gpu=gpu_mode)
|
||||
is_downloading = False
|
||||
|
||||
response = json.dumps({"status":"done"})
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
is_downloading = False
|
||||
print(e)
|
||||
abort(500, DEBUG_PREFIX + " Exception occurs while trying to search for installed model")
|
||||
|
||||
@@ -158,52 +142,15 @@ def coqui_get_local_models():
|
||||
|
||||
abort(500, DEBUG_PREFIX + " Not implemented yet")
|
||||
|
||||
def coqui_get_model_settings():
    """
    Process the requested model and return its available languages and speakers.
    - expected request: {
        model_id: string
      }
    - returns: JSON {"languages": [...], "speakers": [...]} (empty lists for
      single-language / single-speaker models).
    """
    try:
        payload = request.get_json()
        #print(payload)

        print(DEBUG_PREFIX,"Received get_speakers request for model", payload["model_id"])

        requested_model = payload["model_id"]

        print(DEBUG_PREFIX,"Loading tts model", requested_model,"\n - using", ("GPU" if gpu else "CPU"))

        loaded_tts = TTS(model_name=requested_model, progress_bar=True, gpu=gpu)

        # Multi-lingual / multi-speaker models expose their option lists;
        # otherwise report empty lists.
        available_languages = loaded_tts.languages if loaded_tts.is_multi_lingual else []
        available_speakers = loaded_tts.speakers if loaded_tts.is_multi_speaker else []

        settings_json = json.dumps({"languages":available_languages, "speakers":available_speakers})
        print(DEBUG_PREFIX,"Model settings: ", settings_json)
        return settings_json

    except Exception as e:
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to get model speakers")
|
||||
|
||||
|
||||
|
||||
def coqui_process_text():
|
||||
def coqui_generate_tts():
|
||||
"""
|
||||
Process request text with the loaded RVC model
|
||||
- expected request: {
|
||||
"text": text,
|
||||
"model_id": voiceId,
|
||||
"language": language,
|
||||
"speaker": speaker
|
||||
"language_id": language,
|
||||
"speaker_id": speaker
|
||||
}
|
||||
|
||||
- model_id formats:
|
||||
@@ -215,38 +162,45 @@ def coqui_process_text():
|
||||
- tts_models/en/vctk/vits[0]
|
||||
- tts_models/multilingual/multi-dataset/your_tts[2][1]
|
||||
"""
|
||||
global gpu
|
||||
|
||||
global gpu_mode
|
||||
global is_downloading
|
||||
|
||||
try:
|
||||
request_json = request.get_json()
|
||||
#print(request_json)
|
||||
|
||||
print(DEBUG_PREFIX,"Received TTS request for ", request_json)
|
||||
|
||||
if (is_downloading):
|
||||
print(DEBUG_PREFIX,"Rejected, currently downloading a model, cannot perform TTS")
|
||||
abort(500, DEBUG_PREFIX + " Requested TTS while downloading a model")
|
||||
|
||||
text = request_json["text"]
|
||||
model_name = request_json["model_id"]
|
||||
language = None
|
||||
speaker = None
|
||||
language_id = None
|
||||
speaker_id = None
|
||||
|
||||
if request_json["language"] != "none":
|
||||
language = request_json["language"]
|
||||
if request_json["language_id"] != "none":
|
||||
language_id = request_json["language_id"]
|
||||
|
||||
if request_json["speaker"] != "none":
|
||||
speaker = request_json["speaker"]
|
||||
if request_json["speaker_id"] != "none":
|
||||
speaker_id = request_json["speaker_id"]
|
||||
|
||||
print(DEBUG_PREFIX,"Loading tts model", model_name, "\n - speaker: ",speaker,"\n - language: ",language, "\n - using",("GPU" if gpu else "CPU"))
|
||||
print(DEBUG_PREFIX,"Loading tts \n- model", model_name, "\n - speaker_id: ",speaker_id,"\n - language_id: ",language_id, "\n - using",("GPU" if gpu_mode else "CPU"))
|
||||
|
||||
tts = TTS(model_name=model_name, progress_bar=True, gpu=gpu)
|
||||
|
||||
if tts.is_multi_speaker:
|
||||
if speaker is None:
|
||||
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-speaker but no speaker provided")
|
||||
tts = TTS(model_name=model_name, progress_bar=True, gpu=gpu_mode)
|
||||
|
||||
if tts.is_multi_lingual:
|
||||
if speaker is None:
|
||||
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-lingual but no language provided")
|
||||
if language_id is None:
|
||||
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-lingual but no language id provided")
|
||||
language_id = tts.languages[int(language_id)]
|
||||
|
||||
tts.tts_to_file(text=text, file_path=OUTPUT_PATH, speaker=speaker, language=language)
|
||||
if tts.is_multi_speaker:
|
||||
if speaker_id is None:
|
||||
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-speaker but no speaker id provided")
|
||||
speaker_id =tts.speakers[int(speaker_id)]
|
||||
|
||||
tts.tts_to_file(text=text, file_path=OUTPUT_PATH, speaker=speaker_id, language=language_id)
|
||||
|
||||
print(DEBUG_PREFIX, "Success, saved to",OUTPUT_PATH)
|
||||
|
||||
|
||||
@@ -385,14 +385,12 @@ if "coqui-tts" in modules:
|
||||
import modules.text_to_speech.coqui.coqui_module as coqui_module
|
||||
if mode == "GPU":
|
||||
coqui_module.gpu = True
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-models", view_func=coqui_module.coqui_get_api_models, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/check-model-state", view_func=coqui_module.coqui_check_model_state, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/install-model", view_func=coqui_module.coqui_install_model, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-model-settings", view_func=coqui_module.coqui_get_model_settings, methods=["POST"])
|
||||
|
||||
app.add_url_rule("/api/text-to-speech/coqui/local/get-models", view_func=coqui_module.coqui_get_local_models, methods=["POST"])
|
||||
|
||||
app.add_url_rule("/api/text-to-speech/coqui/process-text", view_func=coqui_module.coqui_process_text, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/generate-tts", view_func=coqui_module.coqui_generate_tts, methods=["POST"])
|
||||
|
||||
def require_module(name):
|
||||
def wrapper(fn):
|
||||
|
||||
Reference in New Issue
Block a user