mirror of
https://github.com/SillyTavern/SillyTavern-Extras.git
synced 2026-04-24 08:28:59 +00:00
Added missing routes to populate the SillyTavern Coqui UI settings and to download/check models through the Coqui API.
This commit is contained in:
@@ -17,6 +17,7 @@ import json
|
||||
import os
|
||||
import io
|
||||
from TTS.api import TTS
|
||||
from TTS.utils.manage import ModelManager
|
||||
|
||||
|
||||
DEBUG_PREFIX = "<Coqui-TTS module>"
|
||||
@@ -25,23 +26,19 @@ OUTPUT_PATH = "data/tmp/coqui_output.wav"
|
||||
gpu = False
|
||||
|
||||
|
||||
def coqui_supported_models():
|
||||
def coqui_get_api_models():
|
||||
"""
|
||||
Return supported models in the following format: [language][dataset][name] = TTS_string_id
|
||||
Example:
|
||||
{
|
||||
"multilingual": {
|
||||
"multi-dataset": {
|
||||
"your_tts": "tts_models/multilingual/multi-dataset/your_tts"
|
||||
}
|
||||
},
|
||||
"en": {
|
||||
"ljspeech": {
|
||||
"tacotron2-DDC": "tts_models/en/ljspeech/tacotron2-DDC",
|
||||
"glow-tts": "tts_models/en/ljspeech/glow-tts",
|
||||
"vits": "tts_models/en/ljspeech/vits"
|
||||
},
|
||||
"jenny": {
|
||||
"jenny": "tts_models/en/jenny/jenny"
|
||||
"vctk": {
|
||||
"vits": "tts_models/en/vctk/vits"
|
||||
}
|
||||
},
|
||||
"ja": {
|
||||
@@ -72,10 +69,13 @@ def coqui_supported_models():
|
||||
|
||||
if language not in models:
|
||||
models[language] = {}
|
||||
|
||||
models[language][name+"/"+dataset] = i
|
||||
|
||||
response = json.dumps(models,indent=4)
|
||||
if dataset not in models[language]:
|
||||
models[language][dataset] = {}
|
||||
|
||||
models[language][dataset][name] = i
|
||||
|
||||
response = json.dumps(models)
|
||||
return response
|
||||
|
||||
except Exception as e:
|
||||
@@ -83,22 +83,137 @@ def coqui_supported_models():
|
||||
abort(500, DEBUG_PREFIX + " Exception occurs while trying to get list of TTS available")
|
||||
|
||||
|
||||
def coqui_check_model_state():
    """
    Check whether the requested model is installed on the server machine.

    Expected JSON request body: {"model_id": "<TTS string id, e.g. tts_models/en/ljspeech/vits>"}
    Returns a JSON string {"model_state": state} where state is one of:
      - "absent":    no matching folder in the Coqui models directory
      - "corrupted": folder exists but still contains the download .zip
                     (treated as a previously failed download)
      - "installed": folder exists with no leftover .zip
    Aborts with HTTP 500 on any unexpected error.
    """
    try:
        model_state = "absent"
        request_json = request.get_json()
        model_id = request_json["model_id"]

        print(DEBUG_PREFIX,"Search for model", model_id)

        # Coqui stores each model in a folder named after its id,
        # with "/" replaced by "--".
        coqui_models_folder = ModelManager().output_prefix # models location
        installed_models = os.listdir(coqui_models_folder)

        model_folder = None
        for folder_name in installed_models:
            if model_id == folder_name.replace("--","/"):
                model_folder = folder_name
                print(DEBUG_PREFIX,"Folder found:",model_folder)
                break  # folder names are unique within a directory listing

        # Check for a failed download: a leftover archive named after the
        # folder is taken to mean extraction never completed.
        if model_folder is not None:
            content = os.listdir(os.path.join(coqui_models_folder,model_folder))
            print(DEBUG_PREFIX,"Checking content:",content)
            if model_folder+".zip" in content:
                print("Corrupt installed found, model download must have failed previously")
                model_state = "corrupted"
            else:
                model_state = "installed"

        response = json.dumps({"model_state":model_state})
        return response

    except Exception as e:
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to search for installed model")
|
||||
|
||||
def coqui_install_model():
    """
    Report whether the requested model is present on the server machine.

    Expected JSON request body: {"model_id": "<TTS string id>"}
    Returns a JSON string {"model_installed": bool}.
    Aborts with HTTP 500 on any unexpected error.

    NOTE(review): despite the route name, this handler only inspects the
    models folder; the actual download appears to be triggered elsewhere —
    confirm against the caller.
    """
    try:
        request_json = request.get_json()
        model_id = request_json["model_id"]

        print(DEBUG_PREFIX,"Search for model", model_id)

        coqui_models_folder = ModelManager().output_prefix # models location
        installed_models = os.listdir(coqui_models_folder)

        # Installed model folders use "--" where the model id uses "/".
        model_installed = any(
            model_id == folder_name.replace("--","/")
            for folder_name in installed_models
        )

        return json.dumps({"model_installed":model_installed})

    except Exception as e:
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to search for installed model")
|
||||
|
||||
def coqui_get_local_models():
    """
    Return user local models list in the following format: [language][dataset][name] = TTS_string_id
    """
    # Stub: not implemented yet — always fails the request with HTTP 500.
    abort(500, DEBUG_PREFIX + " Not implemented yet")
|
||||
|
||||
def coqui_get_model_settings():
    """
    Load the requested TTS model and return its available languages and speakers.

    Expected JSON request body: {"model_id": "<TTS string id>"}
    Returns a JSON string {"languages": [...], "speakers": [...]}; either list
    is empty when the model is not multi-lingual / multi-speaker.
    Aborts with HTTP 500 on any unexpected error.
    """
    try:
        request_json = request.get_json()
        #print(request_json)

        model_id = request_json["model_id"]
        print(DEBUG_PREFIX,"Received get_speakers request for model", model_id)

        print(DEBUG_PREFIX,"Loading tts model", model_id,"\n - using", ("GPU" if gpu else "CPU"))

        # Loading the model is the only way to query its capabilities.
        tts = TTS(model_name=model_id, progress_bar=True, gpu=gpu)

        model_languages = tts.languages if tts.is_multi_lingual else []
        model_speakers = tts.speakers if tts.is_multi_speaker else []

        response = json.dumps({"languages":model_languages, "speakers":model_speakers})
        print(DEBUG_PREFIX,"Model settings: ", response)
        return response

    except Exception as e:
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to get model speakers")
|
||||
|
||||
|
||||
|
||||
def coqui_process_text():
|
||||
"""
|
||||
Process request text with the loaded RVC model
|
||||
- expected request: {
|
||||
text: string,
|
||||
voiceId: string
|
||||
}
|
||||
- expected request: {
|
||||
"text": text,
|
||||
"model_id": voiceId,
|
||||
"language": language,
|
||||
"speaker": speaker
|
||||
}
|
||||
|
||||
voiceId formats:
|
||||
- model_type/language/dataset/model_name
|
||||
- model_type/language/dataset/model_name[spearker_id]
|
||||
- model_type/language/dataset/model_name[spearker_id][language_id]
|
||||
examples:
|
||||
- tts_models/ja/kokoro/tacotron2-DDC
|
||||
- tts_models/en/vctk/vits[0]
|
||||
- tts_models/multilingual/multi-dataset/your_tts[2][1]
|
||||
- model_id formats:
|
||||
- model_type/language/dataset/model_name
|
||||
- model_type/language/dataset/model_name[spearker_id]
|
||||
- model_type/language/dataset/model_name[spearker_id][language_id]
|
||||
- examples:
|
||||
- tts_models/ja/kokoro/tacotron2-DDC
|
||||
- tts_models/en/vctk/vits[0]
|
||||
- tts_models/multilingual/multi-dataset/your_tts[2][1]
|
||||
"""
|
||||
global gpu
|
||||
|
||||
@@ -106,21 +221,18 @@ def coqui_process_text():
|
||||
request_json = request.get_json()
|
||||
#print(request_json)
|
||||
|
||||
print(DEBUG_PREFIX,"Received TTS request for voiceId", request_json["voiceId"], "with text:\n",request_json["text"])
|
||||
print(DEBUG_PREFIX,"Received TTS request for ", request_json)
|
||||
|
||||
text = request_json["text"]
|
||||
tokens = [i.strip("]") for i in request_json["voiceId"].split("[")]
|
||||
|
||||
print(tokens)
|
||||
|
||||
model_name = tokens[0]
|
||||
speaker = None
|
||||
model_name = request_json["model_id"]
|
||||
language = None
|
||||
speaker = None
|
||||
|
||||
if len(tokens) > 1:
|
||||
speaker = tokens[1]
|
||||
if len(tokens) > 2:
|
||||
language = tokens[2]
|
||||
if request_json["language"] != "none":
|
||||
language = request_json["language"]
|
||||
|
||||
if request_json["speaker"] != "none":
|
||||
speaker = request_json["speaker"]
|
||||
|
||||
print(DEBUG_PREFIX,"Loading tts model", model_name, "\n - speaker: ",speaker,"\n - language: ",language, "\n - using",("GPU" if gpu else "CPU"))
|
||||
|
||||
@@ -130,14 +242,10 @@ def coqui_process_text():
|
||||
if speaker is None:
|
||||
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-speaker but no speaker provided")
|
||||
|
||||
speaker = tts.speakers[int(speaker)]
|
||||
|
||||
if tts.is_multi_lingual:
|
||||
if speaker is None:
|
||||
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-lingual but no language provided")
|
||||
|
||||
language = tts.languages[int(language)]
|
||||
|
||||
tts.tts_to_file(text=text, file_path=OUTPUT_PATH, speaker=speaker, language=language)
|
||||
|
||||
print(DEBUG_PREFIX, "Success, saved to",OUTPUT_PATH)
|
||||
|
||||
@@ -385,7 +385,13 @@ if "coqui-tts" in modules:
|
||||
import modules.text_to_speech.coqui.coqui_module as coqui_module
|
||||
if mode == "GPU":
|
||||
coqui_module.gpu = True
|
||||
app.add_url_rule("/api/text-to-speech/coqui/supported-models", view_func=coqui_module.coqui_supported_models, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-models", view_func=coqui_module.coqui_get_api_models, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/check-model-state", view_func=coqui_module.coqui_check_model_state, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/install-model", view_func=coqui_module.coqui_install_model, methods=["POST"])
|
||||
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-model-settings", view_func=coqui_module.coqui_get_model_settings, methods=["POST"])
|
||||
|
||||
app.add_url_rule("/api/text-to-speech/coqui/local/get-models", view_func=coqui_module.coqui_get_local_models, methods=["POST"])
|
||||
|
||||
app.add_url_rule("/api/text-to-speech/coqui/process-text", view_func=coqui_module.coqui_process_text, methods=["POST"])
|
||||
|
||||
def require_module(name):
|
||||
|
||||
Reference in New Issue
Block a user