Reduced Coqui API calls to a minimum; added model download support

This commit is contained in:
Tony Ribeiro
2023-08-14 04:04:46 +02:00
parent db63c58c30
commit a440177f25
2 changed files with 75 additions and 123 deletions

View File

@@ -12,10 +12,13 @@ References:
- Coqui TTS https://tts.readthedocs.io/en/latest/
- Audio-webui: https://github.com/gitmylo/audio-webui
"""
from flask import abort, request, send_file, jsonify
import json
import os
import io
import shutil
from flask import abort, request, send_file, jsonify
from TTS.api import TTS
from TTS.utils.manage import ModelManager
@@ -23,65 +26,8 @@ from TTS.utils.manage import ModelManager
DEBUG_PREFIX = "<Coqui-TTS module>"  # tag prepended to every log line and abort message from this module
OUTPUT_PATH = "data/tmp/coqui_output.wav"  # generated TTS audio is written here before being served
gpu = False  # GPU toggle; the server sets this to True when started in GPU mode
def coqui_get_api_models():
    """
    Return the curated subset of Coqui TTS models supported by this server.

    The response is a JSON string in the following format:
    [language][dataset][name] = TTS_string_id
    Example:
        {
            "en": {
                "ljspeech": {
                    "tacotron2-DDC": "tts_models/en/ljspeech/tacotron2-DDC",
                    "glow-tts": "tts_models/en/ljspeech/glow-tts",
                    "vits": "tts_models/en/ljspeech/vits"
                },
                "vctk": {
                    "vits": "tts_models/en/vctk/vits"
                }
            },
            "ja": {
                "kokoro": {
                    "tacotron2-DDC": "tts_models/ja/kokoro/tacotron2-DDC"
                }
            }
        }

    Raises:
        HTTPException: 500 if the model list cannot be retrieved from the TTS API.
    """
    # Only these architectures/languages are exposed to the client UI.
    model_selection = ("your_tts", "vits", "jenny", "glow-tts", "tacotron2-DDC")
    language_selection = ("multilingual", "en", "fr", "es", "ja")
    try:
        models = {}
        for model_id in TTS.list_models():
            tokens = model_id.split("/")
            # Expected id format: "tts_models/<language>/<dataset>/<name>".
            # Skip anything malformed instead of crashing the whole request
            # (the original indexed tokens[1..3] unconditionally).
            if len(tokens) != 4:
                continue
            _, language, dataset, name = tokens
            if language not in language_selection or name not in model_selection:
                continue
            models.setdefault(language, {}).setdefault(dataset, {})[name] = model_id
        return json.dumps(models)
    except Exception as e:
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to get list of TTS available")
gpu_mode = False  # set to True by the server when GPU acceleration is requested
is_downloading = False  # True while a model download is in progress; used to reject concurrent downloads and TTS requests
def coqui_check_model_state():
"""
@@ -130,24 +76,62 @@ def coqui_install_model():
"""
Install requested model is installed on the server machine
"""
global gpu_mode
global is_downloading
try:
model_installed = False
request_json = request.get_json()
model_id = request_json["model_id"]
print(DEBUG_PREFIX,"Search for model", model_id)
action = request_json["action"]
print(DEBUG_PREFIX,"Received request",action,"for model",model_id)
if (is_downloading):
print(DEBUG_PREFIX,"Rejected, already downloading a model")
return json.dumps({"status":"downloading"})
coqui_models_folder = ModelManager().output_prefix # models location
installed_models = os.listdir(coqui_models_folder)
model_path = None
print(DEBUG_PREFIX,"Found",len(installed_models),"models in",coqui_models_folder)
for i in installed_models:
if model_id == i.replace("--","/"):
model_installed = True
model_path = os.path.join(coqui_models_folder,i)
response = json.dumps({"model_installed":model_installed})
if model_installed:
print(DEBUG_PREFIX,"model found:", model_id)
else:
print(DEBUG_PREFIX,"model not found")
if action == "download":
if model_installed:
abort(500, DEBUG_PREFIX + "Bad request, model already installed.")
is_downloading = True
TTS(model_name=model_id, progress_bar=True, gpu=gpu_mode)
is_downloading = False
if action == "repare":
if not model_installed:
abort(500, DEBUG_PREFIX + " bad request: requesting repare of model not installed")
print(DEBUG_PREFIX,"Deleting corrupted model folder:",model_path)
shutil.rmtree(model_path, ignore_errors=True)
is_downloading = True
TTS(model_name=model_id, progress_bar=True, gpu=gpu_mode)
is_downloading = False
response = json.dumps({"status":"done"})
return response
except Exception as e:
is_downloading = False
print(e)
abort(500, DEBUG_PREFIX + " Exception occurs while trying to search for installed model")
@@ -158,52 +142,15 @@ def coqui_get_local_models():
abort(500, DEBUG_PREFIX + " Not implemented yet")
def coqui_get_model_settings():
    """
    Load the TTS model named in the request and return its available
    languages and speakers as a JSON string.
    - expected request: {
        model_id: string
    }
    """
    try:
        payload = request.get_json()
        requested_model = payload["model_id"]
        print(DEBUG_PREFIX,"Received get_speakers request for model", requested_model)
        print(DEBUG_PREFIX,"Loading tts model", requested_model,"\n - using", ("GPU" if gpu else "CPU"))
        loaded_tts = TTS(model_name=requested_model, progress_bar=True, gpu=gpu)
        # Single-language / single-speaker models report empty lists.
        languages = loaded_tts.languages if loaded_tts.is_multi_lingual else []
        speakers = loaded_tts.speakers if loaded_tts.is_multi_speaker else []
        settings = json.dumps({"languages":languages, "speakers":speakers})
        print(DEBUG_PREFIX,"Model settings: ", settings)
        return settings
    except Exception as error:
        print(error)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to get model speakers")
def coqui_process_text():
def coqui_generate_tts():
"""
Process request text with the loaded RVC model
- expected request: {
"text": text,
"model_id": voiceId,
"language": language,
"speaker": speaker
"language_id": language,
"speaker_id": speaker
}
- model_id formats:
@@ -215,38 +162,45 @@ def coqui_process_text():
- tts_models/en/vctk/vits[0]
- tts_models/multilingual/multi-dataset/your_tts[2][1]
"""
global gpu
global gpu_mode
global is_downloading
try:
request_json = request.get_json()
#print(request_json)
print(DEBUG_PREFIX,"Received TTS request for ", request_json)
if (is_downloading):
print(DEBUG_PREFIX,"Rejected, currently downloading a model, cannot perform TTS")
abort(500, DEBUG_PREFIX + " Requested TTS while downloading a model")
text = request_json["text"]
model_name = request_json["model_id"]
language = None
speaker = None
language_id = None
speaker_id = None
if request_json["language"] != "none":
language = request_json["language"]
if request_json["language_id"] != "none":
language_id = request_json["language_id"]
if request_json["speaker"] != "none":
speaker = request_json["speaker"]
if request_json["speaker_id"] != "none":
speaker_id = request_json["speaker_id"]
print(DEBUG_PREFIX,"Loading tts model", model_name, "\n - speaker: ",speaker,"\n - language: ",language, "\n - using",("GPU" if gpu else "CPU"))
print(DEBUG_PREFIX,"Loading tts \n- model", model_name, "\n - speaker_id: ",speaker_id,"\n - language_id: ",language_id, "\n - using",("GPU" if gpu_mode else "CPU"))
tts = TTS(model_name=model_name, progress_bar=True, gpu=gpu)
if tts.is_multi_speaker:
if speaker is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-speaker but no speaker provided")
tts = TTS(model_name=model_name, progress_bar=True, gpu=gpu_mode)
if tts.is_multi_lingual:
if speaker is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-lingual but no language provided")
if language_id is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-lingual but no language id provided")
language_id = tts.languages[int(language_id)]
tts.tts_to_file(text=text, file_path=OUTPUT_PATH, speaker=speaker, language=language)
if tts.is_multi_speaker:
if speaker_id is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-speaker but no speaker id provided")
speaker_id =tts.speakers[int(speaker_id)]
tts.tts_to_file(text=text, file_path=OUTPUT_PATH, speaker=speaker_id, language=language_id)
print(DEBUG_PREFIX, "Success, saved to",OUTPUT_PATH)

View File

@@ -385,14 +385,12 @@ if "coqui-tts" in modules:
import modules.text_to_speech.coqui.coqui_module as coqui_module
if mode == "GPU":
coqui_module.gpu = True
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-models", view_func=coqui_module.coqui_get_api_models, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/check-model-state", view_func=coqui_module.coqui_check_model_state, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/install-model", view_func=coqui_module.coqui_install_model, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-model-settings", view_func=coqui_module.coqui_get_model_settings, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/local/get-models", view_func=coqui_module.coqui_get_local_models, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/process-text", view_func=coqui_module.coqui_process_text, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/generate-tts", view_func=coqui_module.coqui_generate_tts, methods=["POST"])
def require_module(name):
def wrapper(fn):