Reduced Coqui API calls to a minimum; added model download support

This commit is contained in:
Tony Ribeiro
2023-08-14 04:04:46 +02:00
parent db63c58c30
commit a440177f25
2 changed files with 75 additions and 123 deletions

View File

@@ -12,10 +12,13 @@ References:
- Coqui TTS https://tts.readthedocs.io/en/latest/
- Audio-webui: https://github.com/gitmylo/audio-webui
"""
from flask import abort, request, send_file, jsonify
import json
import os
import io
import shutil
from flask import abort, request, send_file, jsonify
from TTS.api import TTS
from TTS.utils.manage import ModelManager
@@ -23,65 +26,8 @@ from TTS.utils.manage import ModelManager
DEBUG_PREFIX = "<Coqui-TTS module>"  # tag prepended to every log line and abort message from this module
OUTPUT_PATH = "data/tmp/coqui_output.wav"  # generated TTS audio is written here before being served
gpu = False  # GPU toggle; the server sets this to True when started in GPU mode
def coqui_get_api_models():
    """
    Return the curated subset of Coqui TTS models supported by this server.

    The response is a JSON string in the following format:
    [language][dataset][name] = TTS_string_id
    Example:
        {
            "en": {
                "ljspeech": {
                    "tacotron2-DDC": "tts_models/en/ljspeech/tacotron2-DDC",
                    "glow-tts": "tts_models/en/ljspeech/glow-tts",
                    "vits": "tts_models/en/ljspeech/vits"
                },
                "vctk": {
                    "vits": "tts_models/en/vctk/vits"
                }
            },
            "ja": {
                "kokoro": {
                    "tacotron2-DDC": "tts_models/ja/kokoro/tacotron2-DDC"
                }
            }
        }

    Raises:
        HTTPException: 500 if the model list cannot be retrieved from the TTS API.
    """
    # Only these architectures/languages are exposed to the client UI.
    model_selection = ("your_tts", "vits", "jenny", "glow-tts", "tacotron2-DDC")
    language_selection = ("multilingual", "en", "fr", "es", "ja")
    try:
        models = {}
        for model_id in TTS.list_models():
            tokens = model_id.split("/")
            # Expected id format: "tts_models/<language>/<dataset>/<name>".
            # Skip anything malformed instead of crashing the whole request
            # (the original indexed tokens[1..3] unconditionally).
            if len(tokens) != 4:
                continue
            _, language, dataset, name = tokens
            if language not in language_selection or name not in model_selection:
                continue
            models.setdefault(language, {}).setdefault(dataset, {})[name] = model_id
        return json.dumps(models)
    except Exception as e:
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to get list of TTS available")
gpu_mode = False  # set to True by the server when GPU acceleration is requested
is_downloading = False  # True while a model download is in progress; used to reject concurrent downloads and TTS requests
def coqui_check_model_state():
"""
@@ -130,24 +76,62 @@ def coqui_install_model():
"""
Install requested model is installed on the server machine
"""
global gpu_mode
global is_downloading
try:
model_installed = False
request_json = request.get_json()
model_id = request_json["model_id"]
print(DEBUG_PREFIX,"Search for model", model_id)
action = request_json["action"]
print(DEBUG_PREFIX,"Received request",action,"for model",model_id)
if (is_downloading):
print(DEBUG_PREFIX,"Rejected, already downloading a model")
return json.dumps({"status":"downloading"})
coqui_models_folder = ModelManager().output_prefix # models location
installed_models = os.listdir(coqui_models_folder)
model_path = None
print(DEBUG_PREFIX,"Found",len(installed_models),"models in",coqui_models_folder)
for i in installed_models:
if model_id == i.replace("--","/"):
model_installed = True
model_path = os.path.join(coqui_models_folder,i)
response = json.dumps({"model_installed":model_installed})
if model_installed:
print(DEBUG_PREFIX,"model found:", model_id)
else:
print(DEBUG_PREFIX,"model not found")
if action == "download":
if model_installed:
abort(500, DEBUG_PREFIX + "Bad request, model already installed.")
is_downloading = True
TTS(model_name=model_id, progress_bar=True, gpu=gpu_mode)
is_downloading = False
if action == "repare":
if not model_installed:
abort(500, DEBUG_PREFIX + " bad request: requesting repare of model not installed")
print(DEBUG_PREFIX,"Deleting corrupted model folder:",model_path)
shutil.rmtree(model_path, ignore_errors=True)
is_downloading = True
TTS(model_name=model_id, progress_bar=True, gpu=gpu_mode)
is_downloading = False
response = json.dumps({"status":"done"})
return response
except Exception as e:
is_downloading = False
print(e)
abort(500, DEBUG_PREFIX + " Exception occurs while trying to search for installed model")
@@ -158,52 +142,15 @@ def coqui_get_local_models():
abort(500, DEBUG_PREFIX + " Not implemented yet")
def coqui_get_model_settings():
    """
    Load the TTS model named in the request and return its available
    languages and speakers as a JSON string.
    - expected request: {
        model_id: string
    }
    """
    try:
        payload = request.get_json()
        requested_model = payload["model_id"]
        print(DEBUG_PREFIX,"Received get_speakers request for model", requested_model)
        print(DEBUG_PREFIX,"Loading tts model", requested_model,"\n - using", ("GPU" if gpu else "CPU"))
        loaded_tts = TTS(model_name=requested_model, progress_bar=True, gpu=gpu)
        # Single-language / single-speaker models report empty lists.
        languages = loaded_tts.languages if loaded_tts.is_multi_lingual else []
        speakers = loaded_tts.speakers if loaded_tts.is_multi_speaker else []
        settings = json.dumps({"languages":languages, "speakers":speakers})
        print(DEBUG_PREFIX,"Model settings: ", settings)
        return settings
    except Exception as error:
        print(error)
        abort(500, DEBUG_PREFIX + " Exception occurs while trying to get model speakers")
def coqui_process_text():
def coqui_generate_tts():
"""
Process request text with the loaded RVC model
- expected request: {
"text": text,
"model_id": voiceId,
"language": language,
"speaker": speaker
"language_id": language,
"speaker_id": speaker
}
- model_id formats:
@@ -215,38 +162,45 @@ def coqui_process_text():
- tts_models/en/vctk/vits[0]
- tts_models/multilingual/multi-dataset/your_tts[2][1]
"""
global gpu
global gpu_mode
global is_downloading
try:
request_json = request.get_json()
#print(request_json)
print(DEBUG_PREFIX,"Received TTS request for ", request_json)
if (is_downloading):
print(DEBUG_PREFIX,"Rejected, currently downloading a model, cannot perform TTS")
abort(500, DEBUG_PREFIX + " Requested TTS while downloading a model")
text = request_json["text"]
model_name = request_json["model_id"]
language = None
speaker = None
language_id = None
speaker_id = None
if request_json["language"] != "none":
language = request_json["language"]
if request_json["language_id"] != "none":
language_id = request_json["language_id"]
if request_json["speaker"] != "none":
speaker = request_json["speaker"]
if request_json["speaker_id"] != "none":
speaker_id = request_json["speaker_id"]
print(DEBUG_PREFIX,"Loading tts model", model_name, "\n - speaker: ",speaker,"\n - language: ",language, "\n - using",("GPU" if gpu else "CPU"))
print(DEBUG_PREFIX,"Loading tts \n- model", model_name, "\n - speaker_id: ",speaker_id,"\n - language_id: ",language_id, "\n - using",("GPU" if gpu_mode else "CPU"))
tts = TTS(model_name=model_name, progress_bar=True, gpu=gpu)
if tts.is_multi_speaker:
if speaker is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-speaker but no speaker provided")
tts = TTS(model_name=model_name, progress_bar=True, gpu=gpu_mode)
if tts.is_multi_lingual:
if speaker is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-lingual but no language provided")
if language_id is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-lingual but no language id provided")
language_id = tts.languages[int(language_id)]
tts.tts_to_file(text=text, file_path=OUTPUT_PATH, speaker=speaker, language=language)
if tts.is_multi_speaker:
if speaker_id is None:
abort(400, DEBUG_PREFIX + " Requested model "+model_name+" is multi-speaker but no speaker id provided")
speaker_id =tts.speakers[int(speaker_id)]
tts.tts_to_file(text=text, file_path=OUTPUT_PATH, speaker=speaker_id, language=language_id)
print(DEBUG_PREFIX, "Success, saved to",OUTPUT_PATH)

View File

@@ -385,14 +385,12 @@ if "coqui-tts" in modules:
import modules.text_to_speech.coqui.coqui_module as coqui_module
if mode == "GPU":
coqui_module.gpu = True
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-models", view_func=coqui_module.coqui_get_api_models, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/check-model-state", view_func=coqui_module.coqui_check_model_state, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/install-model", view_func=coqui_module.coqui_install_model, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/coqui-api/get-model-settings", view_func=coqui_module.coqui_get_model_settings, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/local/get-models", view_func=coqui_module.coqui_get_local_models, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/process-text", view_func=coqui_module.coqui_process_text, methods=["POST"])
app.add_url_rule("/api/text-to-speech/coqui/generate-tts", view_func=coqui_module.coqui_generate_tts, methods=["POST"])
def require_module(name):
def wrapper(fn):