mirror of
https://github.com/SillyTavern/SillyTavern-Extras.git
synced 2026-03-13 07:20:23 +00:00
Cleanup remaining old coqui implementation
This commit is contained in:
62
server.py
62
server.py
@@ -88,7 +88,6 @@ parser.add_argument(
|
||||
)
|
||||
# Hardware / optional-module selection flags.
parser.add_argument("--talkinghead-gpu", action="store_true", help="Run the talkinghead animation on the GPU (CPU is default)")
# NOTE(review): store_false means args.coqui_gpu defaults to True and the flag
# sets it to False. The consumers invert it again ("CPU" if args.coqui_gpu else
# "GPU"), so the double negation cancels out; flipping this alone would break
# those consumers, so it is deliberately left unchanged.
parser.add_argument("--coqui-gpu", action="store_false", help="Run the voice models on the GPU (CPU is default)")
parser.add_argument("--coqui-model", help="Load a custom Coqui TTS model")
# Speech-to-text model path overrides.
parser.add_argument("--stt-vosk-model-path", help="Load a custom vosk speech-to-text model")
# Fixed copy-paste error in the help text: this option loads a whisper model, not vosk.
parser.add_argument("--stt-whisper-model-path", help="Load a custom whisper speech-to-text model")
# Stable Diffusion source options are mutually exclusive with each other.
sd_group = parser.add_mutually_exclusive_group()
|
||||
@@ -278,16 +277,6 @@ if "silero-tts" in modules:
|
||||
tts_service.update_sample_text(SILERO_SAMPLE_TEXT)
|
||||
tts_service.generate_samples()
|
||||
|
||||
# Optional Coqui TTS module: initialized only when enabled via --enable-modules.
if "coqui-tts" in modules:
    # args.coqui_gpu uses store_false, so a truthy value here means "CPU".
    mode = "CPU" if args.coqui_gpu else "GPU"
    print("Initializing Coqui TTS client in " + mode + " mode")
    import tts_coqui as coqui
    # NOTE(review): wildcard import duplicates the aliased import above and
    # pollutes this namespace; later code may rely on the injected names, so
    # it is kept as-is — verify before removing.
    from tts_coqui import *
    if mode == "GPU":
        coqui.setGPU(True)
    # Pre-download/instantiate a custom model when given on the command line.
    if args.coqui_model is not None:
        coqui.coqui_modeldownload(args.coqui_model)
|
||||
|
||||
# Optional Edge TTS module: import-only initialization (no setup calls needed).
if "edge-tts" in modules:
    print("Initializing Edge TTS client")
    import tts_edge as edge
|
||||
@@ -674,57 +663,6 @@ def stop_talking():
|
||||
def result_feed():
    """Proxy the talkinghead module's result feed to the HTTP caller."""
    feed = talkinghead.result_feed()
    return feed
|
||||
|
||||
@app.route("/api/coqui-tts/load", methods=["GET"])
@require_module("coqui-tts")
def load_model():
    """Load the Coqui TTS model named in the ``_model`` query parameter."""
    requested_model = request.args.get('_model')
    show_progress = request.args.get('_progress')
    # args.coqui_gpu uses store_false, so negating it yields the effective GPU flag.
    use_gpu = not args.coqui_gpu
    return coqui.load_model(requested_model, use_gpu, show_progress)
|
||||
|
||||
@app.route("/api/coqui-tts/list", methods=["GET"]) #dropdown list
@require_module("coqui-tts")
def coqui_list():
    """Return the JSON list of locally installed Coqui models (UI dropdown)."""
    installed_models = coqui.get_coqui_models()
    return installed_models
|
||||
|
||||
@app.route("/api/coqui-tts/multspeaker", methods=["GET"])
@require_module("coqui-tts")
def is_multi_speaker_model():
    """Report the loaded model's speaker map (JSON) or "None"."""
    speaker_map = coqui.is_multi_speaker_model()
    return speaker_map
|
||||
|
||||
@app.route("/api/coqui-tts/multlang", methods=["GET"])
@require_module("coqui-tts")
def is_multi_lang_model():
    """Report the loaded model's language map (JSON) or "None"."""
    language_map = coqui.is_multi_lang_model()
    return language_map
|
||||
|
||||
@app.route("/api/coqui-tts/speaker_id", methods=["GET"]) #available voices
@require_module("coqui-tts")
def coqui_download_models():
    """Return the catalogue of downloadable Coqui models as JSON.

    NOTE(review): the endpoint path says "speaker_id" but the handler
    returns the downloadable-model catalogue; renaming the route would
    break existing clients, so the mismatch is documented instead.
    """
    catalogue = coqui.get_coqui_download_models()
    return catalogue
|
||||
|
||||
@app.route("/api/coqui-tts/checkmap", methods=["GET"]) #checkmap
@require_module("coqui-tts")
def coqui_checkmap():
    """Return the installed-model map (list of {"id": path}) as JSON."""
    model_map = coqui.coqui_checkmap()
    return model_map
|
||||
|
||||
@app.route("/api/coqui-tts/download", methods=["GET"])
@require_module("coqui-tts")
def coqui_modeldownload():
    """Trigger a download of the Coqui model named in the ``model`` query parameter."""
    model_name = request.args.get('model')
    return coqui.coqui_modeldownload(model_name)
|
||||
|
||||
@app.route("/api/coqui-tts/tts", methods=["GET"])
@require_module("coqui-tts")
def coqui_tts():
    """Synthesize speech for the given text and return it as a WAV response."""
    params = request.args
    text = params.get('text')
    speaker_id = params.get('speaker_id')
    multi_speaker_id = params.get('mspker')
    language_id = params.get('language_id')
    style_wav = params.get('style_wav')
    # NOTE: positional order of the backend call is
    # (text, speaker, mspker, style_wav, language).
    return coqui.coqui_tts(text, speaker_id, multi_speaker_id, style_wav, language_id)
|
||||
|
||||
@app.route("/api/image", methods=["POST"])
|
||||
@require_module("sd")
|
||||
def api_image():
|
||||
|
||||
411
tts_coqui.py
411
tts_coqui.py
@@ -1,411 +0,0 @@
|
||||
import io
|
||||
import asyncio
|
||||
import json
|
||||
import os
|
||||
import torch
|
||||
import gc
|
||||
from pathlib import Path
|
||||
import TTS
|
||||
from TTS.api import TTS
|
||||
from TTS.utils.manage import ModelManager
|
||||
|
||||
from TTS.tts.configs.bark_config import BarkConfig
|
||||
from TTS.tts.models.bark import Bark
|
||||
|
||||
from TTS.tts.configs.tortoise_config import TortoiseConfig
|
||||
from TTS.tts.models.tortoise import Tortoise
|
||||
|
||||
from flask import send_file
|
||||
|
||||
# Shared module state, mutated by the functions below via `global`.
tts = None  # currently loaded TTS instance; None until load_model() succeeds
tts_type = None  # value of the "model" key from the loaded model's config.json
multlang = "None"  # language map (JSON string) or, transiently, a numeric language index
multspeak = "None"  # speaker map (JSON string) or, transiently, a numeric speaker index
loadedModel = "None"  # identifier of the model currently in memory
spkdirectory = ""  # bark/tortoise speaker folder, filled by is_multi_speaker_model()
multspeakjson = ""  # cached speaker-map JSON from is_multi_speaker_model()
status = ""  # last load_model() status ("Loading", "Loaded", "Already Loaded", error text)
_gpu = False  # effective GPU flag, set via setGPU()
# NOTE(review): despite the name this holds the token string (or None), not a
# boolean — callers should treat it as truthy/falsy only.
is_coqui_available = os.environ.get("COQUI_STUDIO_TOKEN")
|
||||
|
||||
def setGPU(flag):
    """Record whether synthesis should run on the GPU (module-wide switch)."""
    global _gpu
    _gpu = flag
|
||||
|
||||
def model_type(_config_path):
    """Return the value of the "model" key from a Coqui config.json.

    Returns None when the file is missing, is not valid JSON, or lacks a
    "model" key. Callers probe many folders with this function and use the
    None result to mean "not a recognizable model", so errors must not
    propagate; diagnostics are printed only where they are useful.
    """
    try:
        # config.json is JSON, so UTF-8 is the correct encoding regardless of locale.
        with open(_config_path, 'r', encoding='utf-8') as config_file:
            config_data = json.load(config_file)
        if "model" in config_data:
            return config_data["model"]
        print("ERR: The key 'model' is not present in the config file.")
    except FileNotFoundError:
        print("Config file not found.")
    except json.JSONDecodeError:
        # Deliberately silent: model folders often contain non-JSON "config" files.
        pass
    except Exception:
        # Best-effort probe; any other I/O problem just means "unknown type".
        pass
    return None
|
||||
|
||||
def load_model(_model, _gpu, _progress):
    """Load a Coqui model from the local model folder and cache it in `tts`.

    Side effects: mutates the module globals `tts`, `tts_type`, `loadedModel`
    and `status`. Returns the status string ("Loading", "Loaded",
    "Already Loaded", or an error message).

    NOTE(review): `_model` is expected to be a "folder\\file" path fragment
    (backslash-separated, as produced by get_coqui_models on Windows) —
    confirm against callers on other platforms.
    """
    global tts
    global tts_type
    global loadedModel
    global multlang
    global multspeak
    global status

    #print("GPU is set to: ", _gpu)

    # Split "folder\file" into its parts; bark/tortoise models are folder-only.
    _model_directory, _file = os.path.split(_model)

    if _model_directory == "": #make it assign vars correctly if no filename provided
        _model_directory = _file
        _file = None

    # NOTE(review): dead check — os.path.split(None) above would already have
    # raised TypeError before reaching here. Kept for byte-compatibility.
    if _model is None:
        status = "ERROR: Invalid model name or path."
    else:
        try:
            if _gpu == True: #Reclaim memory
                # Drop the previous model and free CUDA memory before loading anew.
                del tts
                try:
                    import gc
                    gc.collect()
                    torch.cuda.empty_cache()
                except Exception:
                    pass
        except Exception as e:
            status = str(e)

    _target_directory = ModelManager().output_prefix # models location
    # On-disk model folders use "--" where the id had path separators.
    _modified_speaker_id = _model_directory.replace("\\", "--")

    if _file != None:
        _model_path = os.path.join(_modified_speaker_id, _file)
    else:
        _model_path = os.path.join(_modified_speaker_id)

    _config_path = os.path.join(_target_directory, _modified_speaker_id, "config.json")


    #prevent multiple loading
    if status == "Loading":
        status = "Loading"
        print(status)
        return status

    #prevent multiple loading
    if os.path.join(_model_path) == loadedModel:
        status = "Already Loaded"
        print(status)
        return status

    if model_type(_config_path) == "tortoise":
        print("Loading Tortoise...")
        status = "Loading"
        # Undo the "--" mangling to recover the TTS model id.
        _loadtortoisemodel = _model_directory.replace("--", "/")
        tts = TTS(_loadtortoisemodel, gpu=_gpu)
        loadedModel = _model

    # NOTE(review): unlike the tortoise branch, this one never sets
    # status = "Loading"/"Loaded" — status may be stale afterwards.
    if model_type(_config_path) == "bark":
        print("Loading Bark...")
        _loadbarkmodel = _model_directory.replace("--", "/")
        tts = TTS(_loadbarkmodel, gpu=_gpu)
        loadedModel = _model

    # Everything that is neither tortoise nor bark loads via explicit paths (e.g. VITS).
    _loadertypes = ["tortoise", "bark"]
    if model_type(_config_path) not in _loadertypes:
        try:
            print("Loading ", model_type(_config_path))
            #print("Load Line:", _model_path, _progress, _gpu)
            tts = TTS(model_path=os.path.join(_target_directory, _model_path), config_path=_config_path, progress_bar=_progress, gpu=_gpu)
            status = "Loaded"
            loadedModel = _model
        except Exception as e:
            print("An exception occurred while loading VITS:", str(e))
            print("Continuing with other parts of the code...")
    else:
        pass

    tts_type = model_type(_config_path)
    #print("Type: ", type)
    #print("Status", status)

    if status is None:
        status = "Unknown error occurred"
    if tts_type is None:
        tts_type = "Unknown"

    return status
|
||||
|
||||
def is_multi_speaker_model():
    """Return a JSON index->speaker-name map for the loaded model, or "None".

    Side effects: caches the result in the module globals `multspeak` and
    `multspeakjson`, and (for bark/tortoise) sets `spkdirectory` to the
    on-disk speaker folder.
    """
    global multspeak
    global tts_type
    global spkdirectory
    global multspeakjson
    global tts

    # No model loaded yet -> nothing to enumerate.
    if tts is None:
        multspeak = "None"
        return multspeak
    try:


        if tts_type == "bark" or tts_type == "tortoise":
            # These models keep their speakers as folders on disk, not on the tts object.
            _target_directory = ModelManager().output_prefix
            # Convert _target_directory to a string and remove the trailing backslash if present
            _target_directory_str = str(_target_directory)
            if _target_directory_str.endswith("\\"):
                _target_directory_str = _target_directory_str[:-1]

            # NOTE(review): hard-codes the bark_v0 folder even for tortoise —
            # looks intentional (shared speaker library), but verify.
            spkdirectory = os.path.join(_target_directory_str, "bark_v0", "speakers")

            subfolder_names = [folder for folder in os.listdir(spkdirectory) if os.path.isdir(os.path.join(spkdirectory, folder))]

            subfolder_names.insert(0, "random") # Add "Random" as the first element in the subfolder_names list

            # Deduplicate while preserving order, then index the names.
            unique_names = list(dict.fromkeys(subfolder_names))
            multspeak = json.dumps({index: name for index, name in enumerate(unique_names)})
            #print(multspeak)
        else:

            # Regular multi-speaker models expose speakers on the tts object.
            value = tts.speakers
            if value is not None:
                unique_speakers = list(dict.fromkeys(value))
                speaker_dict = {index: value for index, value in enumerate(unique_speakers)}
                multspeak = json.dumps(speaker_dict)
                #print(multspeak)
            else:
                multspeak = "None"


    except Exception as e:
        print("Error:", e)
        multspeak = "None"
    multspeakjson = multspeak
    return multspeak #return name and ID in named json
|
||||
|
||||
def is_multi_lang_model():
    """Return a JSON index->language map for the loaded model, or "None".

    Side effect: caches the result in the module-level `multlang`.
    """
    global multlang
    global tts

    # Nothing loaded -> nothing to enumerate.
    if tts is None:
        multlang = "None"
        return multlang

    multlang = "None"
    try:
        languages = tts.languages
        if languages is not None:
            # Deduplicate while preserving order, then index the names.
            ordered_unique = dict.fromkeys(languages)
            multlang = json.dumps(dict(enumerate(ordered_unique)))
    except Exception as exc:
        print("Error:", exc)
        multlang = "None"

    return multlang
|
||||
|
||||
def get_coqui_models(): #DROPDOWN MODELS
    """Return a JSON list of installed model paths for the UI dropdown.

    Side effect: temporarily chdirs into the model folder and then chdirs
    into this file's directory (NOT the original cwd) before returning.
    """
    target_directory = ModelManager().output_prefix
    script_dir = os.path.dirname(os.path.realpath(__file__))

    if not os.path.exists(target_directory):
        os.makedirs(target_directory)
    os.chdir(target_directory)

    def _is_model_folder(name):
        # Model folders use "--" separators; skip vocoders and voice conversion.
        lowered = name.lower()
        return (os.path.isdir(os.path.join(target_directory, name))
                and "--" in name
                and "vocoder" not in lowered
                and "voice_conversion_models" not in lowered)

    candidate_folders = [name for name in os.listdir(target_directory) if _is_model_folder(name)]

    model_paths = []
    for folder in candidate_folders:
        _config_path = os.path.join(target_directory, folder, "config.json")
        # Bark/tortoise are folder-only entries; everything else lists weight files.
        if model_type(_config_path) == "bark" or model_type(_config_path) == "tortoise":
            model_paths.append(str(Path(folder, '')))
            continue
        for filename in os.listdir(os.path.join(target_directory, folder)):
            if filename.endswith(('.pt', '.tar', '.pkl', '.pth')) and not filename.startswith('.'):
                model_paths.append(str(Path(folder, filename)))

    os.chdir(script_dir)
    return json.dumps(model_paths)
|
||||
|
||||
def coqui_checkmap():
    """Return installed model paths as a JSON list of {"id": path} dicts.

    Near-duplicate of get_coqui_models() with a keyed output format.
    Side effect: temporarily chdirs into the model folder and then chdirs
    into this file's directory (NOT the original cwd) before returning.
    """
    manager = ModelManager()
    model_folder = manager.output_prefix

    cwd = os.path.dirname(os.path.realpath(__file__))
    target_directory = model_folder

    if not os.path.exists(target_directory):
        os.makedirs(target_directory)

    os.chdir(target_directory)
    # NOTE: the bare os.listdir() below depends on the chdir above — it lists
    # the current directory, i.e. target_directory.
    folder_list = [
        folder for folder in os.listdir() if os.path.isdir(os.path.join(target_directory, folder)) and "--" in folder and "vocoder" not in folder.lower()
    ]

    file_paths = []

    for folder in folder_list:
        _config_path = os.path.join(target_directory, folder, "config.json")
        # Bark/tortoise are folder-only entries; everything else lists weight files.
        if model_type(_config_path) == "bark" or model_type(_config_path) == "tortoise":
            file_paths.append(str(Path(folder, '')))
        else:
            for file in os.listdir(os.path.join(target_directory, folder)):
                if file.endswith(('.pt', '.tar', '.pkl', '.pth')) and not file.startswith('.'):
                    file_paths.append(str(Path(folder, file)))

    # Convert the list into a list of dictionaries with "id" as the key
    keyed_json_list = [{"id": item} for item in file_paths]

    # Convert the list to a JSON string with indentation
    keyed_json_string = json.dumps(keyed_json_list, indent=2)

    # Replace double backslashes with single backslashes
    #keyed_json_string = keyed_json_string.replace("\\\\", "\\")

    os.chdir(cwd)

    return keyed_json_string
|
||||
|
||||
def get_coqui_download_models(): #Avail voices list
    """Return the downloadable Coqui model catalogue as indented JSON.

    Each entry splits the model id "type/lang/id/name" into its parts and
    also keeps the combined id (e.g. "tts_models/bn/custom/vits-male").
    """
    catalogue = []
    #voices_list = json.loads(get_coqui_downloaded())
    for model_name in TTS.list_models():
        parts = model_name.split('/')
        catalogue.append({
            "type": parts[0],       #type
            "lang": parts[1],       #lang
            "id-only": parts[2],    #id
            "name-only": parts[3],  #name
            "id": parts[0] + '/' + parts[1] + "/" + parts[2] + "/" + parts[3],
        })

    return json.dumps(catalogue, indent=4)
|
||||
|
||||
def coqui_modeldownload(_modeldownload): #Avail voices function
    """Download (and instantiate) the named Coqui model.

    Returns the string "True" on success and "False" on any failure —
    callers treat the result as text, so booleans are deliberately not used.

    NOTE(review): the TTS instance is assigned to a local, so the
    module-level `tts` is not replaced — appears intentional (download-only).
    """
    global _gpu
    print(_modeldownload)
    try:
        # Instantiating TTS with model_name triggers the download if needed.
        tts = TTS(model_name=_modeldownload, progress_bar=True, gpu=_gpu)
        status = "True"
    except Exception as e:
        # Was a bare `except:`; narrowed so KeyboardInterrupt/SystemExit
        # still propagate, and the failure is no longer silent.
        print("Model download failed:", e)
        status = "False"
    return status
|
||||
|
||||
def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id):
    """Synthesize `text` with the loaded model and return a WAV Flask response.

    `speaker_id` may carry extra data in the form "model[mspker][lang]";
    when present, those bracketed indices override `mspker_id` and
    `language_id`. Dispatch between single/multi-speaker/multi-language
    synthesis is driven by whether the module globals `multspeak` and
    `multlang` currently hold numbers (indices) or strings (JSON maps /
    "None") — they are reset to None at the end for the next request.

    NOTE(review): `style_wav` is accepted but never used — confirm whether
    it was intended for tts_to_file.
    """
    global tts_type
    global multlang
    global multspeak
    global loadedModel
    global spkdirectory
    global multspeakjson
    global _gpu

    try:
        # Splitting the string to get speaker_id and the rest
        # ("model[2][0]" -> speaker_id="model", mspker_id=2, language_id=0).
        parts = speaker_id.split("[", 1)
        speaker_id = parts[0]
        remainder = parts[1].rstrip("]")
        variables = remainder.split("][")
        # Converting to integers with default values of 0 if conversion fails
        mspker_id = int(variables[0]) if variables[0].isdigit() else 0
        language_id = int(variables[1]) if variables[1].isdigit() else 0
        # multspeak = mspker_id # might break previews
        multlang = language_id
    except Exception:
        # No bracketed suffix present — keep the URL-supplied values.
        pass
        #print("exception 1")

    #print("mspker_id: ", mspker_id)
    #print("language_id: ", language_id)



    try: #see is values passed in URL
        # A numeric value in multlang/multspeak switches the dispatch below
        # into the corresponding multi-language / multi-speaker branch.
        if language_id is not None:
            float(language_id)
            multlang = float(language_id)
        else:
            pass
    except ValueError:
        pass


    try:
        if mspker_id is not None:
            float(mspker_id)
            multspeak = float(mspker_id)
        else:
            pass
    except ValueError:
        pass


    # Lazily (re)load the model if the request names a different one.
    if loadedModel != speaker_id:
        print("MODEL NOT LOADED!!! Loading... ", loadedModel, speaker_id)
        print("Loading :", speaker_id, "GPU is: ", _gpu)

        load_model(speaker_id, _gpu, True)


    # Render into memory; the buffer is streamed back as the response body.
    audio_buffer = io.BytesIO()

    if not isinstance(multspeak, (int, float)) and not isinstance(multlang, (int, float)): #if not a number
        #print("Single Model")
        tts.tts_to_file(text, file_path=audio_buffer)
    elif isinstance(multspeak, (int, float)) and not isinstance(multlang, (int, float)):
        #print("speaker only")
        if tts_type == "bark" or tts_type == "tortoise":
            try:
                if multspeakjson == "": #failing because multispeakjson not loaded
                    parsed_multspeak = json.loads(is_multi_speaker_model())
                else:
                    parsed_multspeak = json.loads(multspeakjson)

                # Map the numeric speaker index back to its folder name.
                value_at_key = parsed_multspeak.get(str(mspker_id))
                #print(value_at_key)
                # ♪ In the jungle, the mighty jungle, the lion barks tonight ♪
                #I have a silky smooth voice, and today I will tell you about the exercise regimen of the common sloth.
                if value_at_key == "random":
                    tts.tts_to_file(text, file_path=audio_buffer)
                else:
                    #print("using speaker ", value_at_key)
                    tts.tts_to_file(text, file_path=audio_buffer, voice_dir=spkdirectory, speaker=value_at_key)
            except Exception as e:
                print("An error occurred:", str(e))
        else:
            tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)], file_path=audio_buffer)
    elif not isinstance(multspeak, (int, float)) and isinstance(multlang, (int, float)):
        #print("lang only")
        tts.tts_to_file(text, language=tts.languages[int(language_id)], file_path=audio_buffer)
    else:
        #print("spk and lang")
        tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)], language=tts.languages[int(language_id)], file_path=audio_buffer)

    audio_buffer.seek(0)
    response = send_file(audio_buffer, mimetype="audio/wav")

    #reset for next dynamic tts
    multlang = None
    multspeak = None
    return response
|
||||
Reference in New Issue
Block a user