# SillyTavern-extras/tts_coqui.py

import io
import asyncio
import json
import os
import torch
import gc
from pathlib import Path
import TTS
from TTS.api import TTS
from TTS.utils.manage import ModelManager

from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark

from TTS.tts.configs.tortoise_config import TortoiseConfig
from TTS.tts.models.tortoise import Tortoise

from flask import send_file

# Module-level state shared by the functions in this file.

# Currently loaded TTS instance; assigned by load_model().
tts = None
# "model" value read from the loaded model's config.json ("Unknown" when it
# cannot be determined); assigned by load_model().
# NOTE: this name shadows the builtin `type` throughout this module.
type = None
# JSON string of available languages (or "None"), written by
# is_multi_lang_model(). coqui_tts() also overwrites it with the numeric
# language index of the current request and resets it to None afterwards.
multlang = "None"
# JSON string of available speakers (or "None"), written by
# is_multi_speaker_model(). coqui_tts() also overwrites it with the numeric
# speaker index of the current request and resets it to None afterwards.
multspeak = "None"
# Identifier of the model held in `tts`; assigned by load_model() after a
# successful load and compared against the requested model in coqui_tts().
loadedModel = "None"
# Directory holding Bark speaker folders; assigned by is_multi_speaker_model().
spkdirectory = ""
# Copy of the last value produced by is_multi_speaker_model(); read by
# coqui_tts() to map a speaker index to a Bark speaker name.
multspeakjson = ""
# GPU flag; assigned by setGPU(), read by coqui_modeldownload() and coqui_tts().
_gpu = False

def setGPU(flag):
    """Store the GPU preference in the module-level ``_gpu`` flag.

    Args:
        flag: Value to store; it is later forwarded as the ``gpu`` argument
            when models are downloaded or loaded from this module.
    """
    global _gpu
    _gpu = flag

def model_type(_config_path):
    """Return the value of the top-level ``"model"`` key of a config file.

    Args:
        _config_path: Path to a model's ``config.json``.

    Returns:
        The value stored under ``"model"``, or ``None`` when the file is
        missing, cannot be read or parsed, or does not define ``"model"``.
    """
    try:
        # JSON is UTF-8 by specification. Relying on the locale default
        # (e.g. cp1252 on Windows) can fail to decode a valid config, which
        # would silently make the model type undetectable.
        with open(_config_path, 'r', encoding='utf-8') as config_file:
            config_data = json.load(config_file)
    except FileNotFoundError:
        print("Config file not found.")
        return None
    except Exception:
        # Deliberately best-effort: an unreadable or malformed config simply
        # means "type unknown" to the callers.
        return None

    # Only a JSON object can carry the key; lists/strings/numbers cannot.
    if isinstance(config_data, dict) and "model" in config_data:
        return config_data["model"]

    print("ERR: The key 'model' is not present in the config file.")
    return None

def load_model(_model, _gpu, _progress):
    """Load a Coqui TTS model and publish it through the module globals.

    On success the global ``tts`` holds the new ``TTS`` instance and
    ``loadedModel`` holds ``_model``. The global ``type`` is set to the
    ``"model"`` value of the model's ``config.json`` (``"Unknown"`` when it
    cannot be determined).

    Args:
        _model: Model identifier relative to the Coqui model directory,
            either ``<folder>`` or ``<folder>/<checkpoint file>``.
            ``None`` is rejected.
        _gpu: Forwarded to ``TTS`` as ``gpu``. When truthy, the previously
            loaded model is released first to reclaim memory.
        _progress: Forwarded to ``TTS`` as ``progress_bar``.

    Returns:
        ``"Loaded"`` on success, ``"ERROR: Invalid model name or path."``
        when ``_model`` is ``None``, otherwise ``"Unknown error occurred"``
        (unsupported Tortoise models and loads that raised).

    Raises:
        Whatever ``TTS`` raises while loading a Bark model; failures of the
        generic loader are caught and reported on stdout instead.
    """
    global tts
    global type
    global loadedModel

    status = None

    print("GPU is set to: ", _gpu)

    if _model is None:
        # Must be checked before any path handling: os.path.split(None)
        # raises TypeError, which previously made this branch unreachable.
        status = "ERROR: Invalid model name or path."
    else:
        _model_directory, _file = os.path.split(_model)
        if _model_directory == "":  # no checkpoint filename was provided
            _model_directory = _file
            _file = None

        if _gpu:
            # Release the previous model before loading the next one.
            # Rebinding to None (instead of `del tts`) keeps the global
            # defined, so later `tts is None` checks cannot raise NameError.
            tts = None
            # Nothing is loaded any more; forget the old name so a failed
            # load below is retried on the next request.
            loadedModel = "None"
            try:
                gc.collect()
                torch.cuda.empty_cache()
            except Exception as e:
                # Best-effort cleanup only; loading can still proceed.
                print("Could not reclaim GPU memory:", str(e))

        _target_directory = ModelManager().output_prefix  # models location
        _modified_speaker_id = _model_directory.replace("\\", "--")
        _model_folder = os.path.join(_target_directory, _modified_speaker_id)

        if _file is not None:
            _model_path = os.path.join(_model_folder, _file)
        else:
            _model_path = _model_folder

        _config_path = os.path.join(_model_folder, "config.json")

        # Read the config once instead of re-parsing it for every check.
        _model_kind = model_type(_config_path)

        if _model_kind == "tortoise":
            # Tortoise loading is not implemented; status stays unset.
            print("Loading Tortoise...")
            print("_model", _model)
            print("Tortoise not supported at the moment im tired of working on this")
        elif _model_kind == "bark":
            print("Loading Bark...")
            # Bark is loaded by model name, so turn the folder name back
            # into the "a/b/c/d" form.
            _loadbarkmodel = _model_directory.replace("--", "/")
            tts = TTS(_loadbarkmodel, gpu=_gpu)
            # Report success; this branch used to fall through to
            # "Unknown error occurred" even though the model was loaded.
            status = "Loaded"
            loadedModel = _model
        else:
            try:
                print("Loading ", _model_kind)
                print("Load Line:", _model_path, _progress, _gpu)
                tts = TTS(model_path=_model_path, config_path=_config_path, progress_bar=_progress, gpu=_gpu)
                status = "Loaded"
                loadedModel = _model
            except Exception as e:
                print("An exception occurred while loading VITS:", str(e))
                print("Continuing with other parts of the code...")

        type = _model_kind
        print("Type: ", type)

    if status is None:
        status = "Unknown error occurred"
    if type is None:
        type = "Unknown"

    return status

def is_multi_speaker_model():
    """Describe the speakers offered by the currently loaded model.

    Returns:
        A JSON object string mapping consecutive indices to speaker names,
        or the string ``"None"`` when no model is loaded, the model exposes
        no speakers, or an error occurs. For Bark models the names are the
        sub-folders of the ``bark_v0/speakers`` directory, preceded by
        ``"random"``.

    The result is also stored in the globals ``multspeak`` and (except when
    no model is loaded) ``multspeakjson``; for Bark models ``spkdirectory``
    is set to the speakers directory.
    """
    global multspeak
    global spkdirectory
    global multspeakjson

    if tts is None:
        multspeak = "None"
        return multspeak

    try:
        if type == "bark":
            # Normalise the models root to a string without a trailing
            # backslash before building the speakers path.
            models_root = str(ModelManager().output_prefix)
            if models_root.endswith("\\"):
                models_root = models_root[:-1]

            spkdirectory = os.path.join(models_root, "bark_v0", "speakers")

            # "random" always comes first, followed by one entry per folder.
            candidates = ["random"]
            for entry in os.listdir(spkdirectory):
                if os.path.isdir(os.path.join(spkdirectory, entry)):
                    candidates.append(entry)

            # dict.fromkeys removes duplicates while keeping order.
            multspeak = json.dumps(dict(enumerate(dict.fromkeys(candidates))))
        else:
            speakers = tts.speakers
            if speakers is None:
                multspeak = "None"
            else:
                multspeak = json.dumps(dict(enumerate(dict.fromkeys(speakers))))
    except Exception as e:
        print("Error:", e)
        multspeak = "None"

    multspeakjson = multspeak
    return multspeak  # index -> speaker name, as a JSON string

def is_multi_lang_model():
    """Describe the languages offered by the currently loaded model.

    Returns:
        A JSON object string mapping consecutive indices to language names,
        or the string ``"None"`` when no model is loaded, the model exposes
        no languages, or an error occurs. The result is also stored in the
        global ``multlang``.
    """
    global multlang

    if tts is None:
        multlang = "None"
        return multlang

    try:
        languages = tts.languages
        if languages is None:
            multlang = "None"
        else:
            # dict.fromkeys removes duplicates while keeping first-seen
            # order; enumerate then numbers the remaining entries.
            multlang = json.dumps(dict(enumerate(dict.fromkeys(languages))))
    except Exception as e:
        print("Error:", e)
        multlang = "None"

    return multlang

def get_coqui_models(): #DROPDOWN MODELS
    """List the locally available Coqui models as a JSON array string.

    Scans the Coqui model directory (created if missing) for folders whose
    name contains ``"--"`` and does not mention ``vocoder`` or
    ``voice_conversion_models``. Bark and Tortoise folders are listed by
    folder name; for every other folder each checkpoint file
    (``.pt``, ``.tar``, ``.pkl``, ``.pth``) is listed as ``folder/file``.

    Returns:
        ``json.dumps`` of the list of relative model paths.
    """
    target_directory = ModelManager().output_prefix
    os.makedirs(target_directory, exist_ok=True)

    # All paths below are absolute, so the process-wide working directory is
    # left alone: changing it here would affect every other relative path
    # used by the running server.
    file_paths = []
    for folder in os.listdir(target_directory):
        folder_path = os.path.join(target_directory, folder)
        lowered = folder.lower()
        if (
            not os.path.isdir(folder_path)
            or "--" not in folder
            or "vocoder" in lowered
            or "voice_conversion_models" in lowered
        ):
            continue

        # Parse config.json once per folder.
        _config_path = os.path.join(folder_path, "config.json")
        if model_type(_config_path) in ("bark", "tortoise"):
            # These models are addressed by folder, not by checkpoint file.
            file_paths.append(str(Path(folder)))
            continue

        for file in os.listdir(folder_path):
            if file.endswith(('.pt', '.tar', '.pkl', '.pth')) and not file.startswith('.'):
                file_paths.append(str(Path(folder, file)))

    return json.dumps(file_paths)

def coqui_checkmap():
    """List the locally available Coqui models as a JSON array of ``{"id": ...}``.

    Scans the Coqui model directory (created if missing) for folders whose
    name contains ``"--"`` and does not mention ``vocoder``. Bark and
    Tortoise folders are listed by folder name; for every other folder each
    checkpoint file (``.pt``, ``.tar``, ``.pkl``, ``.pth``) is listed as
    ``folder/file``.

    Returns:
        A JSON string (indented by 2) holding a list of objects, each with a
        single ``"id"`` key set to the relative model path.
    """
    target_directory = ModelManager().output_prefix
    os.makedirs(target_directory, exist_ok=True)

    # The directory is listed explicitly instead of chdir-ing into it:
    # changing the process-wide working directory would affect every other
    # relative path used by the running server.
    file_paths = []
    for folder in os.listdir(target_directory):
        folder_path = os.path.join(target_directory, folder)
        if (
            not os.path.isdir(folder_path)
            or "--" not in folder
            or "vocoder" in folder.lower()
        ):
            continue

        # Parse config.json once per folder.
        _config_path = os.path.join(folder_path, "config.json")
        if model_type(_config_path) in ("bark", "tortoise"):
            # These models are addressed by folder, not by checkpoint file.
            file_paths.append(str(Path(folder)))
            continue

        for file in os.listdir(folder_path):
            if file.endswith(('.pt', '.tar', '.pkl', '.pth')) and not file.startswith('.'):
                file_paths.append(str(Path(folder, file)))

    # Wrap every path in a dictionary keyed by "id".
    keyed_json_list = [{"id": item} for item in file_paths]
    return json.dumps(keyed_json_list, indent=2)

def get_coqui_download_models(): #Avail voices list
    """Return the models reported by ``TTS.list_models()`` as a JSON string.

    Each model name is split on ``"/"`` and its first four segments are
    exposed as ``type``, ``lang``, ``id-only`` and ``name-only``; ``id`` is
    those four segments joined again with ``"/"``
    (e.g. ``tts_models/bn/custom/vits-male``).

    Returns:
        A JSON array string (indented by 4) with one object per model.

    Raises:
        IndexError: If a model name has fewer than four segments.
    """
    available = TTS.list_models()

    catalogue = []
    for model_name in available:
        segments = model_name.split('/')
        kind, lang, dataset, name = segments[0], segments[1], segments[2], segments[3]
        catalogue.append({
            "type": kind,
            "lang": lang,
            "id-only": dataset,
            "name-only": name,
            "id": "/".join((kind, lang, dataset, name)),
        })

    return json.dumps(catalogue, indent=4)

def coqui_modeldownload(_modeldownload): #Avail voices function
    """Try to obtain a Coqui model by instantiating ``TTS`` with its name.

    Args:
        _modeldownload: Model name forwarded to ``TTS`` as ``model_name``
            (presumably in the ``type/lang/dataset/model`` form; verify
            against the caller).

    Returns:
        The string ``"True"`` when ``TTS`` could be constructed, the string
        ``"False"`` when it raised an error.
    """
    print(_modeldownload)
    try:
        # The instance is only created for its side effect and discarded.
        TTS(model_name=_modeldownload, progress_bar=True, gpu=_gpu)
    except Exception as e:
        # `Exception` rather than a bare `except` so that KeyboardInterrupt
        # and SystemExit still stop the process; report why it failed.
        print("Model download failed:", str(e))
        return "False"
    return "True"

def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id):
    """Synthesize ``text`` with a Coqui model and return it as a WAV response.

    Args:
        text: Text to speak.
        speaker_id: Model identifier, optionally followed by
            ``[speaker index][language index]``; when that suffix is present
            it overrides ``mspker_id`` and ``language_id``.
        mspker_id: Speaker index for multi-speaker models, or ``None``.
        style_wav: Accepted for interface compatibility; not used.
        language_id: Language index for multi-language models, or ``None``.

    Returns:
        The Flask response produced by ``send_file`` with mimetype
        ``audio/wav``.

    The globals ``multlang`` and ``multspeak`` carry the numeric selection of
    the current request. They are reset to ``None`` when the call ends, even
    if synthesis raises; otherwise a failed request would leave stale numeric
    values behind and every later request would be routed to the wrong
    branch.
    """
    global multlang
    global multspeak

    try:
        return _coqui_tts_request(text, speaker_id, mspker_id, language_id)
    finally:
        # reset for next dynamic tts
        multlang = None
        multspeak = None


def _coqui_tts_request(text, speaker_id, mspker_id, language_id):
    """Resolve speaker/language, load the model if needed, and synthesize."""
    global multlang
    global multspeak

    try:
        # Split "model[speaker][language]" into the model name and indices.
        parts = speaker_id.split("[", 1)
        speaker_id = parts[0]
        remainder = parts[1].rstrip("]")
        variables = remainder.split("][")
        # Non-numeric indices fall back to 0.
        mspker_id = int(variables[0]) if variables[0].isdigit() else 0
        language_id = int(variables[1]) if variables[1].isdigit() else 0
        multlang = language_id
    except Exception:
        # Deliberately best-effort: a plain model name has no bracket suffix,
        # in which case the explicitly passed ids are kept.
        pass

    print("mspker_id: ", mspker_id)
    print("language_id: ", language_id)

    # Ids passed separately (e.g. in the URL) select a language/speaker only
    # when they are numeric.
    if language_id is not None:
        try:
            multlang = float(language_id)
        except ValueError:
            pass

    if mspker_id is not None:
        try:
            multspeak = float(mspker_id)
        except ValueError:
            pass

    if loadedModel != speaker_id:
        print("MODEL NOT LOADED!!! Loading... ", loadedModel, speaker_id)
        print("Loading :", speaker_id, "GPU is: ", _gpu)

        load_model(speaker_id, _gpu, True)

    audio_buffer = io.BytesIO()

    has_speaker = isinstance(multspeak, (int, float))
    has_language = isinstance(multlang, (int, float))

    if not has_speaker and not has_language:
        print("Single Model")
        tts.tts_to_file(text, file_path=audio_buffer)
    elif has_speaker and not has_language:
        print("speaker only")
        if type == "bark" or type == "tortoise":
            try:
                # Map the speaker index to a name, building the speaker list
                # first if it has not been computed yet.
                if multspeakjson == "":
                    parsed_multspeak = json.loads(is_multi_speaker_model())
                else:
                    parsed_multspeak = json.loads(multspeakjson)

                value_at_key = parsed_multspeak.get(str(mspker_id))
                if value_at_key == "random":
                    tts.tts_to_file(text, file_path=audio_buffer)
                else:
                    print("using speaker ", value_at_key)
                    tts.tts_to_file(text, file_path=audio_buffer, voice_dir=spkdirectory, speaker=value_at_key)
            except Exception as e:
                # Errors are only reported; the (possibly empty) buffer is
                # still returned to the client.
                print("An error occurred:", str(e))
        else:
            tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)], file_path=audio_buffer)
    elif not has_speaker and has_language:
        print("lang only")
        tts.tts_to_file(text, language=tts.languages[int(language_id)], file_path=audio_buffer)
    else:
        print("spk and lang")
        tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)], language=tts.languages[int(language_id)], file_path=audio_buffer)

    audio_buffer.seek(0)
    return send_file(audio_buffer, mimetype="audio/wav")