import io
import asyncio
import json
import os
import gc
from pathlib import Path

import torch
from flask import send_file

import TTS  # package import (shadowed by the class below; kept for side effects)
from TTS.api import TTS
from TTS.utils.manage import ModelManager
from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark
from TTS.tts.configs.tortoise_config import TortoiseConfig
from TTS.tts.models.tortoise import Tortoise

# ---------------------------------------------------------------------------
# Module-level state shared between the loader and the synthesis endpoint.
# NOTE(review): `type` shadows the builtin; the names are kept unchanged
# because other modules may import them from here.
# ---------------------------------------------------------------------------
tts = None            # currently loaded TTS instance (None until load_model succeeds)
type = None           # "model" value from the loaded model's config.json (e.g. "vits", "bark")
multlang = "None"     # JSON index->language map, or a numeric index during synthesis
multspeak = "None"    # JSON index->speaker map, or a numeric index during synthesis
loadedModel = "None"  # identifier of the model currently held in `tts`
spkdirectory = ""     # bark voice directory (filled by is_multi_speaker_model)
multspeakjson = ""    # cached speaker-map JSON
_gpu = False          # set via setGPU(); passed to every model load


def setGPU(flag):
    """Remember whether subsequent model loads should run on the GPU."""
    global _gpu
    _gpu = flag
    return


def model_type(_config_path):
    """Return the "model" value from a Coqui config.json, or None on any error.

    Errors are deliberately best-effort: a missing file or malformed JSON
    simply yields None so callers can fall through to the generic loader.
    """
    try:
        with open(_config_path, 'r') as config_file:
            config_data = json.load(config_file)
        if "model" in config_data:
            return config_data["model"]
        print("ERR: The key 'model' is not present in the config file.")
    except FileNotFoundError:
        print("Config file not found.")
    except json.JSONDecodeError:
        pass  # invalid JSON format in the config file
    except Exception:
        pass  # any other error also yields None
    return None


def load_model(_model, _gpu, _progress):
    """Load a Coqui model into the module-global `tts`.

    _model: "<model-dir>/<checkpoint-file>", or just "<model-dir>" for
            directory-selected models (bark/tortoise).
    _gpu: load on GPU when True (also frees the previous model's GPU memory).
    _progress: show a progress bar for generic (VITS-style) loads.

    Returns a status string: "Loaded", an error message, or
    "Unknown error occurred".
    """
    global tts
    global type
    global loadedModel
    global multlang
    global multspeak
    status = None
    print("GPU is set to: ", _gpu)

    # BUGFIX: check for None BEFORE os.path.split — the original split first
    # and raised TypeError on a None model instead of returning the status.
    if _model is None:
        status = "ERROR: Invalid model name or path."
    else:
        _model_directory, _file = os.path.split(_model)
        if _model_directory == "":
            # No filename provided: the whole string is the directory.
            _model_directory = _file
            _file = None

        try:
            if _gpu == True:
                # Reclaim GPU memory held by a previously loaded model.
                del tts
                try:
                    gc.collect()
                    torch.cuda.empty_cache()
                except Exception:
                    pass
        except Exception as e:
            status = str(e)

        _target_directory = ModelManager().output_prefix  # models location
        _modified_speaker_id = _model_directory.replace("\\", "--")
        if _file != None:
            _model_path = os.path.join(_target_directory, _modified_speaker_id, _file)
        else:
            _model_path = os.path.join(_target_directory, _modified_speaker_id)
        _config_path = os.path.join(_target_directory, _modified_speaker_id, "config.json")

        # Hoisted: the original re-read config.json on every comparison.
        _mtype = model_type(_config_path)

        if _mtype == "tortoise":
            print("Loading Tortoise...")
            print("_model", _model)
            print("Tortoise not supported at the moment im tired of working on this")
        if _mtype == "bark":
            print("Loading Bark...")
            _loadbarkmodel = _model_directory.replace("--", "/")
            tts = TTS(_loadbarkmodel, gpu=_gpu)
            loadedModel = _model
            # BUGFIX: the original left status None here, so a successful
            # bark load reported "Unknown error occurred".
            status = "Loaded"
        if _mtype not in ("tortoise", "bark"):
            try:
                print("Loading ", _mtype)
                print("Load Line:", _model_path, _progress, _gpu)
                tts = TTS(model_path=_model_path, config_path=_config_path,
                          progress_bar=_progress, gpu=_gpu)
                status = "Loaded"
                loadedModel = _model
            except Exception as e:
                print("An exception occurred while loading VITS:", str(e))
                print("Continuing with other parts of the code...")

        type = _mtype
        print("Type: ", type)

    if status is None:
        status = "Unknown error occurred"
    if type is None:
        type = "Unknown"
    return status


def is_multi_speaker_model():
    """Return a JSON index->name map of the loaded model's speakers.

    Returns the string "None" when no model is loaded or the model has no
    speakers. Side effects: caches the map in `multspeakjson` and, for bark,
    records the voice directory in `spkdirectory`.
    """
    global multspeak
    global type
    global spkdirectory
    global multspeakjson
    global tts
    if tts is None:
        multspeak = "None"
        return multspeak
    try:
        if type == "bark":
            # Bark speakers are sub-folders under <models>/bark_v0/speakers.
            _target_directory_str = str(ModelManager().output_prefix)
            if _target_directory_str.endswith("\\"):
                _target_directory_str = _target_directory_str[:-1]
            spkdirectory = os.path.join(_target_directory_str, "bark_v0", "speakers")
            subfolder_names = [
                folder for folder in os.listdir(spkdirectory)
                if os.path.isdir(os.path.join(spkdirectory, folder))
            ]
            # "random" is always offered as the first choice.
            subfolder_names.insert(0, "random")
            unique_names = list(dict.fromkeys(subfolder_names))
            multspeak = json.dumps({index: name for index, name in enumerate(unique_names)})
        else:
            value = tts.speakers
            if value is not None:
                # De-duplicate while preserving order, then index.
                unique_speakers = list(dict.fromkeys(value))
                multspeak = json.dumps({index: spk for index, spk in enumerate(unique_speakers)})
            else:
                multspeak = "None"
    except Exception as e:
        print("Error:", e)
        multspeak = "None"
    multspeakjson = multspeak
    return multspeak  # name and ID in a JSON map


def is_multi_lang_model():
    """Return a JSON index->language map for the loaded model, or "None"."""
    global multlang
    global tts
    if tts is None:
        multlang = "None"
        return multlang
    try:
        value = tts.languages
        if value is not None:
            # De-duplicate while preserving order, then index.
            unique_lang = list(dict.fromkeys(value))
            multlang = json.dumps({index: lang for index, lang in enumerate(unique_lang)})
        else:
            multlang = "None"
    except Exception as e:
        print("Error:", e)
        multlang = "None"
    return multlang


def _installed_model_paths(exclude_voice_conversion):
    """Scan the Coqui models directory for installed model checkpoint paths.

    Shared by get_coqui_models and coqui_checkmap (previously duplicated).
    bark/tortoise models are selected by directory only; other models
    contribute one entry per checkpoint file (.pt/.tar/.pkl/.pth).
    Side effect (kept from the original): chdir into the models directory.
    """
    target_directory = ModelManager().output_prefix
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)
    os.chdir(target_directory)

    folder_list = []
    for folder in os.listdir(target_directory):
        if not os.path.isdir(os.path.join(target_directory, folder)):
            continue
        if "--" not in folder or "vocoder" in folder.lower():
            continue
        if exclude_voice_conversion and "voice_conversion_models" in folder.lower():
            continue
        folder_list.append(folder)

    file_paths = []
    for folder in folder_list:
        _config_path = os.path.join(target_directory, folder, "config.json")
        if model_type(_config_path) in ("bark", "tortoise"):
            file_paths.append(str(Path(folder, '')))
        else:
            for file in os.listdir(os.path.join(target_directory, folder)):
                if file.endswith(('.pt', '.tar', '.pkl', '.pth')) and not file.startswith('.'):
                    file_paths.append(str(Path(folder, file)))
    return file_paths


def get_coqui_models():
    """Return installed model paths as a JSON list (dropdown source)."""
    return json.dumps(_installed_model_paths(exclude_voice_conversion=True))


def coqui_checkmap():
    """Return installed models as an indented JSON list of {"id": path} records."""
    keyed_json_list = [{"id": item} for item in _installed_model_paths(exclude_voice_conversion=False)]
    return json.dumps(keyed_json_list, indent=2)


def get_coqui_download_models():
    """Return the downloadable Coqui catalogue as JSON records.

    Each "type/lang/id/name" entry is split into its parts plus the
    recombined full id (e.g. tts_models/bn/custom/vits-male).
    """
    formatted_list = []
    for model in TTS.list_models():
        split_model = model.split('/')
        formatted_list.append({
            "type": split_model[0],
            "lang": split_model[1],
            "id-only": split_model[2],
            "name-only": split_model[3],
            "id": split_model[0] + '/' + split_model[1] + "/" + split_model[2] + "/" + split_model[3],
        })
    return json.dumps(formatted_list, indent=4)


def coqui_modeldownload(_modeldownload):
    """Download a model by catalogue name; return "True" or "False" as strings."""
    global _gpu
    print(_modeldownload)
    try:
        # NOTE(review): `tts` is a local here, not the module global — the
        # downloaded model is discarded after the download; confirm intended.
        tts = TTS(model_name=_modeldownload, progress_bar=True, gpu=_gpu)
        status = "True"
    except Exception as e:  # BUGFIX: narrowed from a bare except; log the cause
        print("Download failed:", e)
        status = "False"
    return status


def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id):
    """Synthesize `text` with the selected model and return a Flask WAV response.

    speaker_id may carry inline indices: "<model>[<speaker-idx>][<lang-idx>]".
    mspker_id / language_id act as fallbacks when the inline form is absent.
    style_wav is accepted for API compatibility but unused here.
    """
    global type
    global multlang
    global multspeak
    global loadedModel
    global spkdirectory
    global multspeakjson
    global _gpu

    try:
        # Split "<model>[spk][lang]" into the model name and the two indices.
        parts = speaker_id.split("[", 1)
        speaker_id = parts[0]
        remainder = parts[1].rstrip("]")
        variables = remainder.split("][")
        # Non-numeric pieces default to index 0.
        mspker_id = int(variables[0]) if variables[0].isdigit() else 0
        language_id = int(variables[1]) if variables[1].isdigit() else 0
        # multspeak = mspker_id  # might break previews
        multlang = language_id
    except Exception:
        pass  # plain model name without the [..][..] suffix

    print("mspker_id: ", mspker_id)
    print("language_id: ", language_id)

    # A numeric multlang/multspeak flags multi-lang / multi-speaker mode;
    # anything non-numeric (e.g. "None") leaves that mode disabled.
    try:
        if language_id is not None:
            multlang = float(language_id)
    except ValueError:
        pass
    try:
        if mspker_id is not None:
            multspeak = float(mspker_id)
    except ValueError:
        pass

    if loadedModel != speaker_id:
        print("MODEL NOT LOADED!!! Loading... ", loadedModel, speaker_id)
        print("Loading :", speaker_id, "GPU is: ", _gpu)
        load_model(speaker_id, _gpu, True)

    audio_buffer = io.BytesIO()
    spk_is_idx = isinstance(multspeak, (int, float))
    lang_is_idx = isinstance(multlang, (int, float))

    if not spk_is_idx and not lang_is_idx:
        print("Single Model")
        tts.tts_to_file(text, file_path=audio_buffer)
    elif spk_is_idx and not lang_is_idx:
        print("speaker only")
        if type == "bark" or type == "tortoise":
            try:
                if multspeakjson == "":
                    # Speaker map not cached yet: build it now.
                    parsed_multspeak = json.loads(is_multi_speaker_model())
                else:
                    parsed_multspeak = json.loads(multspeakjson)
                value_at_key = parsed_multspeak.get(str(mspker_id))
                if value_at_key == "random":
                    tts.tts_to_file(text, file_path=audio_buffer)
                else:
                    print("using speaker ", value_at_key)
                    tts.tts_to_file(text, file_path=audio_buffer,
                                    voice_dir=spkdirectory, speaker=value_at_key)
            except Exception as e:
                print("An error occurred:", str(e))
        else:
            tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)],
                            file_path=audio_buffer)
    elif not spk_is_idx and lang_is_idx:
        print("lang only")
        tts.tts_to_file(text, language=tts.languages[int(language_id)],
                        file_path=audio_buffer)
    else:
        print("spk and lang")
        tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)],
                        language=tts.languages[int(language_id)],
                        file_path=audio_buffer)

    audio_buffer.seek(0)
    response = send_file(audio_buffer, mimetype="audio/wav")
    # Reset the per-request indices so the next call re-detects its mode.
    multlang = None
    multspeak = None
    return response