Fix coqui TTS module

This commit is contained in:
Cohee
2023-07-25 12:46:17 +03:00
parent eba02d4f23
commit 5cd40e0520
5 changed files with 452 additions and 3 deletions

View File

@@ -17,3 +17,4 @@ silero-api-server
chromadb
sentence_transformers
edge-tts
TTS

View File

@@ -16,4 +16,5 @@ diffusers==0.16.1
silero-api-server
chromadb
sentence_transformers
edge-tts
edge-tts
TTS

View File

@@ -17,3 +17,4 @@ accelerate
chromadb
sentence_transformers
edge-tts
TTS

View File

@@ -82,7 +82,7 @@ parser.add_argument('--chroma-persist', help="ChromaDB persistence", default=Tru
parser.add_argument(
"--secure", action="store_true", help="Enforces the use of an API key"
)
parser.add_argument("--coqui-gpu", action="store_false", help="Run the voice models on the GPU (CPU is default)")
sd_group = parser.add_mutually_exclusive_group()
local_sd = sd_group.add_argument_group("sd-local")
@@ -248,6 +248,11 @@ if "silero-tts" in modules:
tts_service.update_sample_text(SILERO_SAMPLE_TEXT)
tts_service.generate_samples()
if "coqui-tts" in modules:
mode = "CPU" if args.coqui_gpu else "GPU"
print("Initializing Coqui TTS client in " + mode + " mode")
import tts_coqui as coqui
from tts_coqui import *
if "edge-tts" in modules:
print("Initializing Edge TTS client")
@@ -285,7 +290,7 @@ if "chromadb" in modules:
)
print(f"ChromaDB is remotely configured at {args.chroma_host}:{chroma_port}")
chromadb_embedder = SentenceTransformer(embedding_model)
chromadb_embedder = SentenceTransformer(embedding_model, device=device_string)
chromadb_embed_fn = lambda *args, **kwargs: chromadb_embedder.encode(*args, **kwargs).tolist()
# Check if the db is connected and running, otherwise tell the user
@@ -558,6 +563,59 @@ def api_classify_labels():
labels = [x["label"] for x in classification]
return jsonify({"labels": labels})
@app.route("/api/coqui-tts/load", methods=["GET"])
@require_module("coqui-tts")
def load_model():
    """Load the Coqui model named in the `_model` query parameter."""
    # Accessing the URL parameters
    _model = request.args.get('_model')
    # NOTE(review): --coqui-gpu is declared with action="store_false"
    # (default True), so this inversion enables the GPU only when the
    # flag IS passed — confirm the intended polarity.
    _gpu = False if args.coqui_gpu else True
    print(_gpu)
    _progress = request.args.get('_progress')
    return coqui.load_model(_model, _gpu, _progress)

@app.route("/api/coqui-tts/list", methods=["GET"])  # dropdown list
@require_module("coqui-tts")
def coqui_list():
    """Return the JSON list of locally available model files."""
    return coqui.get_coqui_models()

@app.route("/api/coqui-tts/multspeaker", methods=["GET"])
@require_module("coqui-tts")
def is_multi_speaker_model():
    """Return the speaker map for the loaded model, or "None"."""
    return coqui.is_multi_speaker_model()

@app.route("/api/coqui-tts/multlang", methods=["GET"])
@require_module("coqui-tts")
def is_multi_lang_model():
    """Return the language map for the loaded model, or "None"."""
    return coqui.is_multi_lang_model()

@app.route("/api/coqui-tts/speaker_id", methods=["GET"])  # available voices
@require_module("coqui-tts")
def coqui_download_models():
    """Return the catalogue of models available for download."""
    return coqui.get_coqui_download_models()

@app.route("/api/coqui-tts/checkmap", methods=["GET"])  # checkmap
@require_module("coqui-tts")
def coqui_checkmap():
    """Return local model files as [{"id": path}, ...] JSON."""
    return coqui.coqui_checkmap()

@app.route("/api/coqui-tts/download", methods=["GET"])
@require_module("coqui-tts")
def coqui_modeldownload():
    """Download the model named in the `model` query parameter."""
    _modeldownload = request.args.get('model')
    return coqui.coqui_modeldownload(_modeldownload)

@app.route("/api/coqui-tts/tts", methods=["GET"])
@require_module("coqui-tts")
def coqui_tts():
    """Synthesize speech and stream the result back (WAV)."""
    # Accessing the URL parameters
    text = request.args.get('text')
    speaker_id = request.args.get('speaker_id')
    mspker_id = request.args.get('mspker')
    language_id = request.args.get('language_id')
    style_wav = request.args.get('style_wav')
    return coqui.coqui_tts(text, speaker_id, mspker_id, style_wav, language_id)
@app.route("/api/image", methods=["POST"])
@require_module("sd")

388
tts_coqui.py Normal file
View File

@@ -0,0 +1,388 @@
import io
import asyncio
import json
import os
import torch
import gc
from pathlib import Path
import TTS
from TTS.api import TTS
from TTS.utils.manage import ModelManager
from TTS.tts.configs.bark_config import BarkConfig
from TTS.tts.models.bark import Bark
from TTS.tts.configs.tortoise_config import TortoiseConfig
from TTS.tts.models.tortoise import Tortoise
from flask import send_file
# --- Module-level state shared by all functions below ---
tts = None  # currently loaded TTS instance; None until load_model() succeeds
type = None  # model architecture name read from config.json (NOTE: shadows the builtin `type`)
multlang = "None"  # JSON map of language index -> id, or the string "None"
multspeak = "None"  # JSON map of speaker index -> name, or the string "None"
loadedModel = "None"  # identifier of the model currently held in `tts`
spkdirectory = ""  # directory holding Bark speaker voice folders (set by is_multi_speaker_model)
multspeakjson = ""  # cached copy of the last multspeak JSON payload
def model_type(_config_path):
    """Return the value of the "model" key from a Coqui config.json.

    Parameters:
        _config_path: Path to the model's config.json file.

    Returns:
        The model architecture name (e.g. "bark", "tortoise", "vits"),
        or None when the file is missing, is not valid JSON, cannot be
        read, or has no "model" key.
    """
    try:
        with open(_config_path, 'r') as config_file:
            config_data = json.load(config_file)
    except FileNotFoundError:
        print("Config file not found.")
        return None
    except json.JSONDecodeError:
        # Invalid JSON: treat as "unknown model type" rather than crashing.
        return None
    except Exception as e:
        # Previously swallowed silently; log so failures are diagnosable.
        print("An error occurred:", str(e))
        return None
    if "model" in config_data:
        return config_data["model"]
    print("ERR: The key 'model' is not present in the config file.")
    return None
def load_model(_model, _gpu, _progress):
    """Load a Coqui TTS model into the module-global `tts`.

    Parameters:
        _model: Model identifier, "<model-dir>" or "<model-dir>/<file>".
        _gpu: Whether to place the model on the GPU; also frees the
              previously loaded model first.
        _progress: Whether to show a progress bar while loading.

    Returns:
        A status string: "Loaded" on success, an error description, or
        "Unknown error occurred" when nothing could be loaded.
    """
    global tts
    global type
    global loadedModel
    global multlang
    global multspeak
    # BUGFIX: validate before os.path.split, which raises TypeError on None
    # (the original checked after splitting, so this branch was unreachable).
    if _model is None:
        if type is None:
            type = "Unknown"
        return "ERROR: Invalid model name or path."
    status = None
    _model_directory, _file = os.path.split(_model)
    if _model_directory == "":
        # No filename provided: the whole string is the directory.
        _model_directory = _file
        _file = None
    try:
        if _gpu:
            # Reclaim memory held by the previously loaded model.
            del tts
            try:
                gc.collect()
                torch.cuda.empty_cache()
            except Exception:
                pass
    except Exception as e:
        status = str(e)
    _target_directory = ModelManager().output_prefix  # models location
    # On-disk model folders use "--" where the model id uses separators.
    _modified_speaker_id = _model_directory.replace("\\", "--")
    if _file is not None:
        _model_path = os.path.join(_target_directory, _modified_speaker_id, _file)
    else:
        _model_path = os.path.join(_target_directory, _modified_speaker_id)
    _config_path = os.path.join(_target_directory, _modified_speaker_id, "config.json")
    _model_kind = model_type(_config_path)  # read config.json once
    if _model_kind == "tortoise":
        print("Loading Tortoise...")
        print("_model", _model)
        print("Tortoise not supported at the moment im tired of working on this")
    if _model_kind == "bark":
        print("Loading Bark...")
        _loadbarkmodel = _model_directory.replace("--", "/")
        tts = TTS(_loadbarkmodel, gpu=_gpu)
        loadedModel = _model
        # BUGFIX: status was never set on a successful Bark load, so the
        # caller received "Unknown error occurred" even on success.
        status = "Loaded"
    if _model_kind not in ("tortoise", "bark"):
        try:
            print("Loading ", _model_kind)
            tts = TTS(model_path=_model_path, config_path=_config_path, progress_bar=_progress, gpu=_gpu)
            status = "Loaded"
            loadedModel = _model
        except Exception as e:
            print("An exception occurred while loading VITS:", str(e))
            print("Continuing with other parts of the code...")
    type = _model_kind
    print("Type: ", type)
    if status is None:
        status = "Unknown error occurred"
    if type is None:
        type = "Unknown"
    return status
def is_multi_speaker_model():
    """Return a JSON map of speaker index -> speaker name for the loaded
    model, or the string "None" when no speaker choice is available.

    Side effects: caches the payload in the module-global `multspeakjson`
    and, for Bark models, records the speaker folder in `spkdirectory`.
    """
    global multspeak
    global type
    global spkdirectory
    global multspeakjson
    if tts is None:
        # No model loaded yet.
        multspeak = "None"
        return multspeak
    try:
        if type == "bark":
            # Bark speakers are folders on disk, not attributes of the model.
            _target_directory = ModelManager().output_prefix
            # Convert _target_directory to a string and remove the trailing backslash if present
            _target_directory_str = str(_target_directory)
            if _target_directory_str.endswith("\\"):
                _target_directory_str = _target_directory_str[:-1]
            spkdirectory = os.path.join(_target_directory_str, "bark_v0", "speakers")
            subfolder_names = [folder for folder in os.listdir(spkdirectory) if os.path.isdir(os.path.join(spkdirectory, folder))]
            subfolder_names.insert(0, "random")  # Add "random" as the first element in the subfolder_names list
            # De-duplicate while preserving order.
            unique_names = list(dict.fromkeys(subfolder_names))
            multspeak = json.dumps({index: name for index, name in enumerate(unique_names)})
            #print(multspeak)
        else:
            value = tts.speakers
            if value is not None:
                # De-duplicate while preserving order, then key by index.
                unique_speakers = list(dict.fromkeys(value))
                speaker_dict = {index: value for index, value in enumerate(unique_speakers)}
                multspeak = json.dumps(speaker_dict)
                #print(multspeak)
            else:
                # Single-speaker model.
                multspeak = "None"
    except Exception as e:
        print("Error:", e)
        multspeak = "None"
    multspeakjson = multspeak
    return multspeak  # return name and ID in named json
def is_multi_lang_model():
    """Return a JSON map of language index -> language id for the loaded
    model, or the string "None" when the model is monolingual or no
    model is loaded. Also stores the result in the global `multlang`.
    """
    global multlang
    if tts is None:
        # Nothing loaded yet.
        multlang = "None"
        return multlang
    try:
        langs = tts.languages
        if langs is None:
            multlang = "None"
        else:
            # De-duplicate while preserving order, then key by index.
            deduped = list(dict.fromkeys(langs))
            multlang = json.dumps(dict(enumerate(deduped)))
    except Exception as err:
        print("Error:", err)
        multlang = "None"
    return multlang
def get_coqui_models():
    """List locally downloaded model files for the UI dropdown.

    Scans the Coqui models directory for model folders (names containing
    "--", excluding vocoders and voice-conversion models). Bark/Tortoise
    models are listed by folder; other models are listed per checkpoint
    file (.pt/.tar/.pkl/.pth).

    Returns:
        A JSON array of relative model paths.
    """
    manager = ModelManager()
    target_directory = manager.output_prefix
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)
    # NOTE(review): changing the process-wide cwd is a side effect other
    # code may rely on (coqui_checkmap historically did); kept as-is.
    os.chdir(target_directory)
    folder_list = [
        folder for folder in os.listdir(target_directory)
        if os.path.isdir(os.path.join(target_directory, folder))
        and "--" in folder
        and "vocoder" not in folder.lower()
        and "voice_conversion_models" not in folder.lower()
    ]
    file_paths = []
    for folder in folder_list:
        _config_path = os.path.join(target_directory, folder, "config.json")
        kind = model_type(_config_path)  # read config.json once per folder
        if kind == "bark" or kind == "tortoise":
            # Bark/Tortoise are addressed by folder, not checkpoint file.
            file_paths.append(str(Path(folder, '')))
        else:
            for file in os.listdir(os.path.join(target_directory, folder)):
                if file.endswith(('.pt', '.tar', '.pkl', '.pth')) and not file.startswith('.'):
                    file_paths.append(str(Path(folder, file)))
    return json.dumps(file_paths)
def coqui_checkmap():
    """Return local model files as a keyed JSON list.

    Same scan as get_coqui_models (minus the voice-conversion filter),
    but each entry is wrapped as {"id": path} and the JSON is indented.

    Returns:
        A JSON string: [{"id": "<folder>/<file>"}, ...]
    """
    manager = ModelManager()
    target_directory = manager.output_prefix
    if not os.path.exists(target_directory):
        os.makedirs(target_directory)
    # NOTE(review): process-wide cwd change kept for compatibility.
    os.chdir(target_directory)
    # BUGFIX: list target_directory explicitly instead of the bare
    # os.listdir(), which silently depended on the chdir above holding.
    folder_list = [
        folder for folder in os.listdir(target_directory)
        if os.path.isdir(os.path.join(target_directory, folder))
        and "--" in folder
        and "vocoder" not in folder.lower()
    ]
    file_paths = []
    for folder in folder_list:
        _config_path = os.path.join(target_directory, folder, "config.json")
        kind = model_type(_config_path)  # read config.json once per folder
        if kind == "bark" or kind == "tortoise":
            file_paths.append(str(Path(folder, '')))
        else:
            for file in os.listdir(os.path.join(target_directory, folder)):
                if file.endswith(('.pt', '.tar', '.pkl', '.pth')) and not file.startswith('.'):
                    file_paths.append(str(Path(folder, file)))
    # Convert the list into a list of dictionaries with "id" as the key.
    keyed_json_list = [{"id": item} for item in file_paths]
    # Convert the list to a JSON string with indentation.
    return json.dumps(keyed_json_list, indent=2)
def get_coqui_download_models():
    """Return the Coqui model catalogue (downloadable voices) as JSON.

    Each entry splits a model id like "tts_models/bn/custom/vits-male"
    into its type / language / dataset / name components plus the full id.
    """
    entries = []
    for model_id in TTS.list_models():
        parts = model_id.split('/')
        entries.append({
            "type": parts[0],       # type
            "lang": parts[1],       # lang
            "id-only": parts[2],    # id
            "name-only": parts[3],  # name
            # combined id and name, e.g. tts_models/bn/custom/vits-male
            "id": parts[0] + '/' + parts[1] + "/" + parts[2] + "/" + parts[3],
        })
    return json.dumps(entries, indent=4)
def coqui_modeldownload(_modeldownload):
    """Download a Coqui model by name.

    Parameters:
        _modeldownload: Full model id, e.g. "tts_models/en/ljspeech/vits".

    Returns:
        The string "True" on success, "False" on any failure (callers
        expect a string, not a bool).
    """
    print(_modeldownload)
    try:
        # Instantiating TTS by model name triggers the download; the
        # instance is intentionally discarded — loading for actual use
        # happens in load_model().
        TTS(model_name=_modeldownload, progress_bar=True, gpu=False)
        status = "True"
    except Exception as e:
        # BUGFIX: was a bare `except:` that also swallowed
        # KeyboardInterrupt/SystemExit and hid the failure reason.
        print("Model download failed:", str(e))
        status = "False"
    return status
def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id):
    """Synthesize `text` with the currently loaded model and return it as
    a WAV file response.

    Parameters:
        text: Text to synthesize.
        speaker_id: Model identifier, optionally suffixed with dynamic
            indices as "<model>[<speaker-index>][<language-index>]".
        mspker_id: Speaker index (overridden by a suffix in speaker_id).
        style_wav: Accepted but currently unused.
        language_id: Language index (overridden by a suffix in speaker_id).

    Side effects: may (re)load the model via load_model(); mutates the
    globals `multspeak`/`multlang` to route the dispatch below, then
    resets them to None for the next request.
    """
    global type
    global multlang
    global multspeak
    global loadedModel
    global spkdirectory
    global multspeakjson
    try:
        # Splitting the string to get speaker_id and the rest:
        # "<model>[spk][lang]" -> speaker_id="<model>", variables=["spk", "lang"].
        parts = speaker_id.split("[", 1)
        speaker_id = parts[0]
        remainder = parts[1].rstrip("]")
        variables = remainder.split("][")
        # Converting to integers with default values of 0 if conversion fails.
        mspker_id = int(variables[0]) if variables[0].isdigit() else 0
        language_id = int(variables[1]) if variables[1].isdigit() else 0
        # multspeak = mspker_id # might break previews
        multlang = language_id
    except Exception:
        # No "[...]" suffix present — keep the URL-supplied parameters.
        pass
        #print("exception 1")
    print("mspker_id: ", mspker_id)
    print("language_id: ", language_id)
    try:  # see if values were passed in the URL
        if language_id is not None:
            float(language_id)
            multlang = float(language_id)
        else:
            pass
    except ValueError:
        pass
    try:
        if mspker_id is not None:
            float(mspker_id)
            multspeak = float(mspker_id)
        else:
            pass
    except ValueError:
        pass
    if loadedModel != speaker_id:
        print("MODEL NOT LOADED!!! Loading... ", loadedModel, speaker_id)
        load_model(speaker_id, True, True)  # use GPU and progress bar?
    audio_buffer = io.BytesIO()
    # Dispatch on whether speaker/language indices were supplied (numeric
    # multspeak/multlang) vs. left as their non-numeric sentinels.
    if not isinstance(multspeak, (int, float)) and not isinstance(multlang, (int, float)):  # if not a number
        print("Single Model")
        tts.tts_to_file(text, file_path=audio_buffer)
    elif isinstance(multspeak, (int, float)) and not isinstance(multlang, (int, float)):
        print("speaker only")
        if type == "bark" or type == "tortoise":
            try:
                if multspeakjson == "":  # cache not populated yet — build it now
                    parsed_multspeak = json.loads(is_multi_speaker_model())
                else:
                    parsed_multspeak = json.loads(multspeakjson)
                # JSON object keys are strings, so look up by str(index).
                value_at_key = parsed_multspeak.get(str(mspker_id))
                #print(value_at_key)
                # ♪ In the jungle, the mighty jungle, the lion barks tonight ♪
                # I have a silky smooth voice, and today I will tell you about the exercise regimen of the common sloth.
                if value_at_key == "random":
                    tts.tts_to_file(text, file_path=audio_buffer)
                else:
                    print("using speaker ", value_at_key)
                    tts.tts_to_file(text, file_path=audio_buffer, voice_dir=spkdirectory, speaker=value_at_key)
            except Exception as e:
                print("An error occurred:", str(e))
        else:
            tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)], file_path=audio_buffer)
    elif not isinstance(multspeak, (int, float)) and isinstance(multlang, (int, float)):
        print("lang only")
        tts.tts_to_file(text, language=tts.languages[int(language_id)], file_path=audio_buffer)
    else:
        print("spk and lang")
        tts.tts_to_file(text, speaker=tts.speakers[int(mspker_id)], language=tts.languages[int(language_id)], file_path=audio_buffer)
    audio_buffer.seek(0)
    response = send_file(audio_buffer, mimetype="audio/wav")
    # Reset for the next dynamic tts request.
    multlang = None
    multspeak = None
    return response