diff --git a/modules/voice_conversion/rvc_module.py b/modules/voice_conversion/rvc_module.py
index 9db21f1..c00b5c1 100644
--- a/modules/voice_conversion/rvc_module.py
+++ b/modules/voice_conversion/rvc_module.py
@@ -19,14 +19,24 @@ from scipy.io import wavfile
 import os
 import io
+from py7zr import pack_7zarchive, unpack_7zarchive
+import shutil
+
 DEBUG_PREFIX = ""
 RVC_MODELS_PATH = "data/models/rvc/"
 IGNORED_FILES = [".placeholder"]
 
+TEMP_FOLDER_PATH = "data/tmp/"
+
 RVC_INPUT_PATH = "data/tmp/rvc_input.wav"
 RVC_OUTPUT_PATH ="data/tmp/rvc_output.wav"
 
 save_file = False
+
+# register file format at first.
+shutil.register_archive_format('7zip', pack_7zarchive, description='7zip archive')
+shutil.register_unpack_format('7zip', ['.7z'], unpack_7zarchive)
+
 
 def rvc_get_models_list():
     """
     Return the list of RVC model in the expected folder
@@ -75,6 +85,72 @@ def rvc_get_models_list():
         print(e)
         abort(500, DEBUG_PREFIX + " Exception occurs while searching for RVC models.")
 
+def rvc_upload_models():
+    """
+    Install RVC models uploaded via ST request
+        - Each part of the multipart request is one model archive, named by its part name
+        - The archive is unpacked into RVC_MODELS_PATH/<archive name without extension>,
+          then flattened so every file sits in that folder's root
+        - Needs flask MAX_CONTENT_LENGTH to be adapted accordingly
+
+    Returns the JSON string {"status": "ok"}; any failure aborts with HTTP 500.
+    """
+    try:
+        request_files = request.files
+        print(DEBUG_PREFIX, "received:", request_files)
+
+        # The temp folder may not exist yet on a fresh install
+        os.makedirs(TEMP_FOLDER_PATH, exist_ok=True)
+
+        for request_file_name in request_files:
+            # The part name comes from the client: keep only its last path
+            # component so it cannot point outside the temp/model folders
+            safe_file_name = os.path.basename(request_file_name.replace("\\", "/"))
+            model_folder_name, _ = os.path.splitext(safe_file_name)
+            if model_folder_name in ("", ".", ".."):
+                raise ValueError("Invalid model archive name: " + repr(request_file_name))
+
+            zip_file_path = os.path.join(TEMP_FOLDER_PATH, safe_file_name)
+            model_folder_path = os.path.join(RVC_MODELS_PATH, model_folder_name)
+
+            print("> Saving", safe_file_name, "to", zip_file_path)
+            request_files.get(request_file_name).save(zip_file_path)
+
+            try:
+                # NOTE(review): paths stored inside the archive are not checked here;
+                # confirm the registered unpackers reject entries escaping the target
+                shutil.unpack_archive(zip_file_path, model_folder_path)
+            finally:
+                # The uploaded archive is only needed for the extraction
+                if os.path.exists(zip_file_path):
+                    os.remove(zip_file_path)
+
+            print("> Cleaning up model folder", model_folder_path)
+
+            print("> Moving file to model root folder")
+            # Move all files to model root folder
+            for root, _, files in os.walk(model_folder_path):
+                if root == model_folder_path:
+                    continue  # these files already sit in the model root
+                for file in files:
+                    shutil.move(os.path.join(root, file), os.path.join(model_folder_path, file))
+
+            print("> Deleting model subfolders")
+            # Walk bottom-up: os.rmdir only removes empty folders, so nested
+            # subfolders must be deleted before their parents
+            for root, dirs, _ in os.walk(model_folder_path, topdown=False):
+                for dir_name in dirs:
+                    os.rmdir(os.path.join(root, dir_name))
+
+            print("> Success")
+
+        response = json.dumps({"status":"ok"})
+        return response
+
+    except Exception as e:
+        print(e)
+        abort(500, DEBUG_PREFIX + " Exception occurs while uploading models.")
+
 def rvc_process_audio():
     """
     Process request audio file with the loaded RVC model
diff --git a/requirements-rvc.txt b/requirements-rvc.txt
index aedfcb9..b4540e4 100644
--- a/requirements-rvc.txt
+++ b/requirements-rvc.txt
@@ -8,3 +8,4 @@ bitarray==2.8.1
 sacrebleu==2.3.1
 numpy==1.23.0
 ffmpeg-python==0.2.0
+py7zr==0.20.6
\ No newline at end of file
diff --git a/server.py b/server.py
index ce69546..9626696 100644
--- a/server.py
+++ b/server.py
@@ -91,6 +91,7 @@ parser.add_argument("--talkinghead-gpu", action="store_true", help="Run the talk
 parser.add_argument("--coqui-gpu", action="store_true", help="Run the voice models on the GPU (CPU is default)")
 parser.add_argument("--coqui-models", help="Install given Coqui-api TTS model at launch (comma separated list, last one will be loaded at start)")
 
+parser.add_argument("--max-content-length", type=int, help="Maximum size of an incoming request in MB, e.g. for RVC model uploads (default: 100)")
 parser.add_argument("--rvc-save-file", action="store_true", help="Save the last rvc input/output audio file into data/tmp/ folder (for research)")
 
 parser.add_argument("--stt-vosk-model-path", help="Load a custom vosk speech-to-text model")
@@ -327,6 +328,15 @@ CORS(app) # allow cross-domain requests
 Compress(app) # compress responses
 app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024
 
+# Optional override of the default 100 MB limit set just above; the
+# value is a number of megabytes. int() keeps this working whether
+# argparse hands over an int or the raw command-line string.
+max_content_length = args.max_content_length or None
+
+if max_content_length is not None:
+    print("Setting MAX_CONTENT_LENGTH to", max_content_length, "MB")
+    app.config["MAX_CONTENT_LENGTH"] = int(max_content_length) * 1024 * 1024
+
 if "vosk-stt" in modules:
     print("Initializing Vosk speech-recognition (from ST request file)")
     vosk_model_path = (
@@ -365,6 +375,7 @@ if "streaming-stt" in modules:
 
 if "rvc" in modules:
     print("Initializing RVC voice conversion (from ST request file)")
+    print("Model upload size is limited by MAX_CONTENT_LENGTH (see --max-content-length)")
     rvc_save_file = (
         args.rvc_save_file
         if args.rvc_save_file
@@ -380,8 +391,10 @@ if "rvc" in modules:
     rvc_module.save_file = rvc_save_file
     rvc_module.fix_model_install()
     app.add_url_rule("/api/voice-conversion/rvc/get-models-list", view_func=rvc_module.rvc_get_models_list, methods=["POST"])
+    app.add_url_rule("/api/voice-conversion/rvc/upload-models", view_func=rvc_module.rvc_upload_models, methods=["POST"])
     app.add_url_rule("/api/voice-conversion/rvc/process-audio", view_func=rvc_module.rvc_process_audio, methods=["POST"])
 
+
 if "coqui-tts" in modules:
     mode = "GPU" if args.coqui_gpu else "CPU"
     print("Initializing Coqui TTS client in " + mode + " mode")