diff --git a/data/tmp/.placeholder b/data/tmp/.placeholder new file mode 100644 index 0000000..67cc998 --- /dev/null +++ b/data/tmp/.placeholder @@ -0,0 +1,3 @@ +Put RVC models folder here. +Must have ".pth" file in it +.index file is optional but could help improve the processing time/quality. diff --git a/modules/voice_conversion/rvc_module.py b/modules/voice_conversion/rvc_module.py index cd7b156..9db21f1 100644 --- a/modules/voice_conversion/rvc_module.py +++ b/modules/voice_conversion/rvc_module.py @@ -23,6 +23,10 @@ DEBUG_PREFIX = "" RVC_MODELS_PATH = "data/models/rvc/" IGNORED_FILES = [".placeholder"] +RVC_INPUT_PATH = "data/tmp/rvc_input.wav" +RVC_OUTPUT_PATH ="data/tmp/rvc_output.wav" +save_file = False + def rvc_get_models_list(): """ Return the list of RVC model in the expected folder @@ -83,6 +87,8 @@ def rvc_process_audio(): rmsMixRate: rmsMixRate, protect: float [0,1] """ + global save_file + try: file = request.files.get('AudioFile') print(DEBUG_PREFIX, "received:", file) @@ -90,9 +96,15 @@ def rvc_process_audio(): # Create new instances of io.BytesIO() for each request input_audio_path = io.BytesIO() output_audio_path = io.BytesIO() + + if save_file: + input_audio_path = RVC_INPUT_PATH + output_audio_path = RVC_OUTPUT_PATH file.save(input_audio_path) - input_audio_path.seek(0) + + if not save_file: + input_audio_path.seek(0) parameters = json.loads(request.form["json"]) @@ -145,7 +157,9 @@ def rvc_process_audio(): #out_path = os.path.join("data/", "rvc_output.wav") wavfile.write(output_audio_path, tgt_sr, wav_opt) - output_audio_path.seek(0) # Reset cursor position + + if not save_file: + output_audio_path.seek(0) # Reset cursor position print(DEBUG_PREFIX, "Audio converted using RVC model:", rvc.rvc_model_name) diff --git a/server.py b/server.py index c0721fc..ce69546 100644 --- a/server.py +++ b/server.py @@ -91,6 +91,8 @@ parser.add_argument("--talkinghead-gpu", action="store_true", help="Run the talk parser.add_argument("--coqui-gpu", 
action="store_true", help="Run the voice models on the GPU (CPU is default)") parser.add_argument("--coqui-models", help="Install given Coqui-api TTS model at launch (comma separated list, last one will be loaded at start)") +parser.add_argument("--rvc-save-file", action="store_true", help="Save the last rvc input/output audio file into data/tmp/ folder (for research)") + parser.add_argument("--stt-vosk-model-path", help="Load a custom vosk speech-to-text model") parser.add_argument("--stt-whisper-model-path", help="Load a custom vosk speech-to-text model") sd_group = parser.add_mutually_exclusive_group() @@ -363,11 +365,19 @@ if "streaming-stt" in modules: if "rvc" in modules: print("Initializing RVC voice conversion (from ST request file)") + rvc_save_file = ( + args.rvc_save_file + if args.rvc_save_file + else False) + + if rvc_save_file: + print("RVC saving file option detected, input/output audio will be saved into data/tmp/ folder") import sys sys.path.insert(0,'modules/voice_conversion') import modules.voice_conversion.rvc_module as rvc_module + rvc_module.save_file = rvc_save_file rvc_module.fix_model_install() app.add_url_rule("/api/voice-conversion/rvc/get-models-list", view_func=rvc_module.rvc_get_models_list, methods=["POST"]) app.add_url_rule("/api/voice-conversion/rvc/process-audio", view_func=rvc_module.rvc_process_audio, methods=["POST"])