Added the --rvc-save-file parameter to save the last RVC input/output audio files into data/tmp, for research purposes and easy sharing of TTS/RVC sample files.

This commit is contained in:
Tony Ribeiro
2023-08-17 01:43:13 +02:00
parent d3d6729cdd
commit 185fc1334f
3 changed files with 29 additions and 2 deletions

3
data/tmp/.placeholder Normal file
View File

@@ -0,0 +1,3 @@
Put the RVC models folder here.
It must have a ".pth" file in it.
An .index file is optional but could help improve the processing time/quality.

View File

@@ -23,6 +23,10 @@ DEBUG_PREFIX = "<RVC module>"
# Folder holding the RVC models (presumably one sub-folder per model -- confirm).
RVC_MODELS_PATH = "data/models/rvc/"
# File names in this list are presumably skipped when listing models -- confirm.
IGNORED_FILES = [".placeholder"]

# On-disk locations used for the request audio instead of in-memory buffers
# when `save_file` is enabled; each request overwrites the previous files.
RVC_INPUT_PATH = "data/tmp/rvc_input.wav"
RVC_OUTPUT_PATH = "data/tmp/rvc_output.wav"

# Module-level switch: when True, rvc_process_audio writes its input/output
# audio to the paths above. Intended to be overridden by the server at startup.
save_file = False
def rvc_get_models_list():
"""
Return the list of RVC model in the expected folder
@@ -83,6 +87,8 @@ def rvc_process_audio():
rmsMixRate: rmsMixRate,
protect: float [0,1]
"""
global save_file
try:
file = request.files.get('AudioFile')
print(DEBUG_PREFIX, "received:", file)
@@ -90,9 +96,15 @@ def rvc_process_audio():
# Create new instances of io.BytesIO() for each request
input_audio_path = io.BytesIO()
output_audio_path = io.BytesIO()
if save_file:
input_audio_path = RVC_INPUT_PATH
output_audio_path = RVC_OUTPUT_PATH
file.save(input_audio_path)
input_audio_path.seek(0)
if not save_file:
input_audio_path.seek(0)
parameters = json.loads(request.form["json"])
@@ -145,7 +157,9 @@ def rvc_process_audio():
#out_path = os.path.join("data/", "rvc_output.wav")
wavfile.write(output_audio_path, tgt_sr, wav_opt)
output_audio_path.seek(0) # Reset cursor position
if not save_file:
output_audio_path.seek(0) # Reset cursor position
print(DEBUG_PREFIX, "Audio converted using RVC model:", rvc.rvc_model_name)

View File

@@ -91,6 +91,8 @@ parser.add_argument("--talkinghead-gpu", action="store_true", help="Run the talk
parser.add_argument("--coqui-gpu", action="store_true", help="Run the voice models on the GPU (CPU is default)")
parser.add_argument("--coqui-models", help="Install given Coqui-api TTS model at launch (comma separated list, last one will be loaded at start)")
# Debug/research switch: keeps the last RVC input/output audio on disk.
parser.add_argument("--rvc-save-file", action="store_true", help="Save the last rvc input/output audio file into data/tmp/ folder (for research)")
parser.add_argument("--stt-vosk-model-path", help="Load a custom vosk speech-to-text model")
# Help text names whisper: this flag configures the whisper model, not vosk.
parser.add_argument("--stt-whisper-model-path", help="Load a custom whisper speech-to-text model")
sd_group = parser.add_mutually_exclusive_group()
@@ -363,11 +365,19 @@ if "streaming-stt" in modules:
if "rvc" in modules:
    print("Initializing RVC voice conversion (from ST request file)")

    # --rvc-save-file is a store_true flag; normalise it to a plain bool
    # before handing it to the RVC module.
    rvc_save_file = bool(args.rvc_save_file)

    if rvc_save_file:
        print("RVC saving file option detected, input/output audio will be saved into data/tmp/ folder")

    # Put the module folder on sys.path before importing rvc_module
    # (presumably it imports its siblings by bare name -- confirm).
    import sys
    sys.path.insert(0,'modules/voice_conversion')
    import modules.voice_conversion.rvc_module as rvc_module

    # Propagate the CLI option to the module-level flag read per request.
    rvc_module.save_file = rvc_save_file
    rvc_module.fix_model_install()

    app.add_url_rule("/api/voice-conversion/rvc/get-models-list", view_func=rvc_module.rvc_get_models_list, methods=["POST"])
    app.add_url_rule("/api/voice-conversion/rvc/process-audio", view_func=rvc_module.rvc_process_audio, methods=["POST"])