Removed wav files, uses IO memory now

This commit is contained in:
joe
2023-08-11 10:09:46 +09:00
parent aa56303230
commit 05155c01c7
2 changed files with 24 additions and 14 deletions

View File

@@ -13,6 +13,7 @@ import numpy as np
import torch.cuda
import argparse
import torch
import io
from multiprocessing import cpu_count
from fairseq import checkpoint_utils
@@ -137,19 +138,25 @@ def load_hubert():
hubert_model.eval()
def load_audio(file, sr):
def load_audio(audio_source, sr):
try:
# https://github.com/openai/whisper/blob/main/whisper/audio.py#L26
# This launches a subprocess to decode audio while down-mixing and resampling as necessary.
# Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
file = (
file.strip(" ").strip('"').strip("\n").strip('"').strip(" ")
) # 防止小白拷路径头尾带了空格和"和回车
out, _ = (
ffmpeg.input(file, threads=0)
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
)
if isinstance(audio_source, str): # If it's a file path
audio_input = audio_source.strip(" ").strip('"').strip("\n").strip('"')
out, _ = (
ffmpeg.input(audio_input, threads=0)
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
.run(cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
)
elif isinstance(audio_source, io.BytesIO): # If it's a BytesIO object
audio_source.seek(0)
out, _ = (
ffmpeg.input("pipe:0", format="wav", threads=0)
.output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
.run(input=audio_source.read(), cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
)
else:
raise ValueError("Invalid audio source")
except Exception as e:
raise RuntimeError(f"Failed to load audio: {e}")

View File

@@ -17,11 +17,13 @@ import wave
import modules.voice_conversion.rvc.rvc as rvc
from scipy.io import wavfile
import os
import io
import logging
DEBUG_PREFIX = "<RVC module>"
INPUT_FILE_PATH = "data/tmp/rvc_input.wav" #"./data/tmp/rvc_input.wav"
OUTPUT_FILE_PATH = "data/tmp/rvc_output.wav" #"./data/tmp/rvc_output.wav"
INPUT_FILE_PATH = io.BytesIO() #"./data/tmp/rvc_input.wav"
OUTPUT_FILE_PATH = io.BytesIO() #"./data/tmp/rvc_output.wav"
def rvc_process_audio():
"""
@@ -71,6 +73,7 @@ def rvc_process_audio():
#print(out_path) # DBG
print(DEBUG_PREFIX, "Audio converted using RVC model:", rvc.rvc_model_name)
OUTPUT_FILE_PATH.seek(0)
response = send_file(OUTPUT_FILE_PATH, mimetype="audio/x-wav")
return response