diff --git a/modules/voice_conversion/rvc/rvc.py b/modules/voice_conversion/rvc/rvc.py index 4bdd454..ac5d5ed 100644 --- a/modules/voice_conversion/rvc/rvc.py +++ b/modules/voice_conversion/rvc/rvc.py @@ -66,7 +66,7 @@ class Config: or "1070" in self.gpu_name or "1080" in self.gpu_name ): - print("16系/10系显卡和P40强制单精度") + print("Forcing full precision for 16/10 series cards.") self.is_half = False config_file_change_fp32() else: @@ -84,12 +84,12 @@ class Config: # with open("trainset_preprocess_pipeline_print.py", "w") as f: # f.write(strr) elif torch.backends.mps.is_available(): - print("没有发现支持的N卡, 使用MPS进行推理") + print("No compatible GPU found, using MPS for inference.") self.device = "mps" self.is_half = False config_file_change_fp32() else: - print("没有发现支持的N卡, 使用CPU进行推理") + print("No compatible GPU found, using CPU for inference.") self.device = "cpu" self.is_half = False config_file_change_fp32() @@ -150,7 +150,7 @@ def load_audio(audio_source, sr): elif isinstance(audio_source, io.BytesIO): # If it's a BytesIO object audio_source.seek(0) out, _ = ( - ffmpeg.input("pipe:0", format="wav", threads=0) + ffmpeg.input("pipe:0", threads=0) .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr) .run(input=audio_source.read(), cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True) ) diff --git a/modules/voice_conversion/rvc/vc_infer_pipeline.py b/modules/voice_conversion/rvc/vc_infer_pipeline.py index d7dfdc4..b183706 100644 --- a/modules/voice_conversion/rvc/vc_infer_pipeline.py +++ b/modules/voice_conversion/rvc/vc_infer_pipeline.py @@ -115,7 +115,7 @@ class VC(object): ) + 1 f0_mel[f0_mel <= 1] = 1 f0_mel[f0_mel > 255] = 255 - f0_coarse = np.rint(f0_mel).astype(np.int) + f0_coarse = np.rint(f0_mel).astype(int) return f0_coarse, f0bak # 1-0 def vc( diff --git a/requirements-complete.txt b/requirements-complete.txt index 1f935eb..57340b9 100644 --- a/requirements-complete.txt +++ b/requirements-complete.txt @@ -1,29 +1,29 @@ -flask 
-flask-cloudflared -flask-cors -flask-compress -markdown -Pillow -colorama -webuiapi +flask==2.3.2 +flask-cloudflared==0.0.13 +flask-cors==4.0.0 +flask-compress==1.13 +markdown==3.4.3 +Pillow==9.5.0 +colorama==0.4.6 +webuiapi==0.9.5 --extra-index-url https://download.pytorch.org/whl/cu117 torch==2.0.0+cu117 torchvision==0.15.1 torchaudio==2.0.1+cu117 -accelerate +accelerate==0.20.3 transformers==4.28.1 -diffusers==0.16.1 -silero-api-server -chromadb -sentence_transformers -edge-tts +diffusers==0.17.1 +silero-api-server==0.2.4 +chromadb==0.4.5 +sentence_transformers==2.2.2 +edge-tts==6.1.8 -vosk -sounddevice -openai-whisper +vosk==0.3.44 +sounddevice==0.4.6 +openai-whisper==20230314 -TTS -fastapi -wxpython -mecab-python3 -unidic-lite \ No newline at end of file +TTS==0.15.6 +fastapi==0.99.1 +wxpython==4.2.1; sys_platform == 'win32' or sys_platform == 'darwin' +mecab-python3==1.0.6 +unidic-lite==1.0.8 diff --git a/requirements-rocm.txt b/requirements-rocm.txt index 8003a63..bc126c2 100644 --- a/requirements-rocm.txt +++ b/requirements-rocm.txt @@ -1,24 +1,24 @@ -flask -flask-cloudflared -flask-cors -flask-compress -markdown -Pillow -colorama -webuiapi +flask==2.3.2 +flask-cloudflared==0.0.13 +flask-cors==4.0.0 +flask-compress==1.13 +markdown==3.4.3 +Pillow==9.5.0 +colorama==0.4.6 +webuiapi==0.9.5 --extra-index-url https://download.pytorch.org/whl/rocm5.4.2 torch>=2.0.0+rocm5.4.2,<2.1.0+rocm5.4.2 torchvision>=0.15.0+rocm5.4.2,<0.16.0+rocm5.4.2 torchaudio>=2.0.0+rocm5.4.2,<2.1.0+rocm5.4.2 -accelerate +accelerate==0.20.3 transformers==4.28.1 -diffusers==0.16.1 -silero-api-server -chromadb -sentence_transformers -edge-tts -TTS -fastapi -wxpython -mecab-python3 -unidic-lite +diffusers==0.17.1 +silero-api-server==0.2.4 +chromadb==0.4.5 +sentence_transformers==2.2.2 +edge-tts==6.1.8 +TTS==0.15.6 +fastapi==0.99.1 +wxpython==4.2.1 +mecab-python3==1.0.6 +unidic-lite==1.0.8 diff --git a/requirements-rvc.txt b/requirements-rvc.txt index 2bfb930..f7d2f48 100644 --- 
a/requirements-rvc.txt +++ b/requirements-rvc.txt @@ -6,4 +6,6 @@ omegaconf==2.3.0 hydra-core==1.3.0 bitarray==2.8.1 sacrebleu==2.3.1 -numpy==1.23.0 \ No newline at end of file +numpy==1.23.0 +ffmpeg==1.4 +ffmpeg-python==0.2.0 diff --git a/requirements-silicon.txt b/requirements-silicon.txt index fd75055..39f7b35 100644 --- a/requirements-silicon.txt +++ b/requirements-silicon.txt @@ -1,24 +1,24 @@ -flask -flask-cloudflared -flask-cors -flask-compress -markdown -Pillow -colorama -torch +flask==2.3.2 +flask-cloudflared==0.0.13 +flask-cors==4.0.0 +flask-compress==1.13 +markdown==3.4.3 +Pillow==9.5.0 +colorama==0.4.6 +torch==2.0.0 transformers==4.28.1 -webuiapi -edge-tts -silero-api-server -torchvision -torchaudio -diffusers -accelerate -chromadb -sentence_transformers -edge-tts -TTS -fastapi -wxpython -mecab-python3 -unidic-lite +webuiapi==0.9.5 +edge-tts==6.1.8 +silero-api-server==0.2.4 +torchvision==0.15.1 +torchaudio==2.0.1 +diffusers==0.17.1 +accelerate==0.20.3 +chromadb==0.4.5 +sentence_transformers==2.2.2 +edge-tts==6.1.8 +TTS==0.15.6 +fastapi==0.99.1 +wxpython==4.2.1 +mecab-python3==1.0.6 +unidic-lite==1.0.8 diff --git a/requirements.txt b/requirements.txt index 33a6eff..e4af5b0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,12 +1,12 @@ -flask -flask-cloudflared -flask-cors -flask-compress -markdown -Pillow -colorama +flask==2.3.2 +flask-cloudflared==0.0.13 +flask-cors==4.0.0 +flask-compress==1.13 +markdown==3.4.3 +Pillow==9.5.0 +colorama==0.4.6 --extra-index-url https://download.pytorch.org/whl/cu117 torch==2.0.0+cu117 transformers==4.28.1 -webuiapi -edge-tts +webuiapi==0.9.5 +edge-tts==6.1.8 diff --git a/server.py b/server.py index e476519..e23c950 100644 --- a/server.py +++ b/server.py @@ -371,7 +371,7 @@ if "streaming-stt" in modules: if "rvc" in modules: print("Initializing RVC voice conversion (from ST request file)") - + import sys sys.path.insert(0,'modules/voice_conversion') diff --git a/tts_coqui.py b/tts_coqui.py index b2ce4ce..e418bb9 
100644 --- a/tts_coqui.py +++ b/tts_coqui.py @@ -18,7 +18,7 @@ from TTS.tts.models.tortoise import Tortoise from flask import send_file tts = None -type = None +tts_type = None multlang = "None" multspeak = "None" loadedModel = "None" @@ -55,7 +55,7 @@ def model_type(_config_path): def load_model(_model, _gpu, _progress): global tts - global type + global tts_type global loadedModel global multlang global multspeak @@ -96,13 +96,13 @@ def load_model(_model, _gpu, _progress): #prevent multiple loading - if status == "Loading": + if status == "Loading": status = "Loading" print(status) return status - + #prevent multiple loading - if os.path.join(_model_path) == loadedModel: + if os.path.join(_model_path) == loadedModel: status = "Already Loaded" print(status) return status @@ -134,20 +134,20 @@ def load_model(_model, _gpu, _progress): else: pass - type = model_type(_config_path) + tts_type = model_type(_config_path) #print("Type: ", type) #print("Status", status) if status is None: status = "Unknown error occurred" - if type is None: - type = "Unknown" + if tts_type is None: + tts_type = "Unknown" return status def is_multi_speaker_model(): global multspeak - global type + global tts_type global spkdirectory global multspeakjson global tts @@ -158,7 +158,7 @@ def is_multi_speaker_model(): try: - if type == "bark" or type == "tortoise": + if tts_type == "bark" or tts_type == "tortoise": _target_directory = ModelManager().output_prefix # Convert _target_directory to a string and remove the trailing backslash if present _target_directory_str = str(_target_directory) @@ -313,7 +313,7 @@ def coqui_modeldownload(_modeldownload): #Avail voices function return status def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id): - global type + global tts_type global multlang global multspeak global loadedModel @@ -365,7 +365,7 @@ def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id): print("MODEL NOT LOADED!!! Loading... 
", loadedModel, speaker_id) print("Loading :", speaker_id, "GPU is: ", _gpu) - load_model(speaker_id, _gpu, True) + load_model(speaker_id, _gpu, True) audio_buffer = io.BytesIO() @@ -375,7 +375,7 @@ def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id): tts.tts_to_file(text, file_path=audio_buffer) elif isinstance(multspeak, (int, float)) and not isinstance(multlang, (int, float)): #print("speaker only") - if type == "bark" or type == "tortoise": + if tts_type == "bark" or tts_type == "tortoise": try: if multspeakjson == "": #failing because multispeakjson not loaded parsed_multspeak = json.loads(is_multi_speaker_model())