Merge branch 'neo' of https://github.com/Tony-sama/SillyTavern-extras into neo

2026-02-23 14:44:11 +00:00 · 2023-08-14 04:04:55 +02:00
parent a440177f25 f87ac34928
commit 46fecb5b48
9 changed files with 95 additions and 93 deletions
--- a/modules/voice_conversion/rvc/rvc.py
+++ b/modules/voice_conversion/rvc/rvc.py
@@ -66,7 +66,7 @@ class Config:
                or "1070" in self.gpu_name
                or "1080" in self.gpu_name
            ):
-                print("16系/10系显卡和P40强制单精度")
+                print("Forcing full precision for 16/10 series cards.")
                self.is_half = False
                config_file_change_fp32()
            else:
@@ -84,12 +84,12 @@ class Config:
            #     with open("trainset_preprocess_pipeline_print.py", "w") as f:
            #         f.write(strr)
        elif torch.backends.mps.is_available():
-            print("没有发现支持的N卡, 使用MPS进行推理")
+            print("No compatible GPU found, using MPS for inference.")
            self.device = "mps"
            self.is_half = False
            config_file_change_fp32()
        else:
-            print("没有发现支持的N卡, 使用CPU进行推理")
+            print("No compatible GPU found, using CPU for inference.")
            self.device = "cpu"
            self.is_half = False
            config_file_change_fp32()
@@ -150,7 +150,7 @@ def load_audio(audio_source, sr):
        elif isinstance(audio_source, io.BytesIO):  # If it's a BytesIO object
            audio_source.seek(0)
            out, _ = (
-                ffmpeg.input("pipe:0", format="wav", threads=0)
+                ffmpeg.input("pipe:0", threads=0)
                .output("-", format="f32le", acodec="pcm_f32le", ac=1, ar=sr)
                .run(input=audio_source.read(), cmd=["ffmpeg", "-nostdin"], capture_stdout=True, capture_stderr=True)
            )
--- a/modules/voice_conversion/rvc/vc_infer_pipeline.py
+++ b/modules/voice_conversion/rvc/vc_infer_pipeline.py
@@ -115,7 +115,7 @@ class VC(object):
        ) + 1
        f0_mel[f0_mel <= 1] = 1
        f0_mel[f0_mel > 255] = 255
-        f0_coarse = np.rint(f0_mel).astype(np.int)
+        f0_coarse = np.rint(f0_mel).astype(int)
        return f0_coarse, f0bak  # 1-0

    def vc(
--- a/requirements-complete.txt
+++ b/requirements-complete.txt
@@ -1,29 +1,29 @@
-flask
-flask-cloudflared
-flask-cors
-flask-compress
-markdown
-Pillow
-colorama
-webuiapi
+flask==2.3.2
+flask-cloudflared==0.0.13
+flask-cors==4.0.0
+flask-compress==1.13
+markdown==3.4.3
+Pillow==9.5.0
+colorama==0.4.6
+webuiapi==0.9.5
 --extra-index-url https://download.pytorch.org/whl/cu117
 torch==2.0.0+cu117
 torchvision==0.15.1
 torchaudio==2.0.1+cu117
-accelerate
+accelerate==0.20.3
 transformers==4.28.1
-diffusers==0.16.1
-silero-api-server
-chromadb
-sentence_transformers
-edge-tts
+diffusers==0.17.1
+silero-api-server==0.2.4
+chromadb==0.4.5
+sentence_transformers==2.2.2
+edge-tts==6.1.8

-vosk
-sounddevice
-openai-whisper
+vosk==0.3.44
+sounddevice==0.4.6
+openai-whisper==20230314

-TTS
-fastapi
-wxpython
-mecab-python3
-unidic-lite
+TTS==0.15.6
+fastapi==0.99.1
+wxpython==4.2.1; sys_platform == 'win32' or sys_platform == 'darwin'
+mecab-python3==1.0.6
+unidic-lite==1.0.8
--- a/requirements-rocm.txt
+++ b/requirements-rocm.txt
@@ -1,24 +1,24 @@
-flask
-flask-cloudflared
-flask-cors
-flask-compress
-markdown
-Pillow
-colorama
-webuiapi
+flask==2.3.2
+flask-cloudflared==0.0.13
+flask-cors==4.0.0
+flask-compress==1.13
+markdown==3.4.3
+Pillow==9.5.0
+colorama==0.4.6
+webuiapi==0.9.5
 --extra-index-url https://download.pytorch.org/whl/rocm5.4.2
 torch>=2.0.0+rocm5.4.2,<2.1.0+rocm5.4.2
 torchvision>=0.15.0+rocm5.4.2,<0.16.0+rocm5.4.2
 torchaudio>=2.0.0+rocm5.4.2,<2.1.0+rocm5.4.2
-accelerate
+accelerate==0.20.3
 transformers==4.28.1
-diffusers==0.16.1
-silero-api-server
-chromadb
-sentence_transformers
-edge-tts
-TTS
-fastapi
-wxpython
-mecab-python3
-unidic-lite
+diffusers==0.17.1
+silero-api-server==0.2.4
+chromadb==0.4.5
+sentence_transformers==2.2.2
+edge-tts==6.1.8
+TTS==0.15.6
+fastapi==0.99.1
+wxpython==4.2.1
+mecab-python3==1.0.6
+unidic-lite==1.0.8
--- a/requirements-rvc.txt
+++ b/requirements-rvc.txt
@@ -6,4 +6,6 @@ omegaconf==2.3.0
 hydra-core==1.3.0
 bitarray==2.8.1
 sacrebleu==2.3.1
-numpy==1.23.0
+numpy==1.23.0
+ffmpeg==1.4
+ffmpeg-python==0.2.0
--- a/requirements-silicon.txt
+++ b/requirements-silicon.txt
@@ -1,24 +1,24 @@
-flask
-flask-cloudflared
-flask-cors
-flask-compress
-markdown
-Pillow
-colorama
-torch
+flask==2.3.2
+flask-cloudflared==0.0.13
+flask-cors==4.0.0
+flask-compress==1.13
+markdown==3.4.3
+Pillow==9.5.0
+colorama==0.4.6
+torch==2.0.0
 transformers==4.28.1
-webuiapi
-edge-tts
-silero-api-server
-torchvision
-torchaudio
-diffusers
-accelerate
-chromadb
-sentence_transformers
-edge-tts
-TTS
-fastapi
-wxpython
-mecab-python3
-unidic-lite
+webuiapi==0.9.5
+edge-tts==6.1.8
+silero-api-server==0.2.4
+torchvision==0.15.1
+torchaudio==2.0.1
+diffusers==0.17.1
+accelerate==0.20.3
+chromadb==0.4.5
+sentence_transformers==2.2.2
+edge-tts==6.1.8
+TTS==0.15.6
+fastapi==0.99.1
+wxpython==4.2.1
+mecab-python3==1.0.6
+unidic-lite==1.0.8
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,12 +1,12 @@
-flask
-flask-cloudflared
-flask-cors
-flask-compress
-markdown
-Pillow
-colorama
+flask==2.3.2
+flask-cloudflared==0.0.13
+flask-cors==4.0.0
+flask-compress==1.13
+markdown==3.4.3
+Pillow==9.5.0
+colorama==0.4.6
 --extra-index-url https://download.pytorch.org/whl/cu117
 torch==2.0.0+cu117
 transformers==4.28.1
-webuiapi
-edge-tts
+webuiapi==0.9.5
+edge-tts==6.1.8
--- a/server.py
+++ b/server.py
@@ -371,7 +371,7 @@ if "streaming-stt" in modules:

 if "rvc" in modules:
    print("Initializing RVC voice conversion (from ST request file)")
-    
+
    import sys
    sys.path.insert(0,'modules/voice_conversion')

--- a/tts_coqui.py
+++ b/tts_coqui.py
@@ -18,7 +18,7 @@ from TTS.tts.models.tortoise import Tortoise
 from flask import send_file

 tts = None
-type = None
+tts_type = None
 multlang = "None"
 multspeak = "None"
 loadedModel = "None"
@@ -55,7 +55,7 @@ def model_type(_config_path):

 def load_model(_model, _gpu, _progress):
    global tts
-    global type
+    global tts_type
    global loadedModel
    global multlang
    global multspeak
@@ -96,13 +96,13 @@ def load_model(_model, _gpu, _progress):


        #prevent multiple loading
-        if status == "Loading": 
+        if status == "Loading":
            status = "Loading"
            print(status)
            return status
-        
+
        #prevent multiple loading
-        if os.path.join(_model_path) == loadedModel: 
+        if os.path.join(_model_path) == loadedModel:
            status = "Already Loaded"
            print(status)
            return status
@@ -134,20 +134,20 @@ def load_model(_model, _gpu, _progress):
        else:
            pass

-        type = model_type(_config_path)
+        tts_type = model_type(_config_path)
        #print("Type: ", type)
        #print("Status", status)

    if status is None:
        status = "Unknown error occurred"
-    if type is None:
-        type = "Unknown"
+    if tts_type is None:
+        tts_type = "Unknown"

    return status

 def is_multi_speaker_model():
    global multspeak
-    global type
+    global tts_type
    global spkdirectory
    global multspeakjson
    global tts
@@ -158,7 +158,7 @@ def is_multi_speaker_model():
    try:


-        if type == "bark" or type == "tortoise":
+        if tts_type == "bark" or tts_type == "tortoise":
            _target_directory = ModelManager().output_prefix
            # Convert _target_directory to a string and remove the trailing backslash if present
            _target_directory_str = str(_target_directory)
@@ -313,7 +313,7 @@ def coqui_modeldownload(_modeldownload): #Avail voices function
    return status

 def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id):
-    global type
+    global tts_type
    global multlang
    global multspeak
    global loadedModel
@@ -365,7 +365,7 @@ def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id):
        print("MODEL NOT LOADED!!! Loading... ", loadedModel, speaker_id)
        print("Loading :", speaker_id, "GPU is: ", _gpu)

-        load_model(speaker_id, _gpu, True) 
+        load_model(speaker_id, _gpu, True)


    audio_buffer = io.BytesIO()
@@ -375,7 +375,7 @@ def coqui_tts(text, speaker_id, mspker_id, style_wav, language_id):
        tts.tts_to_file(text, file_path=audio_buffer)
    elif isinstance(multspeak, (int, float)) and not isinstance(multlang, (int, float)):
        #print("speaker only")
-        if type == "bark" or type == "tortoise":
+        if tts_type == "bark" or tts_type == "tortoise":
            try:
                if multspeakjson == "": #failing because multispeakjson not loaded
                    parsed_multspeak = json.loads(is_multi_speaker_model())