diff --git a/.gitignore b/.gitignore index e385523..61324d0 100644 --- a/.gitignore +++ b/.gitignore @@ -141,3 +141,4 @@ api_key.txt stt_test.wav talkinghead/tha3/models docker/cache +launch.bat diff --git a/modules/speech_recognition/faster_whisper_module.py b/modules/speech_recognition/faster_whisper_module.py index 50f4c0b..aedb95a 100644 --- a/modules/speech_recognition/faster_whisper_module.py +++ b/modules/speech_recognition/faster_whisper_module.py @@ -21,6 +21,7 @@ RECORDING_FILE_PATH = "stt_test.wav" model_size = "large-v3-turbo" + def load_model(file_path=None,whisper_device="cuda",whisper_compute_type='float16'): """ Load given vosk model from file or default to en-us model. diff --git a/server.py b/server.py index 6a3548e..38054e0 100644 --- a/server.py +++ b/server.py @@ -938,7 +938,7 @@ parser.add_argument("--stt-vosk-model-path", help="Load a custom vosk speech-to- parser.add_argument("--stt-whisper-model-path", help="Load a custom whisper speech-to-text model") parser.add_argument("--use-faster-whisper", action="store_true", help="Choose to use faster-whisper instead of whisper") -parser.add_argument("--faster-whisper-device", help="Choose between cpu and cuda to run faster-whisper, defaults to cuda") +parser.add_argument("--faster-whisper-cpu", action="store_true", help="Use cpu to run faster-whisper, saves VRAM but much slower") parser.add_argument("--faster-whisper-type", help="Choose faster-whisper compute type, defaults to float16") # sd_group = parser.add_mutually_exclusive_group() @@ -1179,8 +1179,8 @@ if "whisper-stt" in modules: if whisper_fast: faster_whisper_device=( - args.faster_whisper_device - if args.faster_whisper_device + "cpu" + if args.faster_whisper_cpu else "cuda") faster_whisper_type=(