diff --git a/modules/speech_recognition/faster_whisper_module.py b/modules/speech_recognition/faster_whisper_module.py
index aedb95a..c571257 100644
--- a/modules/speech_recognition/faster_whisper_module.py
+++ b/modules/speech_recognition/faster_whisper_module.py
@@ -22,17 +22,22 @@ RECORDING_FILE_PATH = "stt_test.wav"
 
 model_size = "large-v3-turbo"
 
-def load_model(file_path=None,whisper_device="cuda",whisper_compute_type='float16'):
+def load_model(file_path=None,whisper_device="cuda",whisper_compute_type="auto"):
     """
     Load given vosk model from file or default to en-us model.
     Download model to user cache folder, example: C:/Users/toto/.cache/vosk
     """
+    if whisper_compute_type=="auto":
+        whisper_compute_type=(
+            "int8"
+            if whisper_device=="cpu"
+            else "float16")
     if file_path is None:
-        print(f"faster-whisper using {model_size}")
+        print(f"faster-whisper using {model_size} model with {whisper_compute_type}")
         return WhisperModel(model_size, device=whisper_device, compute_type=whisper_compute_type)
     else:
-        print(f"faster-whisper using {file_path}")
+        print(f"faster-whisper using {file_path} model with {whisper_compute_type}")
         return WhisperModel(file_path, device=whisper_device, compute_type=whisper_compute_type)
 
 
 def process_audio():
diff --git a/server.py b/server.py
index 38054e0..e821bd6 100644
--- a/server.py
+++ b/server.py
@@ -939,7 +939,7 @@ parser.add_argument("--stt-whisper-model-path", help="Load a custom whisper spee
 
 parser.add_argument("--use-faster-whisper", action="store_true", help="Choose to use faster-whisper instead of whisper")
 parser.add_argument("--faster-whisper-cpu", action="store_true", help="Use cpu to run faster-whisper, saves VRAM but much slower")
-parser.add_argument("--faster-whisper-type", help="Choose faster-whisper compute type, defaults to float16")
+parser.add_argument("--faster-whisper-type", help="Choose faster-whisper compute type, defaults to float16 for cuda and int8 for cpu")
 
 # sd_group = parser.add_mutually_exclusive_group()
 
@@ -1186,7 +1186,7 @@ if "whisper-stt" in modules:
         faster_whisper_type=(
             args.faster_whisper_type
             if args.faster_whisper_type
-            else "float16")
+            else "auto")
         print(f"Initializing Faster-Whisper speech-recognition (from ST request file) on {faster_whisper_device}")
         import modules.speech_recognition.faster_whisper_module as whisper_module