From 9d55444995a93aba06fae10bcff6e65d3f3e6e88 Mon Sep 17 00:00:00 2001
From: ArrangingFear56 <82667894+ArrangingFear56@users.noreply.github.com>
Date: Mon, 9 Dec 2024 17:09:56 +0800
Subject: [PATCH] Add files via upload

---
 fasterWhisperRequirements.txt |  6 ++++
 whisper_module.py             | 67 +++++++++++++++++++++++++++++++++++
 2 files changed, 73 insertions(+)
 create mode 100644 fasterWhisperRequirements.txt
 create mode 100644 whisper_module.py

diff --git a/fasterWhisperRequirements.txt b/fasterWhisperRequirements.txt
new file mode 100644
index 0000000..a2cb6c0
--- /dev/null
+++ b/fasterWhisperRequirements.txt
@@ -0,0 +1,6 @@
+ctranslate2==4.4.0
+huggingface_hub>=0.13
+tokenizers>=0.13,<1
+onnxruntime>=1.14,<2
+av>=11
+tqdm
\ No newline at end of file
diff --git a/whisper_module.py b/whisper_module.py
new file mode 100644
index 0000000..551ac89
--- /dev/null
+++ b/whisper_module.py
@@ -0,0 +1,67 @@
+"""
+Speech-to-text module based on faster-whisper for SillyTavern Extras
+ - faster-whisper github: https://github.com/SYSTRAN/faster-whisper
+
+Authors:
+    - Tony Ribeiro (https://github.com/Tony-sama)
+
+Models are downloaded into the user cache folder, example: C:/Users/toto/.cache/whisper
+
+References:
+    - Code adapted from:
+        - whisper github: https://github.com/openai/whisper
+        - oobabooga text-generation-webui github: https://github.com/oobabooga/text-generation-webui
+"""
+from flask import jsonify, abort, request
+
+from faster_whisper import WhisperModel
+
+DEBUG_PREFIX = "<stt whisper module>"
+RECORDING_FILE_PATH = "stt_test.wav"
+
+model_size = "large-v3-turbo"
+
+# Loaded eagerly at import time; requires a CUDA-capable GPU.
+model = WhisperModel(model_size, device="cuda", compute_type="float16")
+
+def load_model(file_path=None):
+    """
+    Load and return a faster-whisper model.
+
+    file_path is accepted for API compatibility with the other STT
+    modules but is unused: the model is selected by `model_size` and
+    downloaded to the user cache folder, example: C:/Users/toto/.cache/whisper
+    """
+    return WhisperModel(model_size, device="cuda", compute_type="float16")
+
+def process_audio():
+    """
+    Transcribe the request audio file to text using faster-whisper.
+
+    Expects a multipart form with an 'AudioFile' part and an optional
+    'language' field (None lets whisper auto-detect the language).
+    Returns a JSON response: {"transcript": "..."}.
+    Aborts with 400 when no audio file is provided, 500 on failure.
+    """
+    if model is None:
+        print(DEBUG_PREFIX, "Whisper model not initialized yet.")
+        abort(500, DEBUG_PREFIX + " Whisper model not initialized")
+
+    # Checked outside the try block so the broad except below does not
+    # turn a client error (missing file) into a 500.
+    file = request.files.get('AudioFile')
+    if file is None:
+        abort(400, DEBUG_PREFIX + " No 'AudioFile' provided in request")
+
+    try:
+        # Optional language hint; previously read but never forwarded.
+        language = request.form.get('language', default=None)
+        file.save(RECORDING_FILE_PATH)
+        segments, info = model.transcribe(RECORDING_FILE_PATH, beam_size=5, language=language)
+        # join avoids quadratic concatenation and the stray leading space
+        transcript = " ".join(segment.text for segment in segments)
+        print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)
+
+        return jsonify({"transcript": transcript})
+
+    except Exception as e:  # No exception observed during test but we never know
+        print(e)
+        abort(500, DEBUG_PREFIX + " Exception occurs while processing audio")