mirror of
https://github.com/SillyTavern/SillyTavern-Extras.git
synced 2026-05-01 03:41:24 +00:00
Add files via upload
This commit is contained in:
6
fasterWhisperRequirements.txt
Normal file
6
fasterWhisperRequirements.txt
Normal file
@@ -0,0 +1,6 @@
ctranslate2==4.4.0
huggingface_hub>=0.13
tokenizers>=0.13,<1
onnxruntime>=1.14,<2
av>=11
tqdm
56
whisper_module.py
Normal file
56
whisper_module.py
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
"""
|
||||||
|
Speech-to-text module based on Whisper for SillyTavern Extras
|
||||||
|
- Whisper github: https://github.com/openai/whisper
|
||||||
|
|
||||||
|
Authors:
|
||||||
|
- Tony Ribeiro (https://github.com/Tony-sama)
|
||||||
|
|
||||||
|
Models are saved into user cache folder, example: C:/Users/toto/.cache/whisper
|
||||||
|
|
||||||
|
References:
|
||||||
|
- Code adapted from:
|
||||||
|
- whisper github: https://github.com/openai/whisper
|
||||||
|
- oobabooga text-generation-webui github: https://github.com/oobabooga/text-generation-webui
|
||||||
|
"""
|
||||||
|
from flask import jsonify, abort, request
|
||||||
|
|
||||||
|
from faster_whisper import WhisperModel
|
||||||
|
|
||||||
|
# Tag prepended to this module's console/log output.
DEBUG_PREFIX = "<stt whisper module>"
# Uploaded audio is written here before transcription (overwritten each request).
RECORDING_FILE_PATH = "stt_test.wav"

# faster-whisper model identifier; weights download to the user cache folder
# on first use (e.g. C:/Users/toto/.cache/whisper).
model_size = "large-v3-turbo"

# NOTE(review): the model is loaded eagerly at import time and is hard-coded
# to device="cuda" with float16 — importing this module on a machine without
# a compatible GPU will fail. Confirm this is intended.
model = WhisperModel(model_size, device="cuda", compute_type="float16")
|
||||||
|
|
||||||
|
def load_model(file_path=None):
    """
    Load a faster-whisper model.

    Model weights are downloaded to the user cache folder on first use,
    example: C:/Users/toto/.cache/whisper.

    Args:
        file_path: optional path to a local model directory or a model size
            name accepted by faster-whisper. Defaults to the module-level
            ``model_size`` when not provided (previous behavior).

    Returns:
        A ready-to-use WhisperModel instance.
    """
    # Bug fix: the original accepted ``file_path`` but silently ignored it.
    # WhisperModel's first argument is ``model_size_or_path``, so honoring the
    # caller's value is backward-compatible (None falls back to the default).
    return WhisperModel(file_path or model_size, device="cuda", compute_type="float16")
|
||||||
|
|
||||||
|
def process_audio():
    """
    Transcribe the request's audio file to text using faster-whisper.

    Expects a multipart form request with:
        - "AudioFile": the audio recording to transcribe (required)
        - "language": optional language-code hint forwarded to the decoder

    Returns:
        Flask JSON response of the form {"transcript": "<text>"}.

    Aborts:
        400 if no audio file was supplied; 500 if the model is not
        initialized or an unexpected error occurs during transcription.
    """
    if model is None:
        print(DEBUG_PREFIX, "Whisper model not initialized yet.")
        # Bug fix: the original returned a new WhisperModel instance here,
        # which is not a valid Flask response; report the error instead.
        abort(500, DEBUG_PREFIX + " Whisper model not initialized")

    # Validate input before the broad try/except so the 400 is not
    # swallowed and re-raised as a 500 below.
    file = request.files.get('AudioFile')
    if file is None:
        abort(400, DEBUG_PREFIX + " No AudioFile provided in request")

    try:
        language = request.form.get('language', default=None)
        file.save(RECORDING_FILE_PATH)

        # Bug fix: forward the client's language hint — it was read from the
        # form but never passed to the decoder.
        segments, info = model.transcribe(
            RECORDING_FILE_PATH, beam_size=5, language=language)

        # Same output as the original accumulation loop (" " + segment.text
        # per segment), but via a single O(n) join.
        transcript = "".join(" " + segment.text for segment in segments)
        print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)

        return jsonify({"transcript": transcript})

    except Exception as e:  # No exception observed during test but we never know
        print(e)
        abort(500, DEBUG_PREFIX + " Exception occurs while processing audio")
|
||||||
Reference in New Issue
Block a user