mirror of
https://github.com/SillyTavern/SillyTavern-Extras.git
synced 2026-01-26 17:20:04 +00:00
@@ -14,21 +14,23 @@ References:
|
||||
"""
|
||||
from flask import jsonify, abort, request
|
||||
|
||||
from faster_whisper import WhisperModel
|
||||
import whisper
|
||||
|
||||
# Prefix placed in front of every console message printed by this module.
DEBUG_PREFIX = "<stt whisper module>"
# Uploaded audio is written to this path before it is transcribed.
RECORDING_FILE_PATH = "stt_test.wav"

# Model identifier handed to WhisperModel when a model is loaded.
model_size = "large-v3-turbo"

# Shared model instance used by process_audio(), which returns early while
# this is still None. No model is built at import time: an eager CUDA load
# here would be expensive, would fail on hosts without a GPU, and its result
# was discarded by this assignment anyway.
# NOTE(review): presumably the host application assigns the result of
# load_model() to this name during startup -- confirm against the caller.
model = None
|
||||
|
||||
def load_model(file_path=None):
    """
    Build the faster-whisper model used for speech-to-text.

    Args:
        file_path: Optional model location or identifier. When None, the
            module-level ``model_size`` is used. The value is passed
            straight to ``WhisperModel``.
            NOTE(review): faster-whisper expects CTranslate2-format models,
            so a checkpoint from the original ``whisper`` package is
            presumably not usable here -- confirm before passing one.

    Returns:
        A ``WhisperModel`` created with ``device="cuda"`` and
        ``compute_type="float16"``.
    """
    # Previously an unconditional return ignored file_path and left the
    # branches that handled it unreachable; honor the argument instead.
    model_source = model_size if file_path is None else file_path
    return WhisperModel(model_source, device="cuda", compute_type="float16")
|
||||
|
||||
def process_audio():
|
||||
"""
|
||||
@@ -37,16 +39,15 @@ def process_audio():
|
||||
|
||||
if model is None:
|
||||
print(DEBUG_PREFIX,"Whisper model not initialized yet.")
|
||||
return WhisperModel(model_size, device="cuda", compute_type="float16")
|
||||
return ""
|
||||
|
||||
try:
|
||||
file = request.files.get('AudioFile')
|
||||
language = request.form.get('language', default=None)
|
||||
file.save(RECORDING_FILE_PATH)
|
||||
segments, info = model.transcribe(RECORDING_FILE_PATH, beam_size=5)
|
||||
transcript=""
|
||||
for segment in segments:
|
||||
transcript=transcript+" "+segment.text
|
||||
|
||||
result = model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False, language=language)
|
||||
transcript = result["text"]
|
||||
print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)
|
||||
|
||||
return jsonify({"transcript": transcript})
|
||||
|
||||
Reference in New Issue
Block a user