Restored speech recognition streaming mode as an independent module. It records audio using the microphone on the server side, detects voice start/end with Vosk, and transcribes with Whisper.

This commit is contained in:
Tony Ribeiro
2023-07-31 18:50:44 +02:00
parent 83adae950e
commit 912c4335c2
3 changed files with 135 additions and 3 deletions

View File

@@ -305,7 +305,7 @@ Compress(app) # compress responses
app.config["MAX_CONTENT_LENGTH"] = 100 * 1024 * 1024
if "vosk-stt" in modules:
print("Initializing Vosk STT streaming")
print("Initializing Vosk speech-recognition (from ST request file)")
vosk_model_path = (
args.stt_vosk_model_path
if args.stt_vosk_model_path
@@ -317,7 +317,7 @@ if "vosk-stt" in modules:
app.add_url_rule("/api/speech-recognition/vosk/process-audio", view_func=vosk_module.process_audio, methods=["POST"])
if "whisper-stt" in modules:
print("Initializing Whisper STT streaming")
print("Initializing Whisper speech-recognition (from ST request file)")
whisper_model_path = (
args.stt_whisper_model_path
if args.stt_whisper_model_path
@@ -328,6 +328,18 @@ if "whisper-stt" in modules:
whisper_module.model = whisper_module.load_model(file_path=whisper_model_path)
app.add_url_rule("/api/speech-recognition/whisper/process-audio", view_func=whisper_module.process_audio, methods=["POST"])
if "streaming-stt" in modules:
print("Initializing vosk/whisper speech-recognition (from extras server microphone)")
whisper_model_path = (
args.stt_whisper_model_path
if args.stt_whisper_model_path
else None)
import modules.speech_recognition.streaming_module as streaming_module
streaming_module.whisper_model, streaming_module.vosk_model = streaming_module.load_model(file_path=whisper_model_path)
app.add_url_rule("/api/speech-recognition/streaming/record-and-transcript", view_func=streaming_module.record_and_transcript, methods=["POST"])
def require_module(name):
def wrapper(fn):
@wraps(fn)