Add language parameter for whisper

This commit is contained in:
Cohee
2023-11-19 21:10:41 +02:00
parent 61efbb9aa3
commit e32d0c30c2
3 changed files with 8 additions and 6 deletions

View File

@@ -15,7 +15,7 @@ References:
- oobabooga text-generation-webui github: https://github.com/oobabooga/text-generation-webui - oobabooga text-generation-webui github: https://github.com/oobabooga/text-generation-webui
- vosk github: https://github.com/alphacep/vosk-api/blob/master/python/example/test_microphone.py - vosk github: https://github.com/alphacep/vosk-api/blob/master/python/example/test_microphone.py
""" """
from flask import jsonify, abort from flask import jsonify, abort, request
import queue import queue
import sys import sys
@@ -77,6 +77,7 @@ def record_and_transcript():
q.put(bytes(indata)) q.put(bytes(indata))
try: try:
language = request.form.get('language', default=None)
device_info = sd.query_devices(device, "input") device_info = sd.query_devices(device, "input")
# soundfile expects an int, sounddevice provides a float: # soundfile expects an int, sounddevice provides a float:
samplerate = int(device_info["default_samplerate"]) samplerate = int(device_info["default_samplerate"])
@@ -107,7 +108,7 @@ def record_and_transcript():
print(DEBUG_PREFIX, "Recorded message saved to", RECORDING_FILE_PATH) print(DEBUG_PREFIX, "Recorded message saved to", RECORDING_FILE_PATH)
# Whisper HACK # Whisper HACK
result = whisper_model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False) result = whisper_model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False, language=language)
transcript = result["text"] transcript = result["text"]
print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript) print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)
# ---------------------------------- # ----------------------------------

View File

@@ -44,7 +44,7 @@ def process_audio():
print(DEBUG_PREFIX,"Vosk model not initialized yet.") print(DEBUG_PREFIX,"Vosk model not initialized yet.")
return "" return ""
try: try:
file = request.files.get('AudioFile') file = request.files.get('AudioFile')
file.save(RECORDING_FILE_PATH) file.save(RECORDING_FILE_PATH)
@@ -67,11 +67,11 @@ def process_audio():
break break
if rec.AcceptWaveform(data): if rec.AcceptWaveform(data):
break break
transcript = rec.Result()[14:-3] transcript = rec.Result()[14:-3]
print(DEBUG_PREFIX, "Transcripted from request audio file:", transcript) print(DEBUG_PREFIX, "Transcripted from request audio file:", transcript)
return jsonify({"transcript": transcript}) return jsonify({"transcript": transcript})
except Exception as e: # No exception observed during test but we never know except Exception as e: # No exception observed during test but we never know
print(e) print(e)
abort(500, DEBUG_PREFIX+" Exception occurs while processing audio") abort(500, DEBUG_PREFIX+" Exception occurs while processing audio")

View File

@@ -43,9 +43,10 @@ def process_audio():
try: try:
file = request.files.get('AudioFile') file = request.files.get('AudioFile')
language = request.form.get('language', default=None)
file.save(RECORDING_FILE_PATH) file.save(RECORDING_FILE_PATH)
result = model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False) result = model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False, language=language)
transcript = result["text"] transcript = result["text"]
print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript) print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)