Mirror of https://github.com/SillyTavern/SillyTavern-Extras.git (synced 2026-05-01 11:51:22 +00:00)
Add language parameter for whisper
This commit is contained in:
@@ -15,7 +15,7 @@ References:
|
|||||||
- oobabooga text-generation-webui github: https://github.com/oobabooga/text-generation-webui
|
- oobabooga text-generation-webui github: https://github.com/oobabooga/text-generation-webui
|
||||||
- vosk github: https://github.com/alphacep/vosk-api/blob/master/python/example/test_microphone.py
|
- vosk github: https://github.com/alphacep/vosk-api/blob/master/python/example/test_microphone.py
|
||||||
"""
|
"""
|
||||||
from flask import jsonify, abort
|
from flask import jsonify, abort, request
|
||||||
|
|
||||||
import queue
|
import queue
|
||||||
import sys
|
import sys
|
||||||
@@ -77,6 +77,7 @@ def record_and_transcript():
|
|||||||
q.put(bytes(indata))
|
q.put(bytes(indata))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
language = request.form.get('language', default=None)
|
||||||
device_info = sd.query_devices(device, "input")
|
device_info = sd.query_devices(device, "input")
|
||||||
# soundfile expects an int, sounddevice provides a float:
|
# soundfile expects an int, sounddevice provides a float:
|
||||||
samplerate = int(device_info["default_samplerate"])
|
samplerate = int(device_info["default_samplerate"])
|
||||||
@@ -107,7 +108,7 @@ def record_and_transcript():
|
|||||||
print(DEBUG_PREFIX, "Recorded message saved to", RECORDING_FILE_PATH)
|
print(DEBUG_PREFIX, "Recorded message saved to", RECORDING_FILE_PATH)
|
||||||
|
|
||||||
# Whisper HACK
|
# Whisper HACK
|
||||||
result = whisper_model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False)
|
result = whisper_model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False, language=language)
|
||||||
transcript = result["text"]
|
transcript = result["text"]
|
||||||
print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)
|
print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)
|
||||||
# ----------------------------------
|
# ----------------------------------
|
||||||
|
|||||||
@@ -44,7 +44,7 @@ def process_audio():
|
|||||||
print(DEBUG_PREFIX,"Vosk model not initialized yet.")
|
print(DEBUG_PREFIX,"Vosk model not initialized yet.")
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
try:
|
try:
|
||||||
file = request.files.get('AudioFile')
|
file = request.files.get('AudioFile')
|
||||||
file.save(RECORDING_FILE_PATH)
|
file.save(RECORDING_FILE_PATH)
|
||||||
|
|
||||||
@@ -67,11 +67,11 @@ def process_audio():
|
|||||||
break
|
break
|
||||||
if rec.AcceptWaveform(data):
|
if rec.AcceptWaveform(data):
|
||||||
break
|
break
|
||||||
|
|
||||||
transcript = rec.Result()[14:-3]
|
transcript = rec.Result()[14:-3]
|
||||||
print(DEBUG_PREFIX, "Transcripted from request audio file:", transcript)
|
print(DEBUG_PREFIX, "Transcripted from request audio file:", transcript)
|
||||||
return jsonify({"transcript": transcript})
|
return jsonify({"transcript": transcript})
|
||||||
|
|
||||||
except Exception as e: # No exception observed during test but we never know
|
except Exception as e: # No exception observed during test but we never know
|
||||||
print(e)
|
print(e)
|
||||||
abort(500, DEBUG_PREFIX+" Exception occurs while processing audio")
|
abort(500, DEBUG_PREFIX+" Exception occurs while processing audio")
|
||||||
|
|||||||
@@ -43,9 +43,10 @@ def process_audio():
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
file = request.files.get('AudioFile')
|
file = request.files.get('AudioFile')
|
||||||
|
language = request.form.get('language', default=None)
|
||||||
file.save(RECORDING_FILE_PATH)
|
file.save(RECORDING_FILE_PATH)
|
||||||
|
|
||||||
result = model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False)
|
result = model.transcribe(RECORDING_FILE_PATH, condition_on_previous_text=False, language=language)
|
||||||
transcript = result["text"]
|
transcript = result["text"]
|
||||||
print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)
|
print(DEBUG_PREFIX, "Transcripted from audio file (whisper):", transcript)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user