Mirror of https://github.com/SillyTavern/SillyTavern-Extras.git
Synced 2026-04-29 10:51:19 +00:00
Added dedicated script for classify feature. Clean call to classifier module in RVC.
modules/classify/classify_module.py (new file, 44 lines)
@@ -0,0 +1,44 @@
+"""
+Classify module for SillyTavern Extras
+
+Authors:
+    - Tony Ribeiro (https://github.com/Tony-sama)
+
+Provides classification features for text
+
+References:
+"""
+
+import torch
+from transformers import pipeline
+
+DEBUG_PREFIX = "<Classify module>"
+
+# Models init
+cuda_device = "cuda:0"# if not args.cuda_device else args.cuda_device
+device_string = cuda_device if torch.cuda.is_available() else 'cpu'
+device = torch.device(device_string)
+torch_dtype = torch.float32 if device_string != cuda_device else torch.float16
+
+text_emotion_pipe = None
+
+def init_text_emotion_classifier(model_name: str) -> None:
+    global text_emotion_pipe
+
+    print(DEBUG_PREFIX,"Initializing text classification pipeline with model",model_name)
+    text_emotion_pipe = pipeline(
+        "text-classification",
+        model=model_name,
+        top_k=None,
+        device=device,
+        torch_dtype=torch_dtype,
+    )
+
+
+def classify_text_emotion(text: str) -> list:
+    output = text_emotion_pipe(
+        text,
+        truncation=True,
+        max_length=text_emotion_pipe.model.config.max_position_embeddings,
+    )[0]
+    return sorted(output, key=lambda x: x["score"], reverse=True)
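The module loads nothing at import time: text_emotion_pipe stays None until init_text_emotion_classifier() builds the pipeline, using float16 on CUDA and float32 on CPU. A minimal usage sketch (the model name is the one hard-coded in the emotion hack removed from the RVC module below; the input text is made up, and any Hugging Face text-classification model should work):

    import modules.classify.classify_module as classify_module

    # Load the emotion model once at startup, then classify as often as needed.
    classify_module.init_text_emotion_classifier("nateraw/bert-base-uncased-emotion")
    scores = classify_module.classify_text_emotion("I am so happy today!")
    print(scores[0]["label"])  # highest-scoring emotion label

The hunks that follow switch the RVC module, and then server.py, over to this shared module.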
@@ -14,14 +14,14 @@ References:
 """
 from flask import abort, request, send_file, jsonify
 import json
-import modules.voice_conversion.rvc.rvc as rvc
 from scipy.io import wavfile
 import os
 import io
 
-from py7zr import pack_7zarchive, unpack_7zarchive
 import shutil
 from py7zr import pack_7zarchive, unpack_7zarchive
+import modules.voice_conversion.rvc.rvc as rvc
+import modules.classify.classify_module as classify_module
 
 DEBUG_PREFIX = "<RVC module>"
 RVC_MODELS_PATH = "data/models/rvc/"
@@ -194,7 +194,7 @@ def rvc_process_audio():
 
     if emotion is None:
         print("> calling text classification pipeline")
-        emotions_score = classify_text(parameters["text"])
+        emotions_score = classify_module.classify_text_emotion(parameters["text"])
 
         print(" > ",emotions_score)
         emotion = emotions_score[0]["label"]
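Because the pipeline is built with top_k=None, classify_text_emotion() returns a score for every label and the sort puts the strongest one at index 0, which is why emotions_score[0]["label"] picks the winner. An illustrative result for the call above (labels depend on the configured model; these values are made up):

    # emotions_score, hypothetical:
    # [{"label": "joy", "score": 0.97}, {"label": "surprise", "score": 0.02}, ...]
    # emotion = emotions_score[0]["label"]  ->  "joy"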
@@ -355,30 +355,4 @@ def fix_model_install():
         if not found:
             print(" > WARNING: no corresponding folder found, move or delete the file manually to stop warnings.")
 
-
-#### Emotion HACK
-from transformers import AutoTokenizer, AutoProcessor, pipeline
-import torch
-
-# Models init
-cuda_device = "cuda:0"# if not args.cuda_device else args.cuda_device
-device_string = cuda_device if torch.cuda.is_available() else 'cpu'
-device = torch.device(device_string)
-torch_dtype = torch.float32 if device_string != cuda_device else torch.float16
-
-def classify_text(text: str) -> list:
-    output = classification_pipe(
-        text,
-        truncation=True,
-        max_length=classification_pipe.model.config.max_position_embeddings,
-    )[0]
-    return sorted(output, key=lambda x: x["score"], reverse=True)
-
-classification_pipe = pipeline(
-    "text-classification",
-    model="nateraw/bert-base-uncased-emotion",
-    top_k=None,
-    device=device,
-    torch_dtype=torch_dtype,
-)
-print(DEBUG_PREFIX,"RVC model folder checked.")
+    print(DEBUG_PREFIX,"RVC model folder checked.")
server.py (24 lines changed)
@@ -221,16 +221,6 @@ if "summarize" in modules:
         summarization_model, torch_dtype=torch_dtype
     ).to(device)
 
-if "classify" in modules:
-    print("Initializing a sentiment classification pipeline...")
-    classification_pipe = pipeline(
-        "text-classification",
-        model=classification_model,
-        top_k=None,
-        device=device,
-        torch_dtype=torch_dtype,
-    )
-
 if "sd" in modules and not sd_use_remote:
     from diffusers import StableDiffusionPipeline
     from diffusers import EulerAncestralDiscreteScheduler
@@ -337,6 +327,20 @@ if max_content_length is not None:
     print("Setting MAX_CONTENT_LENGTH to",max_content_length,"Mb")
     app.config["MAX_CONTENT_LENGTH"] = int(max_content_length) * 1024 * 1024
 
+# TODO: Keij, unify main classify and module one
+if "classify" in modules:
+    print("Initializing a sentiment classification pipeline...")
+    classification_pipe = pipeline(
+        "text-classification",
+        model=classification_model,
+        top_k=None,
+        device=device,
+        torch_dtype=torch_dtype,
+    )
+
+    import modules.classify.classify_module as classify_module
+    classify_module.init_text_emotion_classifier(classification_model)
+
 if "vosk-stt" in modules:
     print("Initializing Vosk speech-recognition (from ST request file)")
     vosk_model_path = (
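As the TODO notes, this interim state loads the classification model twice: once for server.py's local classification_pipe and once inside the new module. A purely hypothetical sketch of the unification the TODO asks for, not part of this commit, would drop the local pipe and keep only the shared module:

    # Hypothetical follow-up (not in this commit): a single shared pipeline.
    if "classify" in modules:
        import modules.classify.classify_module as classify_module
        classify_module.init_text_emotion_classifier(classification_model)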