Mirror of https://github.com/SillyTavern/SillyTavern-Extras.git
Synced 2026-04-29 10:51:19 +00:00
Added dedicated script for classify feature. Clean call to classifier module in RVC.
modules/classify/classify_module.py (new file, 44 lines)
@@ -0,0 +1,44 @@
+"""
+Classify module for SillyTavern Extras
+
+Authors:
+    - Tony Ribeiro (https://github.com/Tony-sama)
+
+Provides classification features for text
+
+References:
+"""
+
+import torch
+from transformers import pipeline
+
+DEBUG_PREFIX = "<Classify module>"
+
+# Models init
+cuda_device = "cuda:0"# if not args.cuda_device else args.cuda_device
+device_string = cuda_device if torch.cuda.is_available() else 'cpu'
+device = torch.device(device_string)
+torch_dtype = torch.float32 if device_string != cuda_device else torch.float16
+
+text_emotion_pipe = None
+
+def init_text_emotion_classifier(model_name: str) -> None:
+    global text_emotion_pipe
+
+    print(DEBUG_PREFIX,"Initializing text classification pipeline with model",model_name)
+    text_emotion_pipe = pipeline(
+        "text-classification",
+        model=model_name,
+        top_k=None,
+        device=device,
+        torch_dtype=torch_dtype,
+    )
+
+
+def classify_text_emotion(text: str) -> list:
+    output = text_emotion_pipe(
+        text,
+        truncation=True,
+        max_length=text_emotion_pipe.model.config.max_position_embeddings,
+    )[0]
+    return sorted(output, key=lambda x: x["score"], reverse=True)
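The module loads nothing at import time: text_emotion_pipe stays None until init_text_emotion_classifier() builds the pipeline, using float16 on CUDA and float32 on CPU. A minimal usage sketch (the model name is the one hard-coded in the emotion hack removed from the RVC module below; the input text is made up, and any Hugging Face text-classification model should work):

    import modules.classify.classify_module as classify_module

    # Load the emotion model once at startup, then classify as often as needed.
    classify_module.init_text_emotion_classifier("nateraw/bert-base-uncased-emotion")
    scores = classify_module.classify_text_emotion("I am so happy today!")
    print(scores[0]["label"])  # highest-scoring emotion label

The hunks that follow switch the RVC module, and then server.py, over to this shared module.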
@@ -14,14 +14,14 @@ References:
 """
 from flask import abort, request, send_file, jsonify
 import json
-import modules.voice_conversion.rvc.rvc as rvc
 from scipy.io import wavfile
 import os
 import io
 
-from py7zr import pack_7zarchive, unpack_7zarchive
 import shutil
 from py7zr import pack_7zarchive, unpack_7zarchive
+import modules.voice_conversion.rvc.rvc as rvc
+import modules.classify.classify_module as classify_module
 
 DEBUG_PREFIX = "<RVC module>"
 RVC_MODELS_PATH = "data/models/rvc/"
@@ -194,7 +194,7 @@ def rvc_process_audio():
 
     if emotion is None:
         print("> calling text classification pipeline")
-        emotions_score = classify_text(parameters["text"])
+        emotions_score = classify_module.classify_text_emotion(parameters["text"])
 
         print(" > ",emotions_score)
         emotion = emotions_score[0]["label"]
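Because the pipeline is built with top_k=None, classify_text_emotion() returns a score for every label and the sort puts the strongest one at index 0, which is why emotions_score[0]["label"] picks the winner. An illustrative result for the call above (labels depend on the configured model; these values are made up):

    # emotions_score, hypothetical:
    # [{"label": "joy", "score": 0.97}, {"label": "surprise", "score": 0.02}, ...]
    # emotion = emotions_score[0]["label"]  ->  "joy"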
@@ -355,30 +355,4 @@ def fix_model_install():
         if not found:
             print(" > WARNING: no corresponding folder found, move or delete the file manually to stop warnings.")
 
-
-#### Emotion HACK
-from transformers import AutoTokenizer, AutoProcessor, pipeline
-import torch
-
-# Models init
-cuda_device = "cuda:0"# if not args.cuda_device else args.cuda_device
-device_string = cuda_device if torch.cuda.is_available() else 'cpu'
-device = torch.device(device_string)
-torch_dtype = torch.float32 if device_string != cuda_device else torch.float16
-
-def classify_text(text: str) -> list:
-    output = classification_pipe(
-        text,
-        truncation=True,
-        max_length=classification_pipe.model.config.max_position_embeddings,
-    )[0]
-    return sorted(output, key=lambda x: x["score"], reverse=True)
-
-classification_pipe = pipeline(
-    "text-classification",
-    model="nateraw/bert-base-uncased-emotion",
-    top_k=None,
-    device=device,
-    torch_dtype=torch_dtype,
-)
-print(DEBUG_PREFIX,"RVC model folder checked.")
+    print(DEBUG_PREFIX,"RVC model folder checked.")
server.py (24 lines changed)
@@ -221,16 +221,6 @@ if "summarize" in modules:
         summarization_model, torch_dtype=torch_dtype
     ).to(device)
 
-if "classify" in modules:
-    print("Initializing a sentiment classification pipeline...")
-    classification_pipe = pipeline(
-        "text-classification",
-        model=classification_model,
-        top_k=None,
-        device=device,
-        torch_dtype=torch_dtype,
-    )
-
 if "sd" in modules and not sd_use_remote:
     from diffusers import StableDiffusionPipeline
     from diffusers import EulerAncestralDiscreteScheduler
@@ -337,6 +327,20 @@ if max_content_length is not None:
     print("Setting MAX_CONTENT_LENGTH to",max_content_length,"Mb")
     app.config["MAX_CONTENT_LENGTH"] = int(max_content_length) * 1024 * 1024
 
+# TODO: Keij, unify main classify and module one
+if "classify" in modules:
+    print("Initializing a sentiment classification pipeline...")
+    classification_pipe = pipeline(
+        "text-classification",
+        model=classification_model,
+        top_k=None,
+        device=device,
+        torch_dtype=torch_dtype,
+    )
+
+    import modules.classify.classify_module as classify_module
+    classify_module.init_text_emotion_classifier(classification_model)
+
 if "vosk-stt" in modules:
     print("Initializing Vosk speech-recognition (from ST request file)")
     vosk_model_path = (
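As the TODO notes, this interim state loads the classification model twice: once for server.py's local classification_pipe and once inside the new module. A purely hypothetical sketch of the unification the TODO asks for, not part of this commit, would drop the local pipe and keep only the shared module:

    # Hypothetical follow-up (not in this commit): a single shared pipeline.
    if "classify" in modules:
        import modules.classify.classify_module as classify_module
        classify_module.init_text_emotion_classifier(classification_model)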