Extra modules (except SD) now use CPU by default

2026-04-28 02:11:22 +00:00 · 2023-06-18 21:51:31 +03:00
parent 94ab92972c
commit db0b232903
2 changed files with 7 additions and 6 deletions
--- a/README.md
+++ b/README.md
@@ -4,8 +4,7 @@ A set of APIs for various SillyTavern extensions.

 **You need to run the latest version of SillyTavern. Grab it here: [How to install](https://docs.sillytavern.app/installation/windows/), [Git repository](https://github.com/SillyTavern/SillyTavern)**

-All modules require at least 6 Gb of VRAM to run. With Stable Diffusion disabled, it will probably fit in 4 Gb.
-Alternatively, everything could also be run on the CPU.
+All modules, except for Stable Diffusion, run on the CPU by default. They can instead be configured to use CUDA (with the `--cpu=false` command line option). When running all modules simultaneously, expect approximately 6 GB of RAM usage. Loading Stable Diffusion adds a couple more GB on top of that.

 Try on Colab (will give you a link to Extras API):  <a target="_blank" href="https://colab.research.google.com/github/SillyTavern/SillyTavern/blob/main/colab/GPU.ipynb">
  <img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/>
@@ -137,7 +136,7 @@ cd SillyTavern-extras
 | `--listen`               | Host the app on the local network                                      |
 | `--share`                | Share the app on CloudFlare tunnel                                     |
 | `--secure`               | Adds API key authentication requirements. Highly recommended when paired with share! |
-| `--cpu`                  | Run the models on the CPU instead of CUDA                              |
+| `--cpu`                  | Run the models on the CPU instead of CUDA. Enabled by default. Pass `--cpu=false` to run on the GPU (CUDA) instead. |
 | `--summarization-model`  | Load a custom summarization model.<br>Expects a HuggingFace model ID.<br>Default: [Qiliang/bart-large-cnn-samsum-ChatGPT_v3](https://huggingface.co/Qiliang/bart-large-cnn-samsum-ChatGPT_v3) |
 | `--classification-model` | Load a custom sentiment classification model.<br>Expects a HuggingFace model ID.<br>Default (6 emotions): [nateraw/bert-base-uncased-emotion](https://huggingface.co/nateraw/bert-base-uncased-emotion)<br>Other solid option is (28 emotions): [joeddav/distilbert-base-uncased-go-emotions-student](https://huggingface.co/joeddav/distilbert-base-uncased-go-emotions-student)<br>For Chinese language: [touch20032003/xuyuan-trial-sentiment-bert-chinese](https://huggingface.co/touch20032003/xuyuan-trial-sentiment-bert-chinese) |
 | `--captioning-model`     | Load a custom captioning model.<br>Expects a HuggingFace model ID.<br>Default: [Salesforce/blip-image-captioning-large](https://huggingface.co/Salesforce/blip-image-captioning-large) |
--- a/server.py
+++ b/server.py
@@ -54,7 +54,7 @@ parser.add_argument(
 parser.add_argument(
    "--share", action="store_true", help="Share the app on CloudFlare tunnel"
 )
-parser.add_argument("--cpu", action="store_true", help="Run the models on the CPU")
+parser.add_argument("--cpu", nargs="?", const=True, default=True, type=lambda v: str(v).strip().lower() not in ("false", "0", "no", "off"), help="Run the models on the CPU")  # optional value: bare --cpu keeps CPU, --cpu=false selects CUDA (store_true + default=True could never be turned off)
 parser.add_argument("--summarization-model", help="Load a custom summarization model")
 parser.add_argument(
    "--classification-model", help="Load a custom text classification model"
@@ -73,7 +73,7 @@ sd_group = parser.add_mutually_exclusive_group()

 local_sd = sd_group.add_argument_group("sd-local")
 local_sd.add_argument("--sd-model", help="Load a custom SD image generation model")
-local_sd.add_argument("--sd-cpu", help="Force the SD pipeline to run on the CPU")
+local_sd.add_argument("--sd-cpu", help="Force the SD pipeline to run on the CPU", action="store_true")

 remote_sd = sd_group.add_argument_group("sd-remote")
 remote_sd.add_argument(
@@ -144,6 +144,8 @@ device_string = "cuda:0" if torch.cuda.is_available() and not args.cpu else "cpu
 device = torch.device(device_string)
 torch_dtype = torch.float32 if device_string == "cpu" else torch.float16

+print(f"{Fore.GREEN}{Style.BRIGHT}Using torch device: {device_string}{Style.RESET_ALL}")
+
 if "caption" in modules:
    print("Initializing an image captioning model...")
    captioning_processor = AutoProcessor.from_pretrained(captioning_model)
@@ -248,7 +250,7 @@ if "chromadb" in modules:
            chromadb_client = chromadb.Client(Settings(anonymized_telemetry=False, persist_directory=args.chroma_folder, chroma_db_impl='duckdb+parquet'))
            print(f"ChromaDB is running in-memory with persistence. Persistence is stored in {args.chroma_folder}. Can be cleared by deleting the folder or purging db.")
        else:
-            chromadb_client = chromadb.Client(Settings(anonymized_telemetry=False))            
+            chromadb_client = chromadb.Client(Settings(anonymized_telemetry=False))
            print(f"ChromaDB is running in-memory without persistence.")
    else:
        chroma_port=(