Embeddings: Update config, args, and parameter names

Use embeddings_device as the parameter for device to remove ambiguity.

Signed-off-by: kingbri <bdashore3@proton.me>
2026-03-15 00:07:28 +00:00 · 2024-07-30 15:32:26 -04:00
parent bfa011e0ce
commit dc3dcc9c0d
5 changed files with 43 additions and 9 deletions
--- a/config_sample.yml
+++ b/config_sample.yml
@@ -72,13 +72,6 @@ developer:
  # Otherwise, the priority will be set to high
  #realtime_process_priority: False

-embeddings:
-  embedding_model_dir: models
-
-  embedding_model_name:
-
-  embeddings_device: cpu
-
 # Options for model overrides and loading
 # Please read the comments to understand how arguments are handled between initial and API loads
 model:
@@ -208,3 +201,19 @@ model:
    #loras:
    #- name: lora1
    #  scaling: 1.0
+
+# Options for embedding models and loading.
+# NOTE: Embedding support requires the "extras" feature to be installed
+# Install it via "pip install .[extras]"
+embeddings:
+  # Overrides the directory to look for embedding models (default: models)
+  embedding_model_dir: models
+
+  # An initial embedding model to load on the infinity backend (default: None)
+  embedding_model_name:
+
+  # Device to load embedding models on (default: cpu)
+  # Possible values: cpu, auto, cuda
+  # NOTE: It's recommended to load embedding models on the CPU.
+  # If you'd like to load on an AMD GPU, set this value to "cuda" as well.
+  embeddings_device: cpu