Model: Add log messages for model loading

It's useful to know which GPU split method is being used when the model is loaded.

Signed-off-by: kingbri <8082010+kingbri1@users.noreply.github.com>
2026-03-14 15:57:27 +00:00 · 2025-08-17 23:09:27 -04:00
parent a3a32c30a4
commit a4d02c2b70
1 changed files with 10 additions and 0 deletions
--- a/backends/exllamav3/model.py
+++ b/backends/exllamav3/model.py
@@ -188,6 +188,7 @@ class ExllamaV3Container(BaseModelContainer):
            # Set GPU split options
            # Enable manual GPU split if provided
            if gpu_split:
+                self.gpu_split_auto = False
                self.gpu_split = gpu_split

                # Causes crash if set with GPU split
@@ -464,6 +465,15 @@ class ExllamaV3Container(BaseModelContainer):
                if value:
                    yield value

+        logger.info("Loading model: " + str(self.model_dir))
+
+        if self.use_tp:
+            logger.info("Loading with tensor parallel")
+        elif self.gpu_split_auto:
+            logger.info("Loading with autosplit")
+        else:
+            logger.info("Loading with a manual GPU split (or a one GPU setup)")
+
        for value in self.model.load_gen(
            tensor_p=self.use_tp,
            tp_backend=self.tp_backend,