Give the user the option to override where model weights are stored

This commit is contained in:
Iwan Kawrakow
2025-02-24 16:02:31 +02:00
parent 547eee81d9
commit 2572a6de3c
5 changed files with 781 additions and 621 deletions

View File

@@ -305,6 +305,11 @@ extern "C" {
};
};
// Maps a tensor-name pattern to a backend buffer type, letting the user
// override where the matching model weights are stored (e.g. keep some
// tensors in host memory instead of VRAM). Consumed via the
// `tensor_buft_overrides` array in `llama_model_params`.
struct llama_model_tensor_buft_override {
// Tensor-name pattern to match. NOTE(review): the matching semantics
// (exact name / substring / regex) are not visible in this hunk — confirm
// against the loader implementation before relying on a specific rule.
const char * pattern;
// Buffer type to allocate matching tensors in.
ggml_backend_buffer_type_t buft;
};
struct llama_model_params {
int32_t n_gpu_layers; // number of layers to store in VRAM
enum llama_split_mode split_mode; // how to split the model across multiple GPUs
@@ -332,6 +337,8 @@ extern "C" {
// override key-value pairs of the model meta data
const struct llama_model_kv_override * kv_overrides;
const struct llama_model_tensor_buft_override * tensor_buft_overrides;
// Keep the booleans together to avoid misalignment during copy-by-value.
bool vocab_only; // only load the vocabulary, no weights
bool use_mmap; // use mmap if possible