Support --device and --device-draft parameter (#866)

* add --device and --device-draft parameter
* don't print debug message in release mode
* fix
* bug fix to throw exception when no device specified
* add const

---------

Co-authored-by: firecoperana <firecoperana>
2026-01-26 17:20:01 +00:00 · 2025-10-27 16:13:28 +00:00
parent eb8116b097
commit 904e994bfb
12 changed files with 283 additions and 40 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -342,6 +342,9 @@ extern "C" {
    };

    struct llama_model_params {
+        // comma separated list of devices to use for offloading
+        const char* devices;
+
        int32_t n_gpu_layers; // number of layers to store in VRAM
        int32_t mla;          // MLA implementation to use (only applicable to DeepSeek models at this point)
        enum llama_split_mode split_mode; // how to split the model across multiple GPUs