GPU offload policy (#405)

* Adding GPU offload policy

* Minor

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
2026-05-11 08:30:19 +00:00 · 2025-05-12 07:47:46 +03:00
parent 504fb890d9
commit 8669c3db2b
7 changed files with 77 additions and 2 deletions
--- a/include/llama.h
+++ b/include/llama.h
@@ -408,6 +408,7 @@ extern "C" {
        // currently works only with CPU execution
        ggml_abort_callback abort_callback;
        void *              abort_callback_data;
+        void *              offload_policy;
    };

    // model quantization parameters
@@ -523,6 +524,8 @@ extern "C" {
                     struct llama_model * model,
            struct llama_context_params   params);

+    LLAMA_API void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off);
+
    // Frees all allocated memory
    LLAMA_API void llama_free(struct llama_context * ctx);