GPU offload policy (#405)

* Adding GPU offload policy

* Minor

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-05-12 07:47:46 +03:00
committed by GitHub
parent 504fb890d9
commit 8669c3db2b
7 changed files with 77 additions and 2 deletions

View File

@@ -408,6 +408,7 @@ extern "C" {
// currently works only with CPU execution
ggml_abort_callback abort_callback;
void * abort_callback_data;
void * offload_policy;
};
// model quantization parameters
@@ -523,6 +524,8 @@ extern "C" {
struct llama_model * model,
struct llama_context_params params);
// Sets the GPU offload policy on the context lctx: switches offload on or off
// (on_or_off) for the operation identified by op.
// NOTE(review): op is presumably a ggml op id passed as a plain int; the valid range
// and the treatment of out-of-range values are not visible here - confirm against the implementation.
LLAMA_API void llama_set_offload_policy(struct llama_context * lctx, int op, bool on_or_off);
// Frees all allocated memory
LLAMA_API void llama_free(struct llama_context * ctx);