mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-24 08:29:29 +00:00
Add a command-line option to set the maximum extra VRAM that the scheduler can use
This commit is contained in:
@@ -404,6 +404,7 @@ extern "C" {
|
||||
uint32_t n_seq_max; // max number of sequences (i.e. distinct states for recurrent models)
|
||||
uint32_t n_threads; // number of threads to use for generation
|
||||
uint32_t n_threads_batch; // number of threads to use for batch processing
|
||||
int32_t max_extra_alloc; // Max. additional VRAM the scheduler is allowed to allocate
|
||||
|
||||
enum llama_rope_scaling_type rope_scaling_type; // RoPE scaling type, from `enum llama_rope_scaling_type`
|
||||
enum llama_pooling_type pooling_type; // whether to pool (sum) embedding results by sequence id
|
||||
|
||||
Reference in New Issue
Block a user