mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-04 19:10:03 +00:00
Be able to set reduce op data type for split mode "graph"
This commit is contained in:
@@ -444,6 +444,7 @@ extern "C" {
         bool only_active_experts;
         bool k_cache_hadamard;            // if true, apply Hadamard transform to K-cache
         bool split_mode_graph_scheduling; // if true, force split mode graph scheduling
+        bool split_mode_f16;              // if true, cast intermediate results to f16 before copying to other GPUs

         // Abort callback
         // if it returns true, execution of llama_decode() will be aborted
Reference in New Issue
Block a user