mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-21 05:34:08 +00:00
WIP split mode attn
Works for LLaMA models, but not for GLM-4.5. It doesn't seem to improve performance, so I guess there is no point in trying to fix it.
This commit is contained in:
@@ -1276,6 +1276,9 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
         else if (arg_next == "layer") {
             params.split_mode = LLAMA_SPLIT_MODE_LAYER;
         }
+        else if (arg_next == "attn") {
+            params.split_mode = LLAMA_SPLIT_MODE_ATTN;
+        }
         else if (arg_next == "graph") {
             params.split_mode = LLAMA_SPLIT_MODE_GRAPH;
         }
Reference in New Issue
Block a user