mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
Disable split mode "row" (#987)
* Disable split mode "row"
* Also llama-bench

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
@@ -1240,10 +1240,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
|
||||
params.split_mode = LLAMA_SPLIT_MODE_LAYER;
|
||||
}
|
||||
else if (arg_next == "row") {
|
||||
#ifdef GGML_USE_SYCL
|
||||
fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
|
||||
exit(1);
|
||||
#endif // GGML_USE_SYCL
|
||||
fprintf(stderr, "\n\n=====================================================================================\n");
|
||||
fprintf(stderr, " Split mode row is no longer supported\n");
|
||||
fprintf(stderr, "=====================================================================================\n\n\n");
|
||||
GGML_ABORT("fatal error");
|
||||
params.split_mode = LLAMA_SPLIT_MODE_ROW;
|
||||
}
|
||||
else {
|
||||
@@ -2217,8 +2217,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
|
||||
options.push_back({ "*", "-sm, --split-mode SPLIT_MODE",
|
||||
"how to split the model across multiple GPUs, one of:\n"
|
||||
" - none: use one GPU only\n"
|
||||
" - layer (default): split layers and KV across GPUs\n"
|
||||
" - row: split rows across GPUs" });
|
||||
" - layer (default): split layers and KV across GPUs\n" });
|
||||
options.push_back({ "*", "-ts, --tensor-split SPLIT",
|
||||
"fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1" });
|
||||
options.push_back({ "*", "-dev, --device dev1,dev2",
|
||||
|
||||
@@ -334,7 +334,7 @@ static void print_usage(int /* argc */, char ** argv) {
|
||||
printf(" -ngl, --n-gpu-layers <n> (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
|
||||
printf(" --n-cpu-moe <n> (default: none)\n");
|
||||
printf(" -rpc, --rpc <rpc_servers> (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
|
||||
printf(" -sm, --split-mode <none|layer|row> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
|
||||
printf(" -sm, --split-mode <none|layer> (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
|
||||
printf(" -mg, --main-gpu <i> (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
|
||||
printf(" -nkvo, --no-kv-offload <0|1> (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
|
||||
printf(" -fa, --flash-attn <0|1> (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
|
||||
@@ -631,7 +631,11 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
|
||||
} else if (m == "layer") {
|
||||
mode = LLAMA_SPLIT_MODE_LAYER;
|
||||
} else if (m == "row") {
|
||||
mode = LLAMA_SPLIT_MODE_ROW;
|
||||
fprintf(stderr, "\n\n=======================================================================\n");
|
||||
fprintf(stderr, "Split mode 'row' is no longer supported\n");
|
||||
fprintf(stderr, "=======================================================================\n\n\n");
|
||||
invalid_param = true;
|
||||
break;
|
||||
} else {
|
||||
invalid_param = true;
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user