diff --git a/common/common.cpp b/common/common.cpp
index 17a0c7ce..1ffe67e3 100644
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -1240,10 +1240,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
             params.split_mode = LLAMA_SPLIT_MODE_LAYER;
         }
         else if (arg_next == "row") {
-#ifdef GGML_USE_SYCL
-            fprintf(stderr, "warning: The split mode value:[row] is not supported by llama.cpp with SYCL. It's developing.\nExit!\n");
-            exit(1);
-#endif // GGML_USE_SYCL
+            fprintf(stderr, "\n\n=====================================================================================\n");
+            fprintf(stderr, "                        Split mode row is no longer supported\n");
+            fprintf(stderr, "=====================================================================================\n\n\n");
+            GGML_ABORT("fatal error");
             params.split_mode = LLAMA_SPLIT_MODE_ROW;
         }
         else {
@@ -2217,8 +2217,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param
     options.push_back({ "*",           "-sm, --split-mode SPLIT_MODE",
                                                                         "how to split the model across multiple GPUs, one of:\n"
                                                                         "  - none: use one GPU only\n"
-                                                                        "  - layer (default): split layers and KV across GPUs\n"
-                                                                        "  - row: split rows across GPUs" });
+                                                                        "  - layer (default): split layers and KV across GPUs\n" });
     options.push_back({ "*",           "-ts, --tensor-split SPLIT",
                                                                         "fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 3,1" });
     options.push_back({ "*",           "-dev, --device dev1,dev2",
diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp
index ed859e31..a479ba39 100644
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -334,7 +334,7 @@ static void print_usage(int /* argc */, char ** argv) {
     printf("  -ngl, --n-gpu-layers <n>               (default: %s)\n", join(cmd_params_defaults.n_gpu_layers, ",").c_str());
     printf("  --n-cpu-moe <n>                        (default: none)\n");
     printf("  -rpc, --rpc <rpc_servers>              (default: %s)\n", join(cmd_params_defaults.rpc_servers, ",").c_str());
-    printf("  -sm, --split-mode <none|layer|row>     (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
+    printf("  -sm, --split-mode <none|layer>         (default: %s)\n", join(transform_to_str(cmd_params_defaults.split_mode, split_mode_str), ",").c_str());
     printf("  -mg, --main-gpu <i>                    (default: %s)\n", join(cmd_params_defaults.main_gpu, ",").c_str());
     printf("  -nkvo, --no-kv-offload <0|1>           (default: %s)\n", join(cmd_params_defaults.no_kv_offload, ",").c_str());
     printf("  -fa, --flash-attn <0|1>                (default: %s)\n", join(cmd_params_defaults.flash_attn, ",").c_str());
@@ -631,7 +631,11 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
                 } else if (m == "layer") {
                     mode = LLAMA_SPLIT_MODE_LAYER;
                 } else if (m == "row") {
-                    mode = LLAMA_SPLIT_MODE_ROW;
+                    fprintf(stderr, "\n\n=======================================================================\n");
+                    fprintf(stderr, "Split mode 'row' is no longer supported\n");
+                    fprintf(stderr, "=======================================================================\n\n\n");
+                    invalid_param = true;
+                    break;
                 } else {
                     invalid_param = true;
                     break;