diff --git a/common/common.cpp b/common/common.cpp index 942b4150..5bdd01b7 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -1242,12 +1242,8 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa else if (arg_next == "layer") { params.split_mode = LLAMA_SPLIT_MODE_LAYER; } - else if (arg_next == "row") { - //fprintf(stderr, "\n\n=====================================================================================\n"); - //fprintf(stderr, " Split mode row is no longer supported\n"); - //fprintf(stderr, "=====================================================================================\n\n\n"); - //GGML_ABORT("fatal error"); - params.split_mode = LLAMA_SPLIT_MODE_ROW; + else if (arg_next == "graph") { + params.split_mode = LLAMA_SPLIT_MODE_GRAPH; } else { invalid_param = true; @@ -2220,6 +2216,7 @@ void gpt_params_print_usage(int /*argc*/, char ** argv, const gpt_params & param options.push_back({ "*", "-sm, --split-mode SPLIT_MODE", "how to split the model across multiple GPUs, one of:\n" " - none: use one GPU only\n" + " - graph: split model tensors and computation graph across GPUs\n" " - layer (default): split layers and KV across GPUs\n" }); options.push_back({ "*", "-ts, --tensor-split SPLIT", "fraction of the model to offload to each GPU, comma-separated list of proportions, e.g. 
3,1" }); diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 3de656ad..f0f62d46 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -217,7 +217,7 @@ static const char * split_mode_str(llama_split_mode mode) { switch (mode) { case LLAMA_SPLIT_MODE_NONE: return "none"; case LLAMA_SPLIT_MODE_LAYER: return "layer"; - case LLAMA_SPLIT_MODE_ROW: return "row"; + case LLAMA_SPLIT_MODE_GRAPH: return "graph"; default: GGML_ABORT("invalid split mode"); } } @@ -630,13 +630,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) { mode = LLAMA_SPLIT_MODE_NONE; } else if (m == "layer") { mode = LLAMA_SPLIT_MODE_LAYER; - } else if (m == "row") { - mode = LLAMA_SPLIT_MODE_ROW; - //fprintf(stderr, "\n\n=======================================================================\n"); - //fprintf(stderr, "Split mode 'row' is no longer supported\n"); - //fprintf(stderr, "=======================================================================\n\n\n"); - //invalid_param = true; - //break; + } else if (m == "graph") { + mode = LLAMA_SPLIT_MODE_GRAPH; } else { invalid_param = true; break; diff --git a/include/llama.h b/include/llama.h index 7682951e..ed3f67e9 100644 --- a/include/llama.h +++ b/include/llama.h @@ -269,7 +269,7 @@ extern "C" { enum llama_split_mode { LLAMA_SPLIT_MODE_NONE = 0, // single GPU LLAMA_SPLIT_MODE_LAYER = 1, // split layers and KV across GPUs - LLAMA_SPLIT_MODE_ROW = 2, // split rows across GPUs + LLAMA_SPLIT_MODE_GRAPH = 2, // splits computations across GPUs }; diff --git a/src/llama-load-tensors.cpp b/src/llama-load-tensors.cpp index a2a9c327..4d5f2a76 100644 --- a/src/llama-load-tensors.cpp +++ b/src/llama-load-tensors.cpp @@ -230,7 +230,7 @@ create_tensors_helper::create_tensors_helper(llama_model_loader & _ml, llama_mod printf(" Oops: null buft for debvice %d\n", device); } } - if (model.split_mode == LLAMA_SPLIT_MODE_ROW) { + if (model.split_mode == 
LLAMA_SPLIT_MODE_GRAPH) { printf("model.splits:"); for (auto s : model.splits) printf(" %g", s); printf("\n"); @@ -305,7 +305,7 @@ ggml_tensor * create_tensors_helper::create_tensor(ggml_context * ctx, const std } if (actual_context) *actual_context = ctx; auto tensor = ml.create_tensor(ctx, name, ne, flags); - //if (tensor && requested_ctx == ctx && model.split_mode == LLAMA_SPLIT_MODE_ROW) { + //if (tensor && requested_ctx == ctx && model.split_mode == LLAMA_SPLIT_MODE_GRAPH) { // int i_layer = -1; // if (auto pos = name.find("blk."); pos == 0) { // GGML_ASSERT(sscanf(name.c_str(), "blk.%d.", &i_layer) == 1); @@ -2929,7 +2929,7 @@ bool create_tensors_helper::create_tensors() { default: throw std::runtime_error("unknown architecture"); } - if (model.split_mode == LLAMA_SPLIT_MODE_ROW) { + if (model.split_mode == LLAMA_SPLIT_MODE_GRAPH) { std::vector mem_used(model.splits.size(), 0); const auto & hparams = model.hparams; int gqa_ratio = hparams.n_head() / hparams.n_head_kv(); diff --git a/src/llama.cpp b/src/llama.cpp index 4a7b00b3..6eb2d820 100644 --- a/src/llama.cpp +++ b/src/llama.cpp @@ -648,7 +648,7 @@ static bool llama_kv_cache_init( } bool split_cache = false; - if (model.split_mode == LLAMA_SPLIT_MODE_ROW && model.arch != LLM_ARCH_DEEPSEEK2 && offload) { + if (model.split_mode == LLAMA_SPLIT_MODE_GRAPH && model.arch != LLM_ARCH_DEEPSEEK2 && offload) { cache.split_k_l.reserve(n_layer); cache.split_v_l.reserve(n_layer); split_cache = true; @@ -1785,7 +1785,7 @@ static bool llm_load_tensors( } } else { ggml_backend_buffer_type_t split_buft; - if (split_mode == LLAMA_SPLIT_MODE_ROW && model.splits.size() > 1) { + if (split_mode == LLAMA_SPLIT_MODE_GRAPH && model.splits.size() > 1) { split_buft = llama_default_buffer_type_split(model, model.devices[main_gpu], model.splits.data()); } else { // LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_LAYER in backends where it is not supported @@ -4424,7 +4424,7 @@ struct llama_context * llama_new_context_with_model( } #elif 
defined(GGML_USE_CUDA) if (model->split_mode == LLAMA_SPLIT_MODE_NONE) { - // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used + // with split_mode LLAMA_SPLIT_MODE_NONE, only the main GPU backend is used ggml_backend_t backend = ggml_backend_cuda_init(model->main_gpu, cparams.cuda_params); if (backend == nullptr) { LLAMA_LOG_ERROR("%s: failed to initialize CUDA%d backend\n", __func__, model->main_gpu); @@ -4434,7 +4434,7 @@ struct llama_context * llama_new_context_with_model( ggml_backend_add_from_device(ctx, backend); } else { - // LLAMA_SPLIT_MODE_LAYER and LLAMA_SPLIT_MODE_ROW require a backend for each GPU + // LLAMA_SPLIT_MODE_LAYER and LLAMA_SPLIT_MODE_GRAPH require a backend for each GPU for (int device = 0; device < ggml_backend_cuda_get_device_count(); ++device) { ggml_backend_t backend = ggml_backend_cuda_init(device, cparams.cuda_params); if (backend == nullptr) { @@ -4446,7 +4446,7 @@ struct llama_context * llama_new_context_with_model( } } #elif defined(GGML_USE_VULKAN) - if (model->split_mode == LLAMA_SPLIT_MODE_ROW) { + if (model->split_mode == LLAMA_SPLIT_MODE_GRAPH) { LLAMA_LOG_ERROR("%s: Row split not supported. 
Failed to initialize Vulkan backend\n", __func__); llama_free(ctx); return nullptr; @@ -4471,8 +4471,8 @@ struct llama_context * llama_new_context_with_model( } } #elif defined(GGML_USE_SYCL) - // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used - if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) { + // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_GRAPH, only the main GPU backend is used + if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_GRAPH) { ggml_backend_t backend = ggml_backend_sycl_init(model->main_gpu); if (backend == nullptr) { LLAMA_LOG_ERROR("%s: failed to initialize SYCL%d backend\n", __func__, model->main_gpu); @@ -4503,9 +4503,9 @@ struct llama_context * llama_new_context_with_model( ggml_backend_add_from_device(ctx, backend); } #elif defined(GGML_USE_CANN) - // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_ROW, only the main GPU backend is used + // with split_mode LLAMA_SPLIT_MODE_NONE or LLAMA_SPLIT_MODE_GRAPH, only the main GPU backend is used // TODO: ggml_backend_cann is not support split tensor now, just leave code here. - if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_ROW) { + if (model->split_mode == LLAMA_SPLIT_MODE_NONE || model->split_mode == LLAMA_SPLIT_MODE_GRAPH) { ggml_backend_t backend = ggml_backend_cann_init(model->main_gpu); if (backend == nullptr) { LLAMA_LOG_ERROR("%s: failed to initialize CANN%d backend\n", __func__, model->main_gpu);