Try removing CUDA graph copy indirection

2026-02-24 07:04:11 +00:00 · 2025-10-27 11:39:18 +02:00
parent 444782523d
commit 1f14f50dfd
2 changed files with 4 additions and 4 deletions
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -3558,7 +3558,7 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
 #endif
        }

-        if (node->op == GGML_OP_CPY) {
+        if (false && node->op == GGML_OP_CPY) {

            // Store the pointers which are updated for each token, such that these can be sent
            // to the device and accessed using indirection from CUDA graph
@@ -3602,7 +3602,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p

 static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
    if (node->data != graph_node_properties->node_address &&
-          node->op != GGML_OP_CPY &&
+          //node->op != GGML_OP_CPY &&
          node->op != GGML_OP_VIEW) {
        return false;
    }
@@ -3623,7 +3623,7 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
    for (int i = 0; i < GGML_MAX_SRC; i++) {
        if (node->src[i] &&
            node->src[i]->data != graph_node_properties->src_address[i] &&
-            node->op != GGML_OP_CPY &&
+            //node->op != GGML_OP_CPY &&
            node->op != GGML_OP_VIEW
        ) {
            return false;
--- a/ggml/src/ggml-cuda/cpy.cuh
+++ b/ggml/src/ggml-cuda/cpy.cuh
@@ -2,7 +2,7 @@

 #define CUDA_CPY_BLOCK_SIZE 64

-void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1,  bool disable_indirection = false);
+void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1,  bool disable_indirection = true); //false);

 void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);