From 1f14f50dfdd251a4d02a4fd241bce6af924a152d Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Mon, 27 Oct 2025 11:39:18 +0200
Subject: [PATCH] Try removing copy indirection

---
 ggml/src/ggml-cuda.cu      | 6 +++---
 ggml/src/ggml-cuda/cpy.cuh | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index b47b7dd5..b21d95d7 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -3558,7 +3558,7 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
 #endif
         }
 
-        if (node->op == GGML_OP_CPY) {
+        if (false && node->op == GGML_OP_CPY) {
 
             // Store the pointers which are updated for each token, such that these can be sent
             // to the device and accessed using indirection from CUDA graph
@@ -3602,7 +3602,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p
 
 static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
     if (node->data != graph_node_properties->node_address &&
-          node->op != GGML_OP_CPY &&
+          //node->op != GGML_OP_CPY &&
           node->op != GGML_OP_VIEW) {
         return false;
     }
@@ -3623,7 +3623,7 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
     for (int i = 0; i < GGML_MAX_SRC; i++) {
         if (node->src[i] &&
             node->src[i]->data != graph_node_properties->src_address[i] &&
-            node->op != GGML_OP_CPY &&
+            //node->op != GGML_OP_CPY &&
             node->op != GGML_OP_VIEW
         ) {
             return false;
diff --git a/ggml/src/ggml-cuda/cpy.cuh b/ggml/src/ggml-cuda/cpy.cuh
index 0bd3c0c6..9bd34b7c 100644
--- a/ggml/src/ggml-cuda/cpy.cuh
+++ b/ggml/src/ggml-cuda/cpy.cuh
@@ -2,7 +2,7 @@
 
 #define CUDA_CPY_BLOCK_SIZE 64
 
-void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1, bool disable_indirection = false);
+void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1, bool disable_indirection = true); //false);
 
 void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);