Try removing copy indirection

This commit is contained in:
Iwan Kawrakow
2025-10-27 11:39:18 +02:00
parent 444782523d
commit 1f14f50dfd
2 changed files with 4 additions and 4 deletions

View File

@@ -3558,7 +3558,7 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
#endif
}
-if (node->op == GGML_OP_CPY) {
+if (false && node->op == GGML_OP_CPY) {
// Store the pointers which are updated for each token, such that these can be sent
// to the device and accessed using indirection from CUDA graph
@@ -3602,7 +3602,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p
static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
if (node->data != graph_node_properties->node_address &&
-node->op != GGML_OP_CPY &&
+//node->op != GGML_OP_CPY &&
node->op != GGML_OP_VIEW) {
return false;
}
@@ -3623,7 +3623,7 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
for (int i = 0; i < GGML_MAX_SRC; i++) {
if (node->src[i] &&
node->src[i]->data != graph_node_properties->src_address[i] &&
-node->op != GGML_OP_CPY &&
+//node->op != GGML_OP_CPY &&
node->op != GGML_OP_VIEW
) {
return false;

View File

@@ -2,7 +2,7 @@
#define CUDA_CPY_BLOCK_SIZE 64
-void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1, bool disable_indirection = false);
+void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1, bool disable_indirection = true); //false);
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);