mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 11:21:56 +00:00
Try removing copy indirection
This commit is contained in:
@@ -3558,7 +3558,7 @@ static bool check_node_graph_compatibility_and_refresh_copy_ops(ggml_backend_cud
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
if (node->op == GGML_OP_CPY) {
|
if (false && node->op == GGML_OP_CPY) {
|
||||||
|
|
||||||
// Store the pointers which are updated for each token, such that these can be sent
|
// Store the pointers which are updated for each token, such that these can be sent
|
||||||
// to the device and accessed using indirection from CUDA graph
|
// to the device and accessed using indirection from CUDA graph
|
||||||
@@ -3602,7 +3602,7 @@ static void set_ggml_graph_node_properties(ggml_tensor * node, ggml_graph_node_p
|
|||||||
|
|
||||||
static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
|
static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_graph_node_properties * graph_node_properties) {
|
||||||
if (node->data != graph_node_properties->node_address &&
|
if (node->data != graph_node_properties->node_address &&
|
||||||
node->op != GGML_OP_CPY &&
|
//node->op != GGML_OP_CPY &&
|
||||||
node->op != GGML_OP_VIEW) {
|
node->op != GGML_OP_VIEW) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@@ -3623,7 +3623,7 @@ static bool ggml_graph_node_has_matching_properties(ggml_tensor * node, ggml_gra
|
|||||||
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
for (int i = 0; i < GGML_MAX_SRC; i++) {
|
||||||
if (node->src[i] &&
|
if (node->src[i] &&
|
||||||
node->src[i]->data != graph_node_properties->src_address[i] &&
|
node->src[i]->data != graph_node_properties->src_address[i] &&
|
||||||
node->op != GGML_OP_CPY &&
|
//node->op != GGML_OP_CPY &&
|
||||||
node->op != GGML_OP_VIEW
|
node->op != GGML_OP_VIEW
|
||||||
) {
|
) {
|
||||||
return false;
|
return false;
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
#define CUDA_CPY_BLOCK_SIZE 64
|
#define CUDA_CPY_BLOCK_SIZE 64
|
||||||
|
|
||||||
void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1, bool disable_indirection = false);
|
void ggml_cuda_cpy(ggml_backend_cuda_context & ctx, const ggml_tensor * src0, ggml_tensor * src1, bool disable_indirection = true); //false);
|
||||||
|
|
||||||
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
void ggml_cuda_dup(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user