mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-30 11:21:56 +00:00
Avoid ggml_get_rows if not necessary (#1160)
* Copy reduce result to other GPUs if necessary
* Avoid ggml_get_rows for TG
* For the output ops use the result of the split that ran on the main GPU
* More models
This commit is contained in:
@@ -2244,7 +2244,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
}
|
||||
}
|
||||
|
||||
if (split->graph.nodes[0]->op == GGML_OP_REDUCE) {
|
||||
if (split->graph.nodes[0]->op == GGML_OP_REDUCE && i < sched->n_splits - 1) {
|
||||
last_reduce = split_backend_id;
|
||||
if (ith == split_backend_id) {
|
||||
auto node = split->graph.nodes[0];
|
||||
@@ -2318,7 +2318,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
}
|
||||
}
|
||||
|
||||
if (split->graph.nodes[0]->op == GGML_OP_REDUCE) {
|
||||
if (split->graph.nodes[0]->op == GGML_OP_REDUCE && i < sched->n_splits - 1) {
|
||||
last_reduce = split_backend_id;
|
||||
barrier.arrive_and_wait();
|
||||
if (ith == split_backend_id) {
|
||||
|
||||
Reference in New Issue
Block a user