mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-03-05 03:20:00 +00:00
Do not use OpenMP if there are tensor overrides
This commit is contained in:
@@ -14,10 +14,9 @@
|
||||
#include <set>
|
||||
#include <array>
|
||||
#include <chrono>
|
||||
#include <barrier>
|
||||
#ifdef GGML_USE_OPENMP
|
||||
#include <omp.h>
|
||||
#else
|
||||
#include <barrier>
|
||||
#endif
|
||||
|
||||
#define IK_PRINT_TIMING 0
|
||||
@@ -1174,9 +1173,7 @@ struct ggml_backend_sched {
|
||||
|
||||
uint32_t op_offload[(GGML_OP_COUNT + 31)/32];
|
||||
|
||||
#ifndef GGML_USE_OPENMP
|
||||
std::vector<std::thread> workers;
|
||||
#endif
|
||||
std::vector<ggml_status> statuses;
|
||||
std::vector<std::vector<ggml_backend_sched_split*>> backend_splits;
|
||||
std::array<bool, GGML_SCHED_MAX_BACKENDS> needs_sync;
|
||||
@@ -2164,7 +2161,22 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
|
||||
for (auto & s : sched->statuses) s = GGML_STATUS_SUCCESS;
|
||||
|
||||
bool work_done = false;
|
||||
#ifdef GGML_USE_OPENMP
|
||||
bool has_cpu_work = false;
|
||||
for (int i = 0; i < sched->n_backends; ++i) {
|
||||
if (!sched->backend_splits[i].empty()) {
|
||||
auto split = sched->backend_splits[i].front();
|
||||
if (ggml_backend_is_cpu(sched->backends[split->backend_id])) {
|
||||
if (sched->backend_splits[i].size() > 1) {
|
||||
has_cpu_work = true;
|
||||
}
|
||||
break;
|
||||
//printf("CPU backend %d has %d splits\n", split->backend_id, (int)sched->backend_splits[i].size());
|
||||
}
|
||||
}
|
||||
}
|
||||
if (!has_cpu_work) {
|
||||
#pragma omp parallel num_threads(sched->n_backends)
|
||||
{
|
||||
|
||||
@@ -2218,7 +2230,10 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
work_done = true;
|
||||
}
|
||||
#endif
|
||||
if (!work_done) {
|
||||
std::barrier barrier(sched->n_backends, [] () {});
|
||||
auto compute = [sched, &barrier] (int ith) {
|
||||
|
||||
@@ -2277,7 +2292,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
|
||||
for (int i = 0; i < sched->n_backends; ++i) sched->workers.emplace_back(compute, i);
|
||||
for (auto & w : sched->workers) w.join();
|
||||
sched->workers.clear();
|
||||
#endif
|
||||
}
|
||||
for (auto status : sched->statuses) {
|
||||
if (status != GGML_STATUS_SUCCESS) return status;
|
||||
}
|
||||
@@ -2379,9 +2394,7 @@ ggml_backend_sched_t ggml_backend_sched_new(
|
||||
|
||||
sched->galloc = ggml_gallocr_new_n(sched->bufts, n_backends);
|
||||
|
||||
#ifndef GGML_USE_OPENMP
|
||||
sched->workers.reserve(sched->n_backends);
|
||||
#endif
|
||||
sched->statuses.resize(sched->n_backends, GGML_STATUS_SUCCESS);
|
||||
sched->backend_splits.resize(sched->n_backends);
|
||||
|
||||
@@ -2446,27 +2459,6 @@ bool ggml_backend_sched_reserve(ggml_backend_sched_t sched, struct ggml_cgraph *
|
||||
return true;
|
||||
}
|
||||
|
||||
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
|
||||
|
||||
ggml_backend_sched_split_graph(sched, graph);
|
||||
|
||||
|
||||
if (!ggml_backend_sched_alloc_splits(sched)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
sched->is_alloc = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph);
|
||||
ggml_backend_sched_synchronize(sched);
|
||||
return err;
|
||||
}
|
||||
|
||||
static void ggml_sched_prepare_graph(ggml_backend_sched_t sched) {
|
||||
|
||||
for (auto & item : sched->own_cpy ) item = false;
|
||||
@@ -2553,6 +2545,27 @@ static void ggml_sched_prepare_graph(ggml_backend_sched_t sched) {
|
||||
}
|
||||
}
|
||||
|
||||
bool ggml_backend_sched_alloc_graph(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
GGML_ASSERT((int)sched->hash_set.size >= graph->n_nodes + graph->n_leafs);
|
||||
|
||||
ggml_backend_sched_split_graph(sched, graph);
|
||||
|
||||
if (!ggml_backend_sched_alloc_splits(sched)) {
|
||||
return false;
|
||||
}
|
||||
ggml_sched_prepare_graph(sched);
|
||||
|
||||
sched->is_alloc = true;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
enum ggml_status ggml_backend_sched_graph_compute(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
enum ggml_status err = ggml_backend_sched_graph_compute_async(sched, graph);
|
||||
ggml_backend_sched_synchronize(sched);
|
||||
return err;
|
||||
}
|
||||
|
||||
enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
|
||||
if (!sched->is_reset && !sched->is_alloc) {
|
||||
ggml_backend_sched_reset(sched);
|
||||
@@ -2562,7 +2575,6 @@ enum ggml_status ggml_backend_sched_graph_compute_async(ggml_backend_sched_t sch
|
||||
if (!ggml_backend_sched_alloc_graph(sched, graph)) {
|
||||
return GGML_STATUS_ALLOC_FAILED;
|
||||
}
|
||||
ggml_sched_prepare_graph(sched);
|
||||
}
|
||||
|
||||
return ggml_backend_sched_compute_splits(sched);
|
||||
|
||||
Reference in New Issue
Block a user