It looks like, with this change, it works with tensor overrides

This commit is contained in:
Iwan Kawrakow
2025-12-16 17:41:04 +00:00
parent 58ef9d608e
commit f44844b328
2 changed files with 8 additions and 1 deletion

View File

@@ -2120,6 +2120,7 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
backend_splits[sched->splits[i].backend_id].push_back(&sched->splits[i]);
}
for (int backend_id = 0; backend_id < sched->n_backends; ++backend_id) {
if (ggml_backend_is_cpu(ggml_backend_sched_get_backend(sched, backend_id))) continue;
if (backend_splits[backend_id].empty()) continue;
size_t input_size = 0;
size_t max_input_size = 0;
@@ -2206,6 +2207,12 @@ static enum ggml_status ggml_backend_sched_compute_splits(ggml_backend_sched_t s
if (split->n_inputs > 0 && !own_cpy[split_backend_id]) {
needs_sync[split_backend_id] = true;
} else {
for (int j = 0; j < split->n_inputs; ++j) {
if (ggml_backend_buffer_is_host(split->inputs[j]->buffer)) {
needs_sync[split_backend_id] = true;
}
}
}
if (!sched->callback_eval) {
#if IK_PRINT_TIMING

View File

@@ -4773,7 +4773,7 @@ struct llama_context * llama_new_context_with_model(
LLAMA_LOG_INFO("XXXXXXXXXXXXXXXXXXXXX Setting only active experts offload\n");
ggml_backend_sched_set_only_active_experts(ctx->sched, true);
}
if (model->split_mode == LLAMA_SPLIT_MODE_GRAPH && !model->has_tensor_overrides()) {
if (model->split_mode == LLAMA_SPLIT_MODE_GRAPH) { // && !model->has_tensor_overrides()) {
ggml_backend_sched_set_split_mode_graph(ctx->sched, true);
ggml_backend_sched_set_max_extra_alloc(ctx->sched, params.max_extra_alloc);
}