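This is better: add a disabled (`if (false)`) debug printout of scheduler splits in ggml-backend.cpp, and call `ggml_build_forward_expand(graph, routed_out)` in llama-build-context.cpp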

2026-01-26 17:20:01 +00:00 · 2025-12-12 06:23:39 +00:00
parent 07ae4b1ef0
commit 643cccd2c8
2 changed files with 14 additions and 0 deletions
--- a/ggml/src/ggml-backend.cpp
+++ b/ggml/src/ggml-backend.cpp
@@ -2053,6 +2053,19 @@ static ggml_status ggml_backend_sched_compute_splits_sm_graph(ggml_backend_sched
            }
            this_split.push_back(split_j);
        }
+        if (false) {
+            auto split = this_split.front();
+            if (this_split.size() == 1) {
+                printf("=== Split %d with %d inputs on backend %d\n", i, split->n_inputs, split->backend_id);
+            } else {
+                printf("=== Split %d with %d inputs on backends", i, split->n_inputs);
+                for (int j = 0; j < (int)this_split.size(); ++j) printf(" %d", this_split[j]->backend_id);
+                printf("\n");
+            }
+            for (int j = 0; j < split->graph.n_nodes; ++j) {
+                printf("  %d  %s(%s)\n", j, ggml_op_name(split->graph.nodes[j]->op), split->graph.nodes[j]->name);
+            }
+        }
        for (auto split : this_split) {
            ggml_backend_sched_copy_inputs(sched, split, needs_sync, ids, unique_ids, last_ids_tensor);
        }
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -1228,6 +1228,7 @@ llm_expert_gating_func_type   gating_op,
            cur = ggml_cast(ctx, cur, GGML_TYPE_F16);
            cb(cur, "ffn_out_f16", il_cb);
        }
+        ggml_build_forward_expand(graph, routed_out);
        results.push_back(cur);
    }
    GGML_ASSERT(!results.empty());