From 905bca2e1c2bd706e429e5ca4093940b48f21479 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Mon, 12 Jan 2026 15:28:06 +0200
Subject: [PATCH] Cleanup

Remove dead code: the commented-out graph-based implementation of the
up/gate experts repacking in llama_repack_up_gate_exps().

---
 src/llama.cpp | 53 ---------------------------------------------------
 1 file changed, 53 deletions(-)

diff --git a/src/llama.cpp b/src/llama.cpp
index 9e906157..053cc42e 100644
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -4363,59 +4363,6 @@ static void llama_repack_up_gate_exps(llama_context & lctx) {
             }
         }
     }
-
-    /*
-    ggml_init_params params{lctx.buf_compute_meta.size(), lctx.buf_compute_meta.data(), true};
-    auto ctx = ggml_init(params);
-
-    auto gf = ggml_new_graph_custom(ctx, model.max_nodes(), false);
-
-    for (int il = 0; il < int(model.layers.size()); ++il) {
-        auto & l = model.layers[il];
-        if (l.ffn_up_gate_exps && l.ffn_up_exps && l.ffn_gate_exps) {
-            GGML_ASSERT(l.ffn_up_gate_exps->type == l.ffn_up_exps->type && l.ffn_up_gate_exps->type == l.ffn_gate_exps->type);
-            GGML_ASSERT(l.ffn_up_gate_exps->ne[0] == l.ffn_up_exps->ne[0] && l.ffn_up_gate_exps->ne[0] == l.ffn_gate_exps->ne[0]);
-            GGML_ASSERT(l.ffn_up_gate_exps->ne[2] == l.ffn_up_exps->ne[2] && l.ffn_up_gate_exps->ne[2] == l.ffn_gate_exps->ne[2]);
-            GGML_ASSERT(l.ffn_up_gate_exps->ne[1] == l.ffn_up_exps->ne[1] + l.ffn_gate_exps->ne[1]);
-            printf("%s: repacking up/gate experts in layer %d\n", __func__, il);
-            auto aux = ggml_dup(ctx, l.ffn_up_exps);
-            auto ffn_up_gate_exps_flat = ggml_reshape_2d(ctx, l.ffn_up_gate_exps,
-                    l.ffn_up_gate_exps->ne[0], l.ffn_up_gate_exps->ne[1]*l.ffn_up_gate_exps->ne[2]);
-            auto ffn_up_flat = ggml_reshape_2d(ctx, aux, l.ffn_up_exps->ne[0], l.ffn_up_exps->ne[1]*l.ffn_up_exps->ne[2]);
-            auto ffn_gate_flat = ggml_reshape_2d(ctx, l.ffn_gate_exps, l.ffn_gate_exps->ne[0], l.ffn_gate_exps->ne[1]*l.ffn_gate_exps->ne[2]);
-            size_t offset_up_gate = 0;
-            size_t offset_up = 0;
-            for (int i2 = 0; i2 < (int)l.ffn_up_gate_exps->ne[2]; ++i2) {
-                auto dst = ggml_view_2d(ctx, ffn_up_gate_exps_flat, l.ffn_up_exps->ne[0], l.ffn_up_exps->ne[1],
-                        l.ffn_up_exps->nb[1], offset_up_gate);
-                auto src = ggml_view_2d(ctx, ffn_up_flat, l.ffn_up_exps->ne[0], l.ffn_up_exps->ne[1],
-                        l.ffn_up_exps->nb[1], offset_up);
-                auto cpy1 = ggml_cpy(ctx, src, dst);
-                offset_up_gate += l.ffn_up_exps->ne[1]*l.ffn_up_exps->nb[1];
-                if (i2 < (int)l.ffn_up_gate_exps->ne[2]-1) {
-                    dst = ggml_view_2d(ctx, ffn_up_gate_exps_flat, l.ffn_up_exps->ne[0], l.ffn_up_exps->ne[1],
-                            l.ffn_up_exps->nb[1], offset_up_gate);
-                    src = ggml_view_2d(ctx, ffn_gate_flat, l.ffn_up_exps->ne[0], l.ffn_up_exps->ne[1],
-                            l.ffn_up_exps->nb[1], offset_up);
-                    auto cpy2 = ggml_cpy(ctx, src, dst);
-                    offset_up_gate += l.ffn_up_exps->ne[1]*l.ffn_up_exps->nb[1];
-                    offset_up += l.ffn_up_exps->ne[1]*l.ffn_up_exps->nb[1];
-                    ggml_build_forward_expand(gf, cpy1);
-                    ggml_build_forward_expand(gf, cpy2);
-                } else {
-                    ggml_build_forward_expand(gf, cpy1);
-                }
-            }
-        }
-    }
-    int n_threads = std::thread::hardware_concurrency()/2;
-    if (n_threads == 0) n_threads = 1;
-    llama_graph_compute(lctx, gf, n_threads);
-    llama_synchronize(&lctx);
-    ggml_backend_sched_reset(lctx.sched);
-    ggml_graph_clear(gf);
-    ggml_free(ctx);
-    */
 }
 
 struct llama_context * llama_new_context_with_model(