Minor delta-net tweak (#1308)

* Make sure we pick the reduced tensor from the right GPU
* Minor
* Minor delta-net tweak
2026-05-11 00:20:19 +00:00 · 2026-02-24 15:22:57 +01:00
parent 7065488135
commit 38ca19d828
2 changed files with 2 additions and 5 deletions
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -3263,8 +3263,6 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
                ggml_cuda_op_sum_rows_nc(ctx, cgraph->nodes[i+1]);
                i += 2;
            } else {
-                //auto src = dst->src[0];
-                //printf("cont(%s -> %s): %ld x %ld x %ld x %ld; %zu x %zu x %zu x %zu\n", src->name, dst->name, src->ne[0], src->ne[1], src->ne[2], src->ne[3], src->nb[0], src->nb[1], src->nb[2], src->nb[3]);
                ggml_cuda_dup(ctx, dst);
            }
            break;
--- a/src/llama-delta-net.cpp
+++ b/src/llama-delta-net.cpp
@@ -636,9 +636,8 @@ ggml_tensor * delta_net::build_layer_attn_linear_core(ggml_context * ctx0, ggml_
    ggml_tensor * z_2d        = ggml_reshape_2d(ctx0, z,      head_v_dim, num_v_heads * n_tok);

    ggml_tensor * attn_out_norm = llm_build_context::llm_build_norm(ctx0, attn_out_2d, hparams, model.layers[il].ssm_norm, nullptr, LLM_NORM_RMS, cb, il);
-    ggml_tensor * gated_silu    = ggml_silu(ctx0, z_2d);
-    cb(gated_silu, "gated_silu", il);
-    attn_out_norm = ggml_mul(ctx0, attn_out_norm, gated_silu);
+    cb(attn_out_norm, "attn_rms_norm", il);
+    attn_out_norm = ggml_fused_mul_unary(ctx0, z_2d, attn_out_norm, GGML_UNARY_OP_SILU);
    cb(attn_out_norm, "attn_out_norm", il);

    ggml_tensor * final_output = ggml_reshape_2d(ctx0, attn_out_norm, value_dim, n_tok);