From 5db8262d941eb6efb005c981eba2ed5801c2cfb6 Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow
Date: Sun, 21 Dec 2025 16:05:34 +0000
Subject: [PATCH] WIP: fix sm layer (dense)

---
 src/llama-build-context.cpp | 33 +++++++++++----------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/src/llama-build-context.cpp b/src/llama-build-context.cpp
index 7e602f26..19152174 100644
--- a/src/llama-build-context.cpp
+++ b/src/llama-build-context.cpp
@@ -707,29 +707,15 @@ ggml_tensor * llm_build_context::llm_build_ffn(
         cb(cur, "ffn_combined", il);
         ggml_build_forward_expand(graph, cur);
         return cur;
-        //auto cur = ggml_add(ctx, ffn[0], ffn[1]);
-        //cb(cur, "combine_ffn", il);
-        //cur->op_params[0] = 0xff;
-        //for (int id = 2; id < int(ffn.size()); ++id) {
-        //    cur = ggml_add(ctx, cur, ffn[id]);
-        //    cb(cur, "combine_ffn", il);
-        //}
-        //if (ffn.size() > 2) {
-        //    cur->op_params[0] = 0xff;
-        //}
-        ////if (cur->type != GGML_TYPE_F32) {
-        ////    cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
-        ////}
-
-        //return cur;
     }
 
+    auto cur = input;
     if (ffn_norm) {
-        input = llm_build_norm(ctx, input, lctx.model.hparams, ffn_norm, NULL, LLM_NORM_RMS, cb, il);
-        cb(input, "ffn_norm", il);
+        cur = llm_build_norm(ctx, cur, lctx.model.hparams, ffn_norm, NULL, LLM_NORM_RMS, cb, il);
+        cb(cur, "ffn_norm", il);
     }
-    else if (input->type != GGML_TYPE_F32) {
-        input = ggml_cast(ctx, input, GGML_TYPE_F32);
+    if (cur->type != GGML_TYPE_F32) {
+        cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
     }
 
     if (lctx.cparams.fused_up_gate &&
@@ -737,7 +723,7 @@ ggml_tensor * llm_build_context::llm_build_ffn(
         (type_op == LLM_FFN_SILU || type_op == LLM_FFN_RELU || (type_op == LLM_FFN_GELU && !act_scales))) {
         auto unary_op = type_op == LLM_FFN_SILU ? GGML_UNARY_OP_SILU : type_op == LLM_FFN_RELU ? GGML_UNARY_OP_RELU : GGML_UNARY_OP_GELU;
-        auto cur = ggml_fused_up_gate(ctx, up, gate, input, unary_op);
+        cur = ggml_fused_up_gate(ctx, up, gate, cur, unary_op);
         cb(cur, "ffn_up_gate", il);
         if (down) {
             cur = llm_build_lora_mm(lctx, ctx, down, cur);
@@ -756,10 +742,14 @@ ggml_tensor * llm_build_context::llm_build_ffn(
             cur = ggml_mul(ctx, cur, down_s);
             cb(cur, "ffn_down_s", il);
         }
+        if (add_input) {
+            cur = ggml_add(ctx, cur, input);
+            cb(cur, "ffn_out_with_inp", il);
+        }
         return cur;
     }
 
-    struct ggml_tensor * tmp = up ? llm_build_lora_mm(lctx, ctx, up, input) : input;
+    struct ggml_tensor * tmp = up ? llm_build_lora_mm(lctx, ctx, up, cur) : cur;
     cb(tmp, "ffn_up", il);
 
     if (up_b) {
@@ -772,7 +762,6 @@ ggml_tensor * llm_build_context::llm_build_ffn(
         cb(tmp, "ffn_up_s", il);
     }
 
-    auto cur = input;
    if (gate) {
        switch (type_gate) {
            case LLM_FFN_SEQ:
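
Note (reviewer sketch, not part of the commit): stripped of LoRA, biases, scale tensors, and the GELU/act_scales special case, the dense FFN path after this patch reduces to roughly the C++ below. The helper name ffn_dense_sketch, the fixed SILU activation, the eps value, and the plain ggml_rms_norm/ggml_mul_mat calls standing in for llm_build_norm/llm_build_lora_mm are illustrative assumptions, not code from the repository; ggml_fused_up_gate is the fork-specific op the patch itself calls. The crux is that normalization and the F32 cast now write to a local cur, so the original input stays intact for the residual add guarded by add_input.

// Sketch of the post-patch dense path of llm_build_ffn (simplified).
#include "ggml.h"

static ggml_tensor * ffn_dense_sketch(
        ggml_context * ctx,
        ggml_tensor  * input,     // layer input, kept intact for the residual
        ggml_tensor  * ffn_norm,  // optional RMS-norm weight
        ggml_tensor  * up,
        ggml_tensor  * gate,
        ggml_tensor  * down,
        bool           add_input) {
    // Work on a local 'cur'; before the patch, 'input' itself was
    // overwritten here, so no clean copy survived for a residual add.
    ggml_tensor * cur = input;
    if (ffn_norm) {
        cur = ggml_rms_norm(ctx, cur, 1e-5f);      // stand-in for llm_build_norm; eps assumed
        cur = ggml_mul(ctx, cur, ffn_norm);
    }
    if (cur->type != GGML_TYPE_F32) {
        cur = ggml_cast(ctx, cur, GGML_TYPE_F32);  // now runs in both branches, not just the else
    }
    // Fused up*act(gate) projection, as in the patched fused_up_gate branch.
    cur = ggml_fused_up_gate(ctx, up, gate, cur, GGML_UNARY_OP_SILU);
    if (down) {
        cur = ggml_mul_mat(ctx, down, cur);        // stand-in for llm_build_lora_mm
    }
    if (add_input) {
        cur = ggml_add(ctx, cur, input);           // residual uses the pristine input
    }
    return cur;
}

Before the patch, llm_build_norm stored its result back into input, so no pristine tensor was left to serve as the residual; routing the norm and cast through a local cur is what makes the new add_input path correct.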