mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-26 16:14:10 +00:00
Minor delta-net tweak (#1308)
* Make sure we pick the reduced tensor from the right GPU
* Minor
* Minor delta-net tweak
This commit is contained in:
@@ -3263,8 +3263,6 @@ static bool ggml_cuda_compute_forward(ggml_backend_cuda_context & ctx, struct gg
|
||||
ggml_cuda_op_sum_rows_nc(ctx, cgraph->nodes[i+1]);
|
||||
i += 2;
|
||||
} else {
|
||||
//auto src = dst->src[0];
|
||||
//printf("cont(%s -> %s): %ld x %ld x %ld x %ld; %zu x %zu x %zu x %zu\n", src->name, dst->name, src->ne[0], src->ne[1], src->ne[2], src->ne[3], src->nb[0], src->nb[1], src->nb[2], src->nb[3]);
|
||||
ggml_cuda_dup(ctx, dst);
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -636,9 +636,8 @@ ggml_tensor * delta_net::build_layer_attn_linear_core(ggml_context * ctx0, ggml_
|
||||
ggml_tensor * z_2d = ggml_reshape_2d(ctx0, z, head_v_dim, num_v_heads * n_tok);
|
||||
|
||||
ggml_tensor * attn_out_norm = llm_build_context::llm_build_norm(ctx0, attn_out_2d, hparams, model.layers[il].ssm_norm, nullptr, LLM_NORM_RMS, cb, il);
|
||||
ggml_tensor * gated_silu = ggml_silu(ctx0, z_2d);
|
||||
cb(gated_silu, "gated_silu", il);
|
||||
attn_out_norm = ggml_mul(ctx0, attn_out_norm, gated_silu);
|
||||
cb(attn_out_norm, "attn_rms_norm", il);
|
||||
attn_out_norm = ggml_fused_mul_unary(ctx0, z_2d, attn_out_norm, GGML_UNARY_OP_SILU);
|
||||
cb(attn_out_norm, "attn_out_norm", il);
|
||||
|
||||
ggml_tensor * final_output = ggml_reshape_2d(ctx0, attn_out_norm, value_dim, n_tok);
|
||||
|
||||
Reference in New Issue
Block a user