mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-02-25 07:34:10 +00:00
Better
This commit is contained in:
@@ -9220,11 +9220,15 @@ ggml_tensor * llm_build_context::build_std_attention(ggml_cgraph * gf, ggml_tens
|
||||
else if (cur->type != GGML_TYPE_F32) {
|
||||
cur = ggml_cast(ctx0, cur, GGML_TYPE_F32);
|
||||
}
|
||||
auto the_q_norm = model.layers[il].attn_q_norm ? model.layers[il].attn_q_norm->extra ?
|
||||
((ggml_split_tensor_t *)model.layers[il].attn_q_norm->extra)->splits[id] : model.layers[il].attn_q_norm : nullptr;
|
||||
auto the_k_norm = model.layers[il].attn_k_norm ? model.layers[il].attn_k_norm->extra ?
|
||||
((ggml_split_tensor_t *)model.layers[il].attn_k_norm->extra)->splits[id] : model.layers[il].attn_k_norm : nullptr;
|
||||
auto [Qcur, Kcur, Vcur] = llm_build_mul_mat_qkv(gf, cur, nullptr, nullptr, nullptr, nullptr,
|
||||
split_wq, bq ? bq->splits[id] : nullptr,
|
||||
split_wk, bk ? bk->splits[id] : nullptr,
|
||||
split_wv, bv ? bv->splits[id] : nullptr,
|
||||
model.layers[il].attn_q_norm, model.layers[il].attn_k_norm, f_attn_scale, il_cb);
|
||||
the_q_norm, the_k_norm, f_attn_scale, il_cb);
|
||||
auto rope_factors = rope_factors_in;
|
||||
if (!rope_factors && model.layers[il].rope_freqs && model.layers[il].rope_freqs->extra) {
|
||||
auto extra = (ggml_split_tensor_t *)model.layers[il].rope_freqs->extra;
|
||||
|
||||
@@ -1858,9 +1858,9 @@ bool create_tensors_helper::create_glm4_moe_tensors(const LLM_TN & tn) {
|
||||
layer.wo = create_tensor(ctx_split, tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_embd_head_k * n_head, n_embd }, flags);
|
||||
|
||||
// K/Q norm tensors (optional for GLM-4.5 355B variant)
|
||||
layer.attn_q_norm = create_tensor(ctx_layer,
|
||||
layer.attn_q_norm = create_tensor(ctx_split,
|
||||
tn(LLM_TENSOR_ATTN_Q_NORM, "weight", i), { n_embd_head_k }, llama_model_loader::TENSOR_NOT_REQUIRED | flags);
|
||||
layer.attn_k_norm = create_tensor(ctx_layer,
|
||||
layer.attn_k_norm = create_tensor(ctx_split,
|
||||
tn(LLM_TENSOR_ATTN_K_NORM, "weight", i), { n_embd_head_k }, llama_model_loader::TENSOR_NOT_REQUIRED | flags);
|
||||
|
||||
// Why are we adding an additional tensor type?
|
||||
@@ -2945,6 +2945,9 @@ bool create_tensors_helper::create_tensors() {
|
||||
if (layer.bq) {
|
||||
prepare_split_tensors(0, ctx_split, layer.bq, layer.split_bq, split, mem_used);
|
||||
}
|
||||
if (layer.attn_q_norm) {
|
||||
prepare_split_tensors(-1, ctx_split, layer.attn_q_norm, layer.split_q_norm, split, mem_used);
|
||||
}
|
||||
for (auto & s : split) s /= gqa_ratio;
|
||||
prepare_split_tensors(1, ctx_split, layer.wk, layer.split_wk, split, mem_used);
|
||||
prepare_split_tensors(1, ctx_split, layer.wv, layer.split_wv, split, mem_used);
|
||||
@@ -2954,6 +2957,9 @@ bool create_tensors_helper::create_tensors() {
|
||||
if (layer.bv) {
|
||||
prepare_split_tensors(0, ctx_split, layer.bv, layer.split_bv, split, mem_used);
|
||||
}
|
||||
if (layer.attn_k_norm) {
|
||||
prepare_split_tensors(-1, ctx_split, layer.attn_k_norm, layer.split_k_norm, split, mem_used);
|
||||
}
|
||||
}
|
||||
|
||||
if (layer.ffn_norm) {
|
||||
|
||||
@@ -198,6 +198,8 @@ struct llama_layer {
|
||||
llama_split_tensor split_bqkv;
|
||||
llama_split_tensor split_bqk;
|
||||
llama_split_tensor split_bkv;
|
||||
llama_split_tensor split_q_norm;
|
||||
llama_split_tensor split_k_norm;
|
||||
|
||||
// relative position bias
|
||||
struct ggml_tensor * attn_rel_b = nullptr;
|
||||
|
||||
Reference in New Issue
Block a user