mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-01-26 17:20:01 +00:00
Leave FFN partial results as f16
This commit is contained in:
@@ -691,9 +691,9 @@ ggml_tensor * llm_build_context::llm_build_ffn(
|
||||
if (ffn.size() > 2) {
|
||||
cur->op_params[0] = 0xff;
|
||||
}
|
||||
- if (cur->type != GGML_TYPE_F32) {
|
||||
- cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
|
||||
- }
|
||||
+ //if (cur->type != GGML_TYPE_F32) {
|
||||
+ // cur = ggml_cast(ctx, cur, GGML_TYPE_F32);
|
||||
+ //}
|
||||
|
||||
return cur;
|
||||
}
|
||||
@@ -9002,6 +9002,9 @@ ggml_tensor * llm_build_context::build_std_attention(ggml_cgraph * gf, ggml_tens
|
||||
cur = llm_build_norm(ctx0, cur, hparams, split_norm, NULL, LLM_NORM_RMS, cb, il);
|
||||
cb(cur, "attn_norm", il_cb);
|
||||
}
|
||||
+ else if (cur->type != GGML_TYPE_F32) {
|
||||
+ cur = ggml_cast(ctx0, cur, GGML_TYPE_F32);
|
||||
+ }
|
||||
auto [Qcur, Kcur, Vcur] = llm_build_mul_mat_qkv(gf, cur, nullptr, nullptr, nullptr, nullptr,
|
||||
split_wq, nullptr, split_wk, nullptr, split_wv, nullptr,
|
||||
model.layers[il].attn_q_norm, model.layers[il].attn_k_norm, f_attn_scale, il_cb);
|
||||
|
||||
Reference in New Issue
Block a user