Support GigaChat3 (#995)

* Fix GigaChat support

* Gigachat: CUDA FA (needs 192 x 192 for MLA = 3)

* Gigachat: CPU FA (needs 192 x 192 for MLA = 3)

---------

Co-authored-by: Iwan Kawrakow <iwan.kawrakow@gmail.com>
This commit is contained in:
Kawrakow
2025-11-24 06:55:14 +01:00
committed by GitHub
parent 1feccd4174
commit f1191036b2
11 changed files with 103 additions and 4 deletions

View File

@@ -1617,7 +1617,7 @@ bool create_tensors_helper::create_arctix_tensors(const LLM_TN & tn) {
bool create_tensors_helper::create_deepseek2_tensors(const LLM_TN & tn) {
LOADING_PRELUDE
-const bool is_lite = (hparams.n_layer == 27);
+const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
const int64_t n_embd_head_qk_rope = hparams.n_rot;
const int64_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot;