Fix incorrect --amb n_max_head fitting (#1375)

When fitting to --amb, kv_f32_size should be divided by the number of
divisions (niter), not by the number of heads per division (n_head/niter).

Regression in b85a2a5
This commit is contained in:
usrlocalben
2026-03-07 01:01:14 -07:00
committed by GitHub
parent 277fc1d26f
commit c1c3421462

View File

@@ -7079,7 +7079,7 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
if (cparams.attn_max_batch > 0 && kv_f32_size > cparams.attn_max_batch) {
n_max_head = 1;
for (int niter = 2; niter < n_head; ++niter) {
if (n_head % niter == 0 && kv_f32_size/(n_head/niter) <= cparams.attn_max_batch) {
if (n_head % niter == 0 && kv_f32_size/niter <= cparams.attn_max_batch) {
n_max_head = n_head/niter;
break;
}