mirror of
https://github.com/ikawrakow/ik_llama.cpp.git
synced 2026-04-29 19:01:47 +00:00
Fix incorrect --amb n_max_head fitting (#1375)
When fitting to --amb, kv_f32_size should be divided by the number of
divisions (niter), not by the number of heads per division (n_head/niter).
Fixes a regression introduced in b85a2a5.
This commit is contained in:
@@ -7079,7 +7079,7 @@ ggml_cgraph * llm_build_context::build_deepseek2() {
|
||||
if (cparams.attn_max_batch > 0 && kv_f32_size > cparams.attn_max_batch) {
|
||||
n_max_head = 1;
|
||||
for (int niter = 2; niter < n_head; ++niter) {
|
||||
if (n_head % niter == 0 && kv_f32_size/(n_head/niter) <= cparams.attn_max_batch) {
|
||||
if (n_head % niter == 0 && kv_f32_size/niter <= cparams.attn_max_batch) {
|
||||
n_max_head = n_head/niter;
|
||||
break;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user