Mirror of https://github.com/ikawrakow/ik_llama.cpp.git (synced 2026-05-01 11:51:53 +00:00)
prefix kv cache tensors with model name to avoid conflict
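
Without the prefix, every context names its layer-i K tensor "cache_k_l%d", so two models loaded in the same process end up with identically named ggml tensors. Prefixing with model.name keeps the names unique per model. Below is a minimal standalone sketch of the renaming, using only the public ggml API; the model name "my-model" and the main() wrapper are illustrative assumptions, not code from this commit:

    // Minimal sketch of the rename (assumption: "my-model" stands in for model.name).
    #include "ggml.h"
    #include <cstdio>
    #include <string>

    int main() {
        // Small metadata-only context; no_alloc = true since tensor data is never touched.
        struct ggml_init_params params = { 16*1024*1024, nullptr, true };
        struct ggml_context * ctx = ggml_init(params);

        const std::string model_name = "my-model"; // hypothetical; the real code uses model.name
        const int i = 0;                           // layer index

        struct ggml_tensor * k = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 128, 4096);

        // before this commit: ggml_format_name(k, "cache_k_l%d", i);   -> "cache_k_l0"
        // after this commit:
        ggml_format_name(k, "%s.cache_k_l%d", model_name.c_str(), i);   // -> "my-model.cache_k_l0"
        printf("%s\n", k->name);

        ggml_free(ctx);
        return 0;
    }

The same pattern is applied to the V-cache tensor and, when mla_attn == 1, to the extra transposed kvt tensor in the hunks below.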
@@ -3581,16 +3581,16 @@ static bool llama_kv_cache_init(
             //LLAMA_LOG_INFO("%s: layer %d: n_embd_head_qk_rope = %d, kv_lora_rank = %d\n", __func__, i, n_embd_head_qk_rope, kv_lora_rank);
             if (cparams.flash_attn) {
                 ggml_tensor * kv = ggml_new_tensor_2d(ctx, cache.type_k, kv_lora_rank + n_embd_head_qk_rope, kv_size);
-                ggml_format_name(kv, "cache_k_l%d", i);
+                ggml_format_name(kv, "%s.cache_k_l%d", model.name.c_str(), i);
                 cache.k_l.push_back(kv);
             } else {
                 auto kv_type = cparams.mla_attn == 1 ? cache.type_k : cache.type_v;
                 ggml_tensor * kv = ggml_new_tensor_2d(ctx, kv_type, kv_lora_rank + n_embd_head_qk_rope, kv_size);
-                ggml_format_name(kv, "cache_k_l%d", i);
+                ggml_format_name(kv, "%s.cache_k_l%d", model.name.c_str(), i);
                 cache.k_l.push_back(kv);
                 if (cparams.mla_attn == 1) {
                     ggml_tensor * kvt = ggml_new_tensor_1d(ctx, cache.type_v, kv_lora_rank*kv_size);
-                    ggml_format_name(kvt, "cache_v_l%d", i);
+                    ggml_format_name(kvt, "%s.cache_v_l%d", model.name.c_str(), i);
                     cache.v_l.push_back(kvt);
                 }
             }
@@ -3599,8 +3599,8 @@ static bool llama_kv_cache_init(
         else {
             k = ggml_new_tensor_2d(ctx, type_k, n_embd_head_k, n_head_kv*kv_size);
             v = ggml_new_tensor_1d(ctx, type_v, n_embd_v_gqa*kv_size);
-            ggml_format_name(k, "cache_k_l%d", i);
-            ggml_format_name(v, "cache_v_l%d", i);
+            ggml_format_name(k, "%s.cache_k_l%d", model.name.c_str(), i);
+            ggml_format_name(v, "%s.cache_v_l%d", model.name.c_str(), i);
             cache.k_l.push_back(k);
             cache.v_l.push_back(v);
         }