From 27fa27daf9e5566db69774489514288cedc7760c Mon Sep 17 00:00:00 2001 From: Iwan Kawrakow Date: Thu, 12 Sep 2024 18:55:13 +0300 Subject: [PATCH] Disallow mixing bf16 with other types for kv caches --- ggml/src/iqk/iqk_mul_mat.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/iqk/iqk_mul_mat.cpp b/ggml/src/iqk/iqk_mul_mat.cpp index 30b545d3..ce868514 100644 --- a/ggml/src/iqk/iqk_mul_mat.cpp +++ b/ggml/src/iqk/iqk_mul_mat.cpp @@ -8139,7 +8139,8 @@ bool iqk_flash_attn_noalibi(int int_type_k, // type of k stride_q /= sizeof(float); // q stride as float #ifdef __AVX512BF16__ - if (type_k == GGML_TYPE_BF16 && type_v == GGML_TYPE_BF16) { + if (type_k == GGML_TYPE_BF16 || type_v == GGML_TYPE_BF16) { + if (type_k != GGML_TYPE_BF16 || type_v != GGML_TYPE_BF16) return false; // we do not support mixing bf16 with other types switch (D) { case 64: iqk_flash_helper_T< 64, 8, 32>(nq1, nk1, stride_q, stride_k, stride_v, stride_m, stride_qkv, q, ck, cv, cm, scale, softcap, qkv); break;