From d5498c446736093067dc24e5285823ee24cedab6 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Mon, 2 Feb 2026 09:07:45 +0000
Subject: [PATCH] Do not repack q8_0 for batch sizes less than 8

---
 ggml/src/iqk/iqk_flash_attn.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/iqk/iqk_flash_attn.cpp b/ggml/src/iqk/iqk_flash_attn.cpp
index 47e55b0e..d42cbe78 100644
--- a/ggml/src/iqk/iqk_flash_attn.cpp
+++ b/ggml/src/iqk/iqk_flash_attn.cpp
@@ -152,7 +152,7 @@ extern "C" IQK_API bool iqk_flash_attn_noalibi(int type_q, int type_mask, float
 
     int int_type_k = int_type_k_in;
     auto work_buffer = work_buffer_in;
-    if (neq1 >= 8 || (rk2 >= 8 && nek2 > 1)) {
+    if (neq1 >= 8) {
         uint64_t row_size = 0;
         work_buffer = iqk_repack_k(int_type_k, Dk, nek1, nek2, nek3, stride_k, nbk2, nbk3, k, work_buffer_in, ith, nth, int_type_k, row_size);
         if (int_type_k != int_type_k_in) {