Add condition

This commit is contained in:
Kawrakow
2026-01-25 06:52:04 +00:00
parent d08481d0f4
commit aff7aa0cf6

View File

@@ -217,7 +217,8 @@ void ggml_cuda_flash_attn_ext_mma_f16(ggml_backend_cuda_context & ctx, ggml_tens
     GGML_ASSERT(Q->ne[2] % K->ne[2] == 0);
     const int gqa_ratio = Q->ne[2] / K->ne[2];
-    if (gqa_ratio == 12) {
+    if (gqa_ratio == 12 && Q->ne[1] == 1 && K->ne[1]*K->ne[2] >= 65536) {
+        // This is a hack to improve GLM-4.5/4.6/4.7/AIR TG performance
         glm45_flash_attention(ctx, dst);
         return;
     }