Adjust ncols for ADA_LOVELACE or better
@@ -2155,7 +2155,11 @@ void ggml_cuda_flash_attn_ext_mma_new(ggml_backend_cuda_context & ctx, ggml_tens
     }
     GGML_ASSERT(Q->ne[0] == 576 && K->ne[0] == 576 && V->ne[0] == 512);
     if (gqa_ratio == 20 && Q->ne[1] <= 4 && K->ne[1] >= 2048) {
-        ggml_cuda_flash_attn_ext_mma_f16_case<576, 512, 1, 32>(ctx, dst);
+        if (ggml_cuda_info().devices[ctx.device].cc >= CC_ADA_LOVELACE) {
+            ggml_cuda_flash_attn_ext_mma_f16_case<576, 512, 1, 16>(ctx, dst);
+        } else {
+            ggml_cuda_flash_attn_ext_mma_f16_case<576, 512, 1, 32>(ctx, dst);
+        }
         return;
     }
     if (gqa_ratio % 16 == 0) {
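For context: this branch is taken only for the MLA head sizes (Q/K head dim 576, V head dim 512) with gqa_ratio == 20, at most 4 query columns, and a KV length of at least 2048. The change selects between two precompiled template instantiations of the flash-attention MMA kernel based on the device's compute capability; the only difference is the last template argument, the column-tile count ncols (16 on Ada Lovelace or newer, 32 otherwise). Below is a minimal, self-contained C++ sketch of that dispatch pattern. All names here are illustrative stand-ins, not the real ggml/ik_llama.cpp API: DeviceInfo, mma_fattn_case, the numeric value of the CC constant, and the exact meaning of the last template parameter are assumptions.

    // Sketch only: compile-time kernel selection gated by compute capability.
    #include <cstdio>

    struct DeviceInfo { int cc; };        // stand-in for ggml_cuda_info().devices[i]

    constexpr int CC_ADA_LOVELACE = 890;  // assumed encoding of CC 8.9 (Ada)

    // Stand-in for ggml_cuda_flash_attn_ext_mma_f16_case<...>: each set of
    // template arguments names a separately compiled kernel instantiation.
    template <int DKQ, int DV, int nc_a, int nc_b>
    void mma_fattn_case() {
        std::printf("launch kernel <%d, %d, %d, %d>\n", DKQ, DV, nc_a, nc_b);
    }

    // The commit's change: on Ada Lovelace or newer, pick the instantiation
    // with the smaller column tile (16) for the MLA head sizes 576/512;
    // older architectures keep the original 32.
    void dispatch_mla(const DeviceInfo & dev) {
        if (dev.cc >= CC_ADA_LOVELACE) {
            mma_fattn_case<576, 512, 1, 16>();
        } else {
            mma_fattn_case<576, 512, 1, 32>();
        }
    }

    int main() {
        dispatch_mla({890});   // Ada or newer     -> ncols = 16
        dispatch_mla({860});   // older, e.g. Ampere -> ncols = 32
        return 0;
    }

Because the template arguments are compile-time constants, both instantiations must already exist in the binary; the runtime check only chooses which one to launch, which is why the diff adds a branch rather than a new kernel.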