From 385fc14110313d7c25d77b5296d1bdbd67a853f8 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Mon, 5 Jan 2026 08:21:07 +0200
Subject: [PATCH] Fix race in CUDA FA for head sizes 192/128 (#1104)

Co-authored-by: Iwan Kawrakow
---
 ggml/src/ggml-cuda/fattn-new-mma.cu | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/ggml/src/ggml-cuda/fattn-new-mma.cu b/ggml/src/ggml-cuda/fattn-new-mma.cu
index 8a5bd1b1..21253e0b 100644
--- a/ggml/src/ggml-cuda/fattn-new-mma.cu
+++ b/ggml/src/ggml-cuda/fattn-new-mma.cu
@@ -285,17 +285,17 @@ struct fattn_mma_f16_config;
 
 template <>
 struct fattn_mma_f16_config<192, 128> {
-    static constexpr int nbatch_fa = 64;
+    static constexpr int nbatch_fa = 32;
     static constexpr int nwarps_max = 4;
     static constexpr bool Q_in_reg = true;
     static constexpr int nstages_target = 1;
 
     static int get_nbatch_K2_host(const int /*cc*/, const int /*ncols*/) {
-        return 64;
+        return 96;
     }
 
     static constexpr __device__ int get_nbatch_K2_device(int /*ncols*/) {
-        return 64;
+        return 96;
     }
 
     static int get_nbatch_V2_host(const int /*cc*/, const int /*ncols*/) {
@@ -317,17 +317,17 @@ struct fattn_mma_f16_config<192, 128> {
 
 template <>
 struct fattn_mma_f16_config<192, 192> {
-    static constexpr int nbatch_fa = 64;
+    static constexpr int nbatch_fa = 32;
     static constexpr int nwarps_max = 4;
     static constexpr bool Q_in_reg = true;
     static constexpr int nstages_target = 1;
 
     static int get_nbatch_K2_host(const int /*cc*/, const int /*ncols*/) {
-        return 64;
+        return 96;
     }
 
     static constexpr __device__ int get_nbatch_K2_device(int /*ncols*/) {
-        return 64;
+        return 96;
     }
 
     static int get_nbatch_V2_host(const int /*cc*/, const int /*ncols*/) {