From 942c747e061488489604f71948abe98b153b4efe Mon Sep 17 00:00:00 2001 From: Kawrakow Date: Tue, 27 Jan 2026 12:11:57 +0000 Subject: [PATCH] TG tweak: use a power-of-two nsm in launch_fattn when Q->ne[1] == 1 --- ggml/src/ggml-cuda/fattn-common.cuh | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/fattn-common.cuh b/ggml/src/ggml-cuda/fattn-common.cuh index 8960c68c..688c628c 100644 --- a/ggml/src/ggml-cuda/fattn-common.cuh +++ b/ggml/src/ggml-cuda/fattn-common.cuh @@ -810,7 +810,12 @@ void launch_fattn( cudaStream_t main_stream = ctx.stream(); const int id = ggml_cuda_get_device(); const int cc = ggml_cuda_info().devices[id].cc; - const int nsm = ggml_cuda_info().devices[id].nsm; + const int nsm_actual = ggml_cuda_info().devices[id].nsm; + int nsm = nsm_actual; + if (Q->ne[1] == 1) { + nsm = 1; while (nsm*2 <= nsm_actual) nsm *= 2; + if (K->ne[1] <= 16384 && nsm > 32) nsm /= 2; + } ggml_cuda_pool_alloc K_f16(pool); ggml_cuda_pool_alloc V_f16(pool);