From bf474e9bffb75b4f377253d3958b69f8e03c4dc4 Mon Sep 17 00:00:00 2001
From: Kawrakow
Date: Mon, 10 Nov 2025 08:34:24 +0200
Subject: [PATCH] Use fused gemv+add only for TG (#933)

Co-authored-by: Iwan Kawrakow
---
 ggml/src/ggml-cuda.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda.cu b/ggml/src/ggml-cuda.cu
index 7e2da7f8..40ac388d 100644
--- a/ggml/src/ggml-cuda.cu
+++ b/ggml/src/ggml-cuda.cu
@@ -2067,7 +2067,7 @@ static int ggml_cuda_mul_mat_q(ggml_backend_cuda_context & ctx, const ggml_tenso
 
     auto stream = ctx.stream();
 
-    auto fusion = ctx.fusion;
+    auto fusion = ctx.fusion && src1->ne[1] == 1;
 
     auto ne10_padded = GGML_PAD(src1->ne[0], MATRIX_ROW_PADDING);
     auto nb10_padded = ne10_padded*sizeof(block_q8_1)/QK8_1;