From f747cbca081c08ce4ec6811b3f5df1b71f129b2a Mon Sep 17 00:00:00 2001
From: Iwan Kawrakow <iwan.kawrakow@gmail.com>
Date: Fri, 8 Aug 2025 13:42:03 +0300
Subject: [PATCH] Fix MMQ when running with quantized K cache without FA

---
 ggml/src/ggml-cuda/mmq.cu | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/ggml/src/ggml-cuda/mmq.cu b/ggml/src/ggml-cuda/mmq.cu
index a0e7da12..1e3accf0 100644
--- a/ggml/src/ggml-cuda/mmq.cu
+++ b/ggml/src/ggml-cuda/mmq.cu
@@ -14,7 +14,7 @@ void ggml_cuda_op_mul_mat_q(
     const int64_t src1_padded_row_size, cudaStream_t stream) {
 
     const int64_t ne00 = src0->ne[0];
-    const int64_t nb01 = src0->nb[1];
+    const int64_t nb01 = ggml_row_size(src0->type, ne00);
 
     const int64_t ne10 = src1->ne[0];
     const int64_t ne11 = src1->ne[1];