From fb6848095baf93b42acb98951b4efdf064350552 Mon Sep 17 00:00:00 2001
From: linqunAMD <qlin@amd.com>
Date: Wed, 27 Aug 2025 13:49:34 +0800
Subject: [PATCH] Fix merge mfma_wmma (part 1) regression (#2749)

Root cause: a typo in GetGfx11InputBlkIdx. `const` was added to `laneId` by
mistake, but the SwizzleA branch reassigns `laneId`, so it must stay mutable.

[ROCm/composable_kernel commit: 95e4a4efcb967b806dbad401c79c31abb7ffed47]
---
 include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
index 2ce08e7044..0125aa086e 100644
--- a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
+++ b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp
@@ -1937,7 +1937,7 @@ struct XdlopsGemm
     template <bool SwizzleA>
     __device__ static auto GetGfx11InputBlkIdx()
     {
-        const auto laneId = GetLaneId() % mfma_instr.num_threads_per_blk;
+        auto laneId = GetLaneId() % mfma_instr.num_threads_per_blk;
         if constexpr(SwizzleA)
         {
             laneId = ((laneId & 1) << 3) | (laneId >> 1);