From fb6848095baf93b42acb98951b4efdf064350552 Mon Sep 17 00:00:00 2001 From: linqunAMD Date: Wed, 27 Aug 2025 13:49:34 +0800 Subject: [PATCH] Fix merge mfma_wmma (part 1) regression (#2749) root cause: a typo in GetGfx11InputBlkIdx, const was added by mistake. [ROCm/composable_kernel commit: 95e4a4efcb967b806dbad401c79c31abb7ffed47] --- include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp index 2ce08e7044..0125aa086e 100644 --- a/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp +++ b/include/ck/tensor_operation/gpu/warp/xdlops_gemm.hpp @@ -1937,7 +1937,7 @@ struct XdlopsGemm template __device__ static auto GetGfx11InputBlkIdx() { - const auto laneId = GetLaneId() % mfma_instr.num_threads_per_blk; + auto laneId = GetLaneId() % mfma_instr.num_threads_per_blk; if constexpr(SwizzleA) { laneId = ((laneId & 1) << 3) | (laneId >> 1);