Fix merge mfma_wmma (part 1) regression (#2749)

Root cause: a typo in GetGfx11InputBlkIdx — `const` was added to the `laneId` declaration by mistake, even though `laneId` is reassigned in the `SwizzleA` branch.
This commit is contained in:
linqunAMD
2025-08-27 13:49:34 +08:00
committed by GitHub
parent 19d5327c45
commit 95e4a4efcb

View File

@@ -1937,7 +1937,7 @@ struct XdlopsGemm
template <bool SwizzleA>
__device__ static auto GetGfx11InputBlkIdx()
{
const auto laneId = GetLaneId() % mfma_instr.num_threads_per_blk;
auto laneId = GetLaneId() % mfma_instr.num_threads_per_blk;
if constexpr(SwizzleA)
{
laneId = ((laneId & 1) << 3) | (laneId >> 1);