[CK_Tile] Fix GEMM kernel for 4,64,16 and 64,4,16 warp tile sizes (#2262)

* debugging issue

* debugging issue

* debugging

* debugging

* reverting debugging code

* clang formatted

* updating default_config.json

* fix CI failure

* clang formatted
This commit is contained in:
Khushbu Agarwal
2025-06-03 20:16:10 -07:00
committed by GitHub
parent 1037b21cfe
commit 59a85cb4bc
6 changed files with 46 additions and 17 deletions

View File

@@ -149,7 +149,9 @@ struct DefaultGemm2DEpilogue : public Default2DEpilogue<Problem_, Policy_>
else
{
// In this case each thread has just a single item in Ndim
return WG::WarpGemmAttribute::Impl::kCNLane / WG::kN;
return (WG::WarpGemmAttribute::Impl::kCNLane *
WG::WarpGemmAttribute::Impl::kBNBlock) /
WG::kN;
}
}
// M is contiguous dimension
@@ -158,7 +160,9 @@ struct DefaultGemm2DEpilogue : public Default2DEpilogue<Problem_, Policy_>
if constexpr(isCTransposed)
{
// In this case each thread has just a single item in Mdim
return WG::WarpGemmAttribute::Impl::kCNLane / WG::kN;
return (WG::WarpGemmAttribute::Impl::kCNLane *
WG::WarpGemmAttribute::Impl::kAMBlock) /
WG::kN;
}
else
{