mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 13:11:25 +00:00
added padding of K into gemm_v2r3 (#887)
* added kpad support into v2r3 * add generic instances * fixed comments * fixed mnk padding * Update device_batched_gemm_xdl.hpp --------- Co-authored-by: Jing Zhang <jizha@amd.com>
This commit is contained in:
@@ -185,7 +185,7 @@ struct DeviceBatchedGemmXdl : public DeviceBatchedGemm<ALayout,
|
||||
AElementwiseOperation,
|
||||
BElementwiseOperation,
|
||||
CElementwiseOperation,
|
||||
GemmSpecialization::MNPadding,
|
||||
GemmSpecialization::MNKPadding,
|
||||
MPerBlock,
|
||||
NPerBlock,
|
||||
K0PerBlock,
|
||||
@@ -315,11 +315,6 @@ struct DeviceBatchedGemmXdl : public DeviceBatchedGemm<ALayout,
|
||||
return false;
|
||||
}
|
||||
|
||||
if(problem.K % K1 != 0)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return GridwiseGemm::CheckValidity(problem);
|
||||
}
|
||||
|
||||
@@ -416,7 +411,12 @@ struct DeviceBatchedGemmXdl : public DeviceBatchedGemm<ALayout,
|
||||
<< BlockSize << ", "
|
||||
<< MPerBlock << ", "
|
||||
<< NPerBlock << ", "
|
||||
<< K0PerBlock
|
||||
<< K0PerBlock << ", "
|
||||
<< K1 << ", "
|
||||
<< MPerXDL << ", "
|
||||
<< NPerXDL << ", "
|
||||
<< MXdlPerWave << ", "
|
||||
<< NXdlPerWave << ", "
|
||||
<< ">"
|
||||
<< " NumGemmKPrefetchStage: "
|
||||
<< NumGemmKPrefetchStage << ", "
|
||||
|
||||
Reference in New Issue
Block a user