Add vmcnt guard (s_waitcnt before LDS sync) for async copy

This commit is contained in:
aska-0096
2025-05-28 03:47:46 +00:00
parent b99c50a5d5
commit 78d0fd4e65
2 changed files with 3 additions and 3 deletions

View File

@@ -50,14 +50,14 @@ using DeviceOpInstance = ck::tensor_operation::device::DeviceGemmMX_Xdl_CShuffle
GemmSpec, // GemmSpec
ScaleBlockSize, // ScaleBlockSize: Scaling block size
256, // BlockSize: Thread block size
128, // MPerBlock
256, // MPerBlock
256, // NPerBlock
KPerBlock, // KPerBlock
16, // AK1
16, // BK1
16, // MPerXDL
16, // NPerXDL
4, // MXdlPerWave
8, // MXdlPerWave
8, // NXdlPerWave
S<8, 32, 1>, // ABlockTransferThreadClusterLengths_AK0_M_AK1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder

View File

@@ -523,7 +523,7 @@ struct BlockwiseGemmXdlops_pipeline_v3_mx<BlockGemmPipelineScheduler::Intrawave,
do
{
auto LoopFunc = [&](auto scale_comp_buf, auto scale_mem_buf) {
// __builtin_amdgcn_s_waitcnt(3952);
__builtin_amdgcn_s_waitcnt(3952);
block_sync_lds();
a_blockwise_copy.Run(