Fix on the error (#1956)

[ROCm/composable_kernel commit: 9d51d17dd0]
This commit is contained in:
Thomas Ning
2025-03-07 13:43:52 -08:00
committed by GitHub
parent 88bd358d65
commit ed0649e4e6
2 changed files with 7 additions and 6 deletions

View File

@@ -211,11 +211,12 @@ struct BlockwiseGemmXdlops_pipeline_bpreshuffle_v1<BlockGemmPipelineScheduler::I
});
// A local
static_for<0, num_ds_read_inst_a / 2 * mfma_interleave, 1>{}([&](auto i) {
ignore = i;
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x100, 2 / mfma_interleave, 0); // DS read
});
static_for<0, MPerXDL == 32 ? num_ds_read_inst_a / 2 : num_ds_read_inst_a, 1>{}(
[&](auto i) {
ignore = i;
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA
__builtin_amdgcn_sched_group_barrier(0x100, MPerXDL == 32 ? 2 : 1, 0); // DS read
});
}
template <bool HasMainLoop,

View File

@@ -85,7 +85,7 @@ struct BlockwiseGemmXdlops_pipeline_hotloop_inst
static constexpr index_t A_LDS_Read_Inst_Num =
WaveNumN * MPerBlock * KPerBlock / (BlockSize * ALDSReadWidth);
static constexpr index_t B_LDS_Read_Inst_Num =
WaveNumM * MPerBlock * KPerBlock / (BlockSize * BLDSReadWidth);
WaveNumM * NPerBlock * KPerBlock / (BlockSize * BLDSReadWidth);
static constexpr index_t C_MFMA_Inst_Num =
MPerBlock * NPerBlock * KPerBlock / (BlockSize / WaveSize) / (MPerXDL * NPerXDL * KPerXDL);