mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-06-28 18:56:59 +00:00
WIP: fix assembly
This commit is contained in:
@@ -248,6 +248,14 @@ struct BlockGemmARegBRegCRegEightWavesV1
|
||||
merge_sequences(c_iter_idx{}, c_warp_y_index_zeros),
|
||||
merge_sequences(sequence<1, 1>{}, c_warp_y_lengths),
|
||||
c_warp_tensor.get_thread_buffer());
|
||||
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0);
|
||||
if constexpr(nIter == 0 && mIter == MIterPerWarp - 1 && kIter == 0)
|
||||
{
|
||||
s_nop();
|
||||
s_waitcnt_lgkm<4>();
|
||||
__builtin_amdgcn_sched_group_barrier(0x004, 1, 0);
|
||||
__builtin_amdgcn_sched_barrier(0);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -183,12 +183,6 @@ struct GemmPipelineAgBgCrCompAsyncEightWaves : public BaseGemmPipelineAgBgCrComp
|
||||
// Hot loop scheduler
|
||||
// ------------------
|
||||
auto hot_loop_scheduler = [&]() {
|
||||
__builtin_amdgcn_sched_group_barrier(0x008, MIterPerWarp, 0); // MFMA
|
||||
s_waitcnt_lgkm<4>();
|
||||
__builtin_amdgcn_sched_group_barrier(0x004, 1, 0); // lgkmcnt / SALU
|
||||
static_for<0, MFMA_INST - MIterPerWarp, 1>{}([&](auto) {
|
||||
__builtin_amdgcn_sched_group_barrier(0x008, 1, 0); // MFMA
|
||||
});
|
||||
__builtin_amdgcn_sched_barrier(0);
|
||||
};
|
||||
|
||||
|
||||
@@ -383,7 +383,6 @@ struct GemmPipelineAgBgCrEightWavesImplBase : public GemmPipelineAgBgCrImplBase<
|
||||
|
||||
auto calc_gemm = [&](index_t i) {
|
||||
__builtin_amdgcn_sched_barrier(0);
|
||||
s_nop();
|
||||
block_gemm(
|
||||
c_block_tile, a_block_tile, b_block_tiles, aq_block_tile[i], bq_block_tile[i]);
|
||||
scheduler_func();
|
||||
@@ -392,6 +391,7 @@ struct GemmPipelineAgBgCrEightWavesImplBase : public GemmPipelineAgBgCrImplBase<
|
||||
auto main_body = [&](auto tic, auto toc) {
|
||||
__builtin_amdgcn_sched_barrier(0);
|
||||
__builtin_amdgcn_s_setprio(1);
|
||||
__builtin_amdgcn_sched_barrier(0);
|
||||
|
||||
calc_gemm(tic);
|
||||
|
||||
|
||||
@@ -255,6 +255,8 @@ struct ABQuantBlockUniversalGemmAsBsCrAsync : public BlockGemmQuantBase
|
||||
"C block tensor data type!");
|
||||
constexpr auto warp_size = get_warp_size();
|
||||
|
||||
s_nop();
|
||||
|
||||
auto q_block_tensor = aq_block_tensor;
|
||||
if constexpr(Traits::NQPerBlock / NWarp == 1)
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user