Fix performance regression in the blockwise GEMM v3 pipeline (BlockwiseGemmXdlops_pipeline_v3)

This commit is contained in:
coderfeli
2024-12-27 06:40:43 +00:00
parent 400cac2839
commit 031ddf356d

View File

@@ -477,6 +477,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave,
b_thread_buf_tail);
});
});
HotLoopScheduler();
__builtin_amdgcn_sched_barrier(0);
}
}
@@ -692,6 +695,9 @@ struct BlockwiseGemmXdlops_pipeline_v3<BlockGemmPipelineScheduler::Intrawave,
});
});
HotLoopScheduler();
__builtin_amdgcn_sched_barrier(0);
static_for<0, KRepeat, 1>{}([&](auto k0) {
static_for<0, MRepeat, 1>{}([&](auto m0) {
static_for<0, NRepeat, 1>{}([&](auto n0) {