Mirror of https://github.com/ROCm/composable_kernel.git (synced 2026-04-19 14:29:05 +00:00)
add the sync barrier for persistent kernel (#2977)
This commit is contained in:
@@ -1134,6 +1134,7 @@ struct UniversalGemmKernel
while(block_id < num_work)
{
    s_waitcnt_barrier();
    // Get the tile index for this block
    const auto tile_idx = amd_wave_read_first_lane(block_id % num_tiles);
    const auto [iM, iN] = TilePartitioner{kargs.M, kargs.N}.GetOutputTileIndex(tile_idx);

Reference in New Issue
Block a user