From 5722f8afbc2bc964656914c19109550327f2deb9 Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Sun, 7 Dec 2025 10:11:43 +0000 Subject: [PATCH] [Performance] Relax __builtin_amdgcn_sched_barrier() mask from 0 to 0x1 in block_gemm --- .../gemm/block/block_gemm_areg_bsmem_creg_v2_prefetch_k.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v2_prefetch_k.hpp b/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v2_prefetch_k.hpp index ffd6bb18bf..14ace3d6af 100644 --- a/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v2_prefetch_k.hpp +++ b/include/ck_tile/ops/gemm/block/block_gemm_areg_bsmem_creg_v2_prefetch_k.hpp @@ -141,7 +141,7 @@ struct BlockGemmARegBSmemCRegV2PrefetchK {nIter * NPerBlockPerIter, 0 * KPerBlockPerIter}); b_warp_tensors[I0] = load_tile(b_warp_windows(nIter)(I0)); - __builtin_amdgcn_sched_barrier(0); + __builtin_amdgcn_sched_barrier(0x00000001); static_for<0, KIterPerWarp, 1>{}([&](auto kIter) { if constexpr(kIter < KIterPerWarp - 1) @@ -154,7 +154,7 @@ struct BlockGemmARegBSmemCRegV2PrefetchK load_tile(b_warp_windows(nIter)(number{})); }; - __builtin_amdgcn_sched_barrier(0); + __builtin_amdgcn_sched_barrier(0x00000001); static_for<0, MIterPerWarp, 1>{}([&](auto mIter) { // read A warp tensor from A block tensor