From d3f550f30c0a4d9df15c613015d5dff268d6746d Mon Sep 17 00:00:00 2001 From: "PoYen, Chen" Date: Thu, 22 Aug 2024 09:03:01 +0000 Subject: [PATCH] Add s_barrier to sync threads --- .../ops/fmha/pipeline/block_fmha_fwd_appendkv_pipeline.hpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_appendkv_pipeline.hpp b/include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_appendkv_pipeline.hpp index def3477055..be451389b3 100644 --- a/include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_appendkv_pipeline.hpp +++ b/include/ck_tile/ops/fmha/pipeline/block_fmha_fwd_appendkv_pipeline.hpp @@ -145,6 +145,7 @@ struct BlockFmhaFwdAppendKVPipeline rotary_sin_window, rotary_dim, thread_end); + __builtin_amdgcn_s_barrier(); } if constexpr(kIsPagedKV) @@ -222,6 +223,7 @@ struct BlockFmhaFwdAppendKVPipeline BlockRotaryEmbedding::apply( q_tile, q_window, rotary_cos_window, rotary_sin_window, rotary_dim, thread_end); + __builtin_amdgcn_s_barrier(); store_tile(q_dram_block_window, q_tile); }