From ff807ddd1a5fb36fae5f9240fd1b1a22df052fcd Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Thu, 26 Jun 2025 10:09:51 +0000 Subject: [PATCH] Tiny movement in the code lines of the pipeline --- .../hstu_attention_fwd_pipeline.hpp | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp index d705f38d36..5d819aef4a 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline.hpp @@ -368,7 +368,9 @@ struct HstuAttentionFwdPipelineQRKSVS move_tile_window(v_dram_window, {0, kK1}); - __builtin_amdgcn_sched_barrier(0); + // for i_k1 = k1_loop-1, the loading is for next iteration + k_tile = load_tile(k_dram_window); + move_tile_window(k_dram_window, {kK1, 0}); block_sync_lds(); // execute current unroll of gemm_0 @@ -395,14 +397,6 @@ struct HstuAttentionFwdPipelineQRKSVS tile_elementwise_inout([&scale_s](auto& x) { x = x * scale_s; }, sacc_tile); } - __builtin_amdgcn_sched_barrier(0); - - // for i_k1 = k1_loop-1, the loading is for next iteration - k_tile = load_tile(k_dram_window); - move_tile_window(k_dram_window, {kK1, 0}); - - __builtin_amdgcn_sched_barrier(0); - if constexpr(HstuMask::IsMasking) { if constexpr(HstuMask::kUseLocal)