From cf012c23fc738a8ab00729a449888b48d758709f Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Wed, 23 Jul 2025 13:23:11 +0000 Subject: [PATCH] Adjust the code that calculates i_m0 in the kernel --- .../hstu_attention_fwd_kernel.hpp | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp index 34e3c98ac6..4c5c50ea44 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp @@ -651,23 +651,31 @@ struct HstuAttentionFwdKernel int num_target = (kargs.num_targets_ptr == nullptr) ? 0 : kargs.num_targets_ptr[i_batch]; index_t seqlen_in_first_split = kargs.seqlen; + bool is_tile_in_first_split = true; + index_t i_m0; if constexpr(kHasLocalMask) { if(kargs.min_full_attn_seqlen > 0) + { seqlen_in_first_split = kargs.seqlen - kargs.min_full_attn_seqlen - num_target; - }; - index_t num_tile_in_first_split = - ck_tile::integer_divide_ceil(seqlen_in_first_split, HstuAttentionPipeline::kM0); + index_t num_tile_in_first_split = + ck_tile::integer_divide_ceil(seqlen_in_first_split, HstuAttentionPipeline::kM0); - bool is_tile_in_first_split = (i_tile_m < num_tile_in_first_split); + is_tile_in_first_split = (i_tile_m < num_tile_in_first_split); - index_t i_m0 = is_tile_in_first_split + i_m0 = is_tile_in_first_split ? __builtin_amdgcn_readfirstlane(i_tile_m * HstuAttentionPipeline::kM0) : __builtin_amdgcn_readfirstlane((i_tile_m - num_tile_in_first_split) * HstuAttentionPipeline::kM0) + seqlen_in_first_split; + } + else + i_m0 = __builtin_amdgcn_readfirstlane(i_tile_m * HstuAttentionPipeline::kM0); + } + else + i_m0 = __builtin_amdgcn_readfirstlane(i_tile_m * HstuAttentionPipeline::kM0); const index_t i_n1 = __builtin_amdgcn_readfirstlane(i_tile_n * HstuAttentionPipeline::kN1);