From 1d4d925ba3b665775b309a52df620a099ccd9a9e Mon Sep 17 00:00:00 2001 From: Qianfeng Zhang Date: Sat, 27 Dec 2025 05:43:11 +0000 Subject: [PATCH] Fix K-LdsBuffer and V-LdsBuffer overlap checking --- .../hstu_attention_with_softmax_fwd_trload_pipeline.hpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp index c151c15d0e..7db48fc368 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp @@ -428,9 +428,9 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad __builtin_amdgcn_sched_barrier(0x00000001); - // check whether first V-LdsBufer overlap with last K-LdsBuffer, - // this does not occur when k1_loops == 2 and NumKVLdsBuffers == 4 - if constexpr((k1_loops - 1) % NumKVLdsBuffers == 2 % NumKVLdsBuffers) + // check whether the first V-LdsBuffer overlaps with the last K-LdsBuffer, + // this does not occur when n0_loops is 2 or 4 and NumKVLdsBuffers == 4 + if constexpr((n0_loops - 1) % NumKVLdsBuffers == 2 % NumKVLdsBuffers) { __builtin_amdgcn_s_barrier(); };