diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp index 94fa7b1494..2502ab3e64 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp @@ -380,7 +380,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad { // STAGE 1, Gemm_0 ( S = Q@K ) static_for<0, k1_loops, 1>{}([&](auto i_k1) { - store_tile(k_lds_write_windows[i_k1], + store_tile(k_lds_write_windows[number{}], tile_elementwise_in(k_element_func, k_tiles[i_k1])); __builtin_amdgcn_sched_barrier(0x00000001); @@ -493,7 +493,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad gemm_1( o_acc, get_slice_tile(p, sequence<0, i_k1 * kK1>{}, sequence{}), - v_lds_windows[number{}]); + v_lds_windows[number<(i_k1 + 2) % NumKVLdsBuffers>{}]); }); } while(seqlen_k_curr < seqlen_k_end); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp index 04b9584875..7f0cc215c3 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp @@ -391,7 +391,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad { // STAGE 1, Gemm_0 ( S = Q@K ) static_for<0, k1_loops, 1>{}([&](auto i_k1) { - store_tile(k_lds_write_windows[i_k1], + store_tile(k_lds_write_windows[number{}], tile_elementwise_in(k_element_func, k_tiles[i_k1])); __builtin_amdgcn_sched_barrier(0x00000001); @@ -571,7 +571,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad gemm_1( o_acc, get_slice_tile(p, sequence<0, i_k1 * kK1>{}, sequence{}), - v_lds_windows[number{}]); + v_lds_windows[number<(i_k1 + 2) % NumKVLdsBuffers>{}]); }); } while(seqlen_k_curr < seqlen_k_end);