diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp index 750cb3af9f..324916bf77 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp @@ -586,7 +586,7 @@ struct HstuAttentionFwdKernel return pad_tensor_view(k_dram_naive, make_tuple(number{}, - number{}), + number{}), sequence{}); }(); const auto v_dram = [&]() { @@ -631,7 +631,7 @@ struct HstuAttentionFwdKernel auto k_dram_window = make_tile_window(k_dram, make_tuple(number{}, - number{}), + number{}), {0, 0}); auto v_dram_window = make_tile_window( diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_pipeline.hpp index dc17803a35..d1c4ebccd4 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_pipeline.hpp @@ -150,7 +150,8 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVS static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && - kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] && + kSubQKHeaddim == + KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] && kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] && kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && @@ -179,7 +180,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVS auto q_dram_window = make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), q_dram_block_window_tmp.get_window_origin(), Policy::template MakeQDramSingleRepMTileDistribution()); @@ -189,7 +190,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVS auto k_dram_window = make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), {seqlen_k_start, 0}, Policy::template MakeKDramTileDistribution()); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp index c4ffd580a9..bbceb132de 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp @@ -178,7 +178,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad auto q_dram_window = make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), q_dram_block_window_tmp.get_window_origin(), Policy::template MakeQDramTileDistribution()); @@ -188,7 +188,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad auto k_dram_window = make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), {seqlen_k_start, 0}, Policy::template MakeKDramTileDistribution()); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_pipeline.hpp index 85aff422a1..7858e28d04 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_pipeline.hpp @@ -152,7 +152,8 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVS static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && - kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] && + kSubQKHeaddim == + KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] && kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] && kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] && @@ -194,7 +195,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVS auto q_dram_window = make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), q_dram_block_window_tmp.get_window_origin(), Policy::template MakeQDramSingleRepMTileDistribution()); @@ -204,7 +205,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVS auto k_dram_window = make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), {seqlen_k_start, 0}, Policy::template MakeKDramTileDistribution()); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp index c089c915c9..bf2d2173e8 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp @@ -193,7 +193,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad auto q_dram_window = make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), q_dram_block_window_tmp.get_window_origin(), Policy::template MakeQDramTileDistribution()); @@ -203,7 +203,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad auto k_dram_window = make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(), - make_tuple(number{}, number{}), + make_tuple(number{}, number{}), {seqlen_k_start, 0}, Policy::template MakeKDramTileDistribution());