Clarify the use of kSubQKHeaddim and kQKHeaddim

2026-07-17 09:08:35 +00:00 · 2025-12-03 08:18:13 +00:00
parent 7234b2fc1a
commit 2549bc1fee
5 changed files with 14 additions and 12 deletions
--- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp
@@ -586,7 +586,7 @@ struct HstuAttentionFwdKernel

            return pad_tensor_view(k_dram_naive,
                                   make_tuple(number<HstuAttentionPipeline::kN0>{},
-                                              number<HstuAttentionPipeline::kQKHeaddim>{}),
+                                              number<HstuAttentionPipeline::kSubQKHeaddim>{}),
                                   sequence<false, kPadHeadDimQK>{});
        }();
        const auto v_dram = [&]() {
@@ -631,7 +631,7 @@ struct HstuAttentionFwdKernel
        auto k_dram_window =
            make_tile_window(k_dram,
                             make_tuple(number<HstuAttentionPipeline::kN0>{},
-                                        number<HstuAttentionPipeline::kQKHeaddim>{}),
+                                        number<HstuAttentionPipeline::kSubQKHeaddim>{}),
                             {0, 0});

        auto v_dram_window = make_tile_window(
--- a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_pipeline.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_pipeline.hpp
@@ -150,7 +150,8 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVS

        static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                          kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
-                          kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
+                          kSubQKHeaddim ==
+                              KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                          kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                          kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                          kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
@@ -179,7 +180,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVS

        auto q_dram_window =
            make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kGemmSingleRepM>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kGemmSingleRepM>{}, number<kSubQKHeaddim>{}),
                             q_dram_block_window_tmp.get_window_origin(),
                             Policy::template MakeQDramSingleRepMTileDistribution<Problem>());

@@ -189,7 +190,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVS

        auto k_dram_window =
            make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kK1>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kK1>{}, number<kSubQKHeaddim>{}),
                             {seqlen_k_start, 0},
                             Policy::template MakeKDramTileDistribution<Problem>());

--- a/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_no_softmax_fwd_trload_pipeline.hpp
@@ -178,7 +178,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad

        auto q_dram_window =
            make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kM0>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kM0>{}, number<kSubQKHeaddim>{}),
                             q_dram_block_window_tmp.get_window_origin(),
                             Policy::template MakeQDramTileDistribution<Problem>());

@@ -188,7 +188,7 @@ struct HstuAttentionNoSoftmaxFwdPipelineQRKSVSTrLoad

        auto k_dram_window =
            make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kK1>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kK1>{}, number<kSubQKHeaddim>{}),
                             {seqlen_k_start, 0},
                             Policy::template MakeKDramTileDistribution<Problem>());

--- a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_pipeline.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_pipeline.hpp
@@ -152,7 +152,8 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVS

        static_assert(kM0 == QDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                          kN0 == KDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
-                          kQKHeaddim == KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
+                          kSubQKHeaddim ==
+                              KDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                          kN1 == VDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
                          kK1 == VDramBlockWindowTmp{}.get_window_lengths()[number<1>{}] &&
                          kM0 == BiasDramBlockWindowTmp{}.get_window_lengths()[number<0>{}] &&
@@ -194,7 +195,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVS

        auto q_dram_window =
            make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kGemmSingleRepM>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kGemmSingleRepM>{}, number<kSubQKHeaddim>{}),
                             q_dram_block_window_tmp.get_window_origin(),
                             Policy::template MakeQDramSingleRepMTileDistribution<Problem>());

@@ -204,7 +205,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVS

        auto k_dram_window =
            make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kK1>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kK1>{}, number<kSubQKHeaddim>{}),
                             {seqlen_k_start, 0},
                             Policy::template MakeKDramTileDistribution<Problem>());

--- a/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp
+++ b/example/ck_tile/18_hstu_attention/hstu_attention_with_softmax_fwd_trload_pipeline.hpp
@@ -193,7 +193,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad

        auto q_dram_window =
            make_tile_window(q_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kM0>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kM0>{}, number<kSubQKHeaddim>{}),
                             q_dram_block_window_tmp.get_window_origin(),
                             Policy::template MakeQDramTileDistribution<Problem>());

@@ -203,7 +203,7 @@ struct HstuAttentionWithSoftmaxFwdPipelineQRKSVSTrLoad

        auto k_dram_window =
            make_tile_window(k_dram_block_window_tmp.get_bottom_tensor_view(),
-                             make_tuple(number<kK1>{}, number<kQKHeaddim>{}),
+                             make_tuple(number<kK1>{}, number<kSubQKHeaddim>{}),
                             {seqlen_k_start, 0},
                             Policy::template MakeKDramTileDistribution<Problem>());