diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp index ef689b5666..854e5c8116 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_kernel.hpp @@ -564,9 +564,9 @@ struct HstuAttentionFwdKernel const index_t i_nhead = blockIdx.y; const index_t i_block = blockIdx.z; #else - const index_t i_block = blockIdx.x; - const index_t i_nhead = blockIdx.y; - const index_t i_batch = blockIdx.z; + const index_t i_block = blockIdx.x; + const index_t i_nhead = blockIdx.y; + const index_t i_batch = blockIdx.z; #endif const auto f = [](index_t dividend, index_t divisor) { @@ -591,16 +591,16 @@ struct HstuAttentionFwdKernel const index_t i_nhead = blockIdx.y; const index_t i_block = blockIdx.z; #else - const index_t i_block = blockIdx.x; - const index_t i_nhead = blockIdx.y; - const index_t i_batch = blockIdx.z; + const index_t i_block = blockIdx.x; + const index_t i_nhead = blockIdx.y; + const index_t i_batch = blockIdx.z; #endif #if HSTU_SCHED_BATCH_AS_FIRST_GRID_DIM index_t i_tile_m = i_block; i_tile_m = gridDim.z - 1 - i_tile_m; #else - const index_t i_tile_m = i_block; + const index_t i_tile_m = i_block; #endif const index_t i_tile_n = 0; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline_policy.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline_policy.hpp index 39077f0c0c..d765a29a89 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline_policy.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_pipeline_policy.hpp @@ -582,8 +582,8 @@ struct HstuAttentionFwdPipelineQRKSVSPolicy auto warp_gemm = [&]() { if constexpr((std::is_same_v || - std::is_same_v)&&std:: - is_same_v) + std::is_same_v) && + std::is_same_v) { constexpr index_t WarpGemmM = Problem::HstuAttentionTileSetting::Gemm0WarpTile::at(number<0>{}); @@ -654,8 +654,8 @@ struct HstuAttentionFwdPipelineQRKSVSPolicy auto warp_gemm = [&]() { if constexpr((std::is_same_v || - std::is_same_v)&&std:: - is_same_v) + std::is_same_v) && + std::is_same_v) { constexpr index_t WarpGemmM = Problem::HstuAttentionTileSetting::Gemm1WarpTile::at(number<0>{}); diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_splitkv_kernel.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_splitkv_kernel.hpp index 7f702e36e8..1c9204dcf2 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_fwd_splitkv_kernel.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_fwd_splitkv_kernel.hpp @@ -562,9 +562,9 @@ struct HstuAttentionFwdSplitKVKernel const index_t i_nhead = blockIdx.y; const index_t i_block = blockIdx.z; #else - const index_t i_block = blockIdx.x; - const index_t i_nhead = blockIdx.y; - const index_t i_batch = blockIdx.z; + const index_t i_block = blockIdx.x; + const index_t i_nhead = blockIdx.y; + const index_t i_batch = blockIdx.z; #endif #if HSTU_SCHED_BATCH_AS_FIRST_GRID_DIM @@ -586,9 +586,9 @@ struct HstuAttentionFwdSplitKVKernel const index_t i_nhead = blockIdx.y; const index_t i_block = blockIdx.z; #else - const index_t i_block = blockIdx.x; - const index_t i_nhead = blockIdx.y; - const index_t i_batch = blockIdx.z; + const index_t i_block = blockIdx.x; + const index_t i_nhead = blockIdx.y; + const index_t i_batch = blockIdx.z; #endif #if HSTU_SCHED_BATCH_AS_FIRST_GRID_DIM @@ -596,8 +596,8 @@ struct HstuAttentionFwdSplitKVKernel auto [i_tile_m, i_split] = f(i_tile_m_i_split, kargs.num_splits); i_tile_m = gridDim.z / kargs.num_splits - 1 - i_tile_m; #else - index_t i_tile_m_i_split = i_block; - auto [i_tile_m, i_split] = f(i_tile_m_i_split, kargs.num_splits); + index_t i_tile_m_i_split = i_block; + auto [i_tile_m, i_split] = f(i_tile_m_i_split, kargs.num_splits); #endif const index_t i_tile_n = 0; diff --git a/example/ck_tile/18_hstu_attention/hstu_attention_util.hpp b/example/ck_tile/18_hstu_attention/hstu_attention_util.hpp index 6ad710b6a9..e08bed90ad 100644 --- a/example/ck_tile/18_hstu_attention/hstu_attention_util.hpp +++ b/example/ck_tile/18_hstu_attention/hstu_attention_util.hpp @@ -120,7 +120,7 @@ CK_TILE_DEVICE static void scale_tile_in_pack(InOutDstrTensor& in_out_dstr_tenso static_for<0, thread_buffer_size, 2>{}([&](auto idx) { fp32x2_t input = {in_out_dstr_tensor.thread_buf_[idx], - in_out_dstr_tensor.thread_buf_[idx + 1]}; + in_out_dstr_tensor.thread_buf_[idx + 1]}; auto output = pk_mul_f32(input, pk_scale); in_out_dstr_tensor.thread_buf_[idx] = output.x; in_out_dstr_tensor.thread_buf_[idx + 1] = output.y;