diff --git a/include/ck_tile/ops/fmha/kernel/fmha_fwd_appendkv_kernel.hpp b/include/ck_tile/ops/fmha/kernel/fmha_fwd_appendkv_kernel.hpp index dd3ec71723..3558cb3513 100644 --- a/include/ck_tile/ops/fmha/kernel/fmha_fwd_appendkv_kernel.hpp +++ b/include/ck_tile/ops/fmha/kernel/fmha_fwd_appendkv_kernel.hpp @@ -524,7 +524,7 @@ struct FmhaFwdAppendKVKernel } }(); constexpr auto rotary_cos_sin_dram_window_lengths = - make_tuple(number{}, number{}); + make_tuple(number{}, number{}); const auto rotary_cos_dram_window = [&]() { if constexpr(kApplyRoPE) { @@ -532,7 +532,7 @@ struct FmhaFwdAppendKVKernel const auto rotary_cos_dram_native = make_naive_tensor_view( reinterpret_cast(kargs.rotary_cos_ptr), - make_tuple(kargs.seqlen_k + kargs.seqlen_knew, kargs.hdim_q), + make_tuple(kargs.seqlen_k + kargs.seqlen_knew, kargs.rotary_dim / 2), make_tuple(kargs.rotary_dim / 2, 1), number<8>{}, number<1>{}); @@ -557,7 +557,7 @@ struct FmhaFwdAppendKVKernel const auto rotary_sin_dram_native = make_naive_tensor_view( reinterpret_cast(kargs.rotary_sin_ptr), - make_tuple(kargs.seqlen_k + kargs.seqlen_knew, kargs.hdim_q), + make_tuple(kargs.seqlen_k + kargs.seqlen_knew, kargs.rotary_dim / 2), make_tuple(kargs.rotary_dim / 2, 1), number<8>{}, number<1>{});