diff --git a/include/ck_tile/ops/unified_attention/kernel/unified_attention_kernel.hpp b/include/ck_tile/ops/unified_attention/kernel/unified_attention_kernel.hpp index ea1c4f3bf0..5765fd858c 100644 --- a/include/ck_tile/ops/unified_attention/kernel/unified_attention_kernel.hpp +++ b/include/ck_tile/ops/unified_attention/kernel/unified_attention_kernel.hpp @@ -354,15 +354,13 @@ struct FmhaFwdV3Kernel q_dram_pad, make_tuple( make_merge_transform( - make_tuple(seq_len, num_queries_per_kv) + make_tuple(seq_len_padded, num_queries_per_kv) ), make_pass_through_transform(HEAD_SIZE_PADDED) ), make_tuple(sequence<0, 1>{}, sequence<2>{}), make_tuple(sequence<0>{}, sequence<1>{}) - ); - - // TODO are we padding the tensor view or the block here? + ); // flattens the first two dims, head dim is the fastest changing dim in the merged dim return q_dram_merged; }();