This commit is contained in:
Juuso Korhonen
2025-10-13 10:30:11 +00:00
parent af94aaf1cb
commit b721f79f99

View File

@@ -354,15 +354,13 @@ struct FmhaFwdV3Kernel
q_dram_pad,
make_tuple(
make_merge_transform(
make_tuple(seq_len, num_queries_per_kv)
make_tuple(seq_len_padded, num_queries_per_kv)
),
make_pass_through_transform(HEAD_SIZE_PADDED)
),
make_tuple(sequence<0, 1>{}, sequence<2>{}),
make_tuple(sequence<0>{}, sequence<1>{})
);
// TODO are we padding the tensor view or the block here?
); // merges dims 0 and 1 (seq_len_padded, num_queries_per_kv) into a single dim; the query-head index (num_queries_per_kv) is the fastest-changing index within the merged dim
return q_dram_merged;
}();