Make get_hstu_attention_fwd_mtile() return 64 early when max_seqlen_q <= 64

This commit is contained in:
Qianfeng Zhang
2026-03-26 14:40:01 +00:00
parent 76da618c85
commit eefe426ef7

View File

@@ -471,6 +471,9 @@ static int get_hstu_attention_fwd_mtile(int num_batches, int num_heads, int max_
int num_CUs = get_number_of_cu();
auto ceildiv = [](int a, int b) { return (a + b - 1) / b; };
if(max_seqlen_q <= 64)
return 64;
int nbatch_nhead_mblocks = num_batches * num_heads * ceildiv(max_seqlen_q, 128);
// assuming each CU is assigned two work-groups