mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
Return 64 early from get_hstu_attention_fwd_mtile() when max_seqlen_q <= 64
This commit is contained in:
@@ -471,6 +471,9 @@ static int get_hstu_attention_fwd_mtile(int num_batches, int num_heads, int max_
|
||||
int num_CUs = get_number_of_cu();
|
||||
auto ceildiv = [](int a, int b) { return (a + b - 1) / b; };
|
||||
|
||||
if(max_seqlen_q <= 64)
|
||||
return 64;
|
||||
|
||||
int nbatch_nhead_mblocks = num_batches * num_heads * ceildiv(max_seqlen_q, 128);
|
||||
|
||||
// assuming each CU is assigned two work-groups
|
||||
|
||||
Reference in New Issue
Block a user