mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-07-03 05:37:34 +00:00
Expand medium tier dispatch threshold for multi-seq prefill
Raise the medium tier boundary from avg_q <= 32 to avg_q <= 128 (in the code, from kBlockQ_medium * 2 to kBlockQ_medium * 8; the quoted values correspond to kBlockQ_medium = 16, i.e. num_queries_per_kv = 8). This routes prefill shapes with 76-253 sequences (avg query length 32-108) to the 4-warp kBlockM=128 kernel instead of the 8-warp kBlockM=256 kernel.

Results on affected shapes (via aiter):
- 76-seq prefill: 1.44x vs Triton -> 1.03x (near-tied)
- 157-seq prefill: 1.61x -> 1.02x
- 181-seq prefill: 1.74x -> 1.00x (tied)
- 229-seq prefill: 1.77x -> 1.05x

Made-with: Cursor
This commit is contained in:
@@ -50,7 +50,7 @@ static tile_tier select_tile_tier(const unified_attention_args& args)
|
||||
return tile_tier::small; // pure decode: 2 warps, kBlockM=64
|
||||
|
||||
const index_t kBlockQ_medium = 128 / args.num_queries_per_kv; // kBlockQ for 4-warp kernel
|
||||
if(avg_q <= kBlockQ_medium * 2)
|
||||
if(avg_q <= kBlockQ_medium * 8)
|
||||
return tile_tier::medium; // many short seqs: 4 warps, kBlockM=128
|
||||
|
||||
return tile_tier::large; // long prefill: 8 warps, kBlockM=256
|
||||
|
||||
Reference in New Issue
Block a user