[FMHA] Enable page size 16 for batch prefill kernel (#3568)

* [FMHA] Enable page size 16 for batch prefill kernel
* Refactor batch prefill KV offset logic to simplify template arguments
  - Remove redundant `kLog2PageSize` and `kIsVTileFitsInPage` from template args.
  - Add static assert to forbid `page_size=1` with vectorized layout.
2026-04-19 22:39:03 +00:00 · 2026-01-15 22:11:44 +08:00
parent 5122637215
commit 993d3e2f0e
3 changed files with 62 additions and 28 deletions
--- a/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py
+++ b/example/ck_tile/01_fmha/codegen/ops/fmha_batch_prefill.py
@@ -36,7 +36,7 @@ DTYPE_BITS = {

 K0_MAX_SUBMAX_MAP = {32: 32, 64: 64, 96: 128, 128: 128, 256: 256}

-SUPPORTED_PAGE_SIZE = [1, 128, 256, 1024]
+SUPPORTED_PAGE_SIZE = [1, 16, 1024]
 SUPPORTED_KV_MEMORY_LAYOUT = ["vectorized", "linear"]
 SUPPORTED_KV_LOOKUP_TABLE = ["vllm", "sglang"]
 KV_MEMORY_LAYOUT_ENUM_MAP = {