flashattention fwd add (80, 96) instance (#3415)

* add hdim (96,96) instance

* change to (80,96)

* format py

* remove 96 in optdim

* when N=6 change to llvm_amdgcn_raw_buffer_load_i32x3
This commit is contained in:
ltqin
2025-12-18 01:16:11 +08:00
committed by GitHub
parent fe3d52d9b0
commit 92653168c2
6 changed files with 127 additions and 6 deletions

View File

@@ -47,7 +47,7 @@ set(FMHA_FWD_CODE_GEN_COMMON_ARGS
${CMAKE_CURRENT_LIST_DIR}/generate.py
--targets ${FMHA_TARGETS_ARG}
--api ${FMHA_FWD_APIS}
-    --optdim 32,64,128,256
+    --optdim 32,64,80,128,256
# --filter fmha_fwd...
)
set(FMHA_BWD_CODE_GEN_COMMON_ARGS