Support fp8 dynamic quantization for fmha (#3206)

* Support qscale for dynamic quantization and remove static quantization

* Support hdim=256

* Remove bias test case for fp8

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: asleepzzz <hanwen.chang@amd.com>
This commit is contained in:
rocking
2025-11-24 16:28:25 +08:00
committed by GitHub
parent 096f0a3b23
commit 5948dbffe4
17 changed files with 369 additions and 280 deletions

View File

@@ -9,10 +9,10 @@ set(FMHA_FWD_INSTANCES "tile_fmha_fwd_instances")
 set(TEST_NAME "test_ck_tile_fmha")
 function(add_gtest_fwd test_group)
-set(V_TYPES "fp16" "bf16" "fp8" "fp32")
+set(V_TYPES "fp16" "bf16" "fp8bf16" "fp32")
 set(CPP_TYPE_fp16 "FmhaFwdFp16")
 set(CPP_TYPE_bf16 "FmhaFwdBf16")
-set(CPP_TYPE_fp8 "FmhaFwdFp8")
+set(CPP_TYPE_fp8bf16 "FmhaFwdFp8Bf16")
 set(CPP_TYPE_fp32 "FmhaFwdFp32")
 set(all_tests)