Add input fp8 and output bf16 attention (#2726)

* change host using fp16 to check

* fp8 to fp8 compare

* rewrite input parameters

* add not squant

* remove some output code

* for scale = 1

* format

* saturates only for fp8

* add fp8bf16 data type

* add fp8bf16 data type

* fix test fp8 code

* add run_fp8bf16_tests

* change fmha fwd example parameter(adding fp8bf16)

* Support fp8bf16 for Aiter

* Support aiter fp8bf16 in c++

* fix comment about fp8 in readme.md

* add fp8fp32

* add fp8fp32 test

* remove range_q etc.

* format

* fix test parameters about squant and fmha example input fp8bf16 fp8fp32 data type

* add fp8bf16 to data_type function

* change colmajor to rowmajor in test_ck_tile_fmha_fwd_fp8

* format

* reset atol for fp8

* fix bug for atol

---------

Co-authored-by: rocking <ChunYu.Lai@amd.com>
Co-authored-by: asleepzzz <hanwen.chang@amd.com>
This commit is contained in:
ltqin
2025-09-19 14:26:43 +08:00
committed by GitHub
parent e469fee046
commit dd249f1cd6
12 changed files with 262 additions and 153 deletions

View File

@@ -94,7 +94,30 @@ run_fp8_tests() {
for b in 1 2 ; do
for hdim in 64 128 256 ; do
run_exe -prec=fp8 -init=3 -b=$b -h=1 -d=128 -s=128 -bias=$bias -iperm=$perm -operm=$perm -vlayout=c -squant=1 -kname=$KNAME $COMMON_ARGS
$EXE -prec=fp8 -init=0 -b=$b -h=1 -d=128 -s=128 -bias=$bias -iperm=$perm -operm=$perm -vlayout=r -squant=1 -kname=$KNAME $COMMON_ARGS
done ; done ; done ; done
}
run_fp8bf16_tests() {
for perm in 0 1 ; do
for bias in "n" "e" "a" ; do
for b in 1 2 ; do
for hdim in 64 128 256 ; do
$EXE -prec=fp8bf16 -init=0 -b=$b -h=1 -d=128 -s=128 -bias=$bias -iperm=$perm -operm=$perm -vlayout=r -squant=1 -kname=$KNAME $COMMON_ARGS
done ; done ; done ; done
}
run_fp8fp32_tests() {
for perm in 0 1 ; do
for bias in "n" "e" "a" ; do
for b in 1 2 ; do
for hdim in 64 128 256 ; do
$EXE -prec=fp8fp32 -init=0 -b=$b -h=1 -d=128 -s=128 -bias=$bias -iperm=$perm -operm=$perm -vlayout=r -squant=1 -kname=$KNAME $COMMON_ARGS
done ; done ; done ; done
}
@@ -117,7 +140,9 @@ run_fp16_appendkv_tests() {
set -x
run_fp16_bf16_tests
# run_fp8_tests
run_fp8_tests
run_fp8bf16_tests
run_fp8fp32_tests
if [ $TEST_APPENDKV -eq 1 ] ; then
run_fp16_appendkv_tests