Implement fp8 quant for layernorm and rmsnorm (#1814)

This commit is contained in:
ruanjm
2025-01-24 16:40:43 +08:00
committed by GitHub
parent 5b9b083dbc
commit 64d5c4d6cb
9 changed files with 67 additions and 19 deletions

View File

@@ -1,7 +1,7 @@
#!/bin/sh
EXE="$(find . -name tile_example_layernorm2d_fwd -type f | head -n 1)"
-for fquant in "" "-fquant=1 -prec_o=int8"; do
+for fquant in "" "-fquant=1 -prec_o=int8" "-fquant=1 -prec_o=fp8"; do
for pr_i in "fp16" "bf16" ; do
for fadd in "0" "1"; do
$EXE -prec_i=$pr_i -fadd=$fadd $fquant -m=99 -n=13