Support fp8 dynamic quantization for fmha (#3206)

* Support qscale for dynamic quant, remove static quant

* Support hdim=256

* Remove bias test case for fp8

---------

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
Co-authored-by: asleepzzz <hanwen.chang@amd.com>
This commit is contained in:
rocking
2025-11-24 16:28:25 +08:00
committed by GitHub
parent 096f0a3b23
commit 5948dbffe4
17 changed files with 369 additions and 280 deletions

View File

@@ -625,7 +625,7 @@ void dump_fmha_fwd_json_results(const std::string& json_filename,
float scale_s,
float p_drop,
bool lse,
-bool squant,
+const std::string& qscale,
const std::string& bias,
const std::string& vlayout,
bool pass,
@@ -650,7 +650,7 @@ void dump_fmha_fwd_json_results(const std::string& json_filename,
ADD_KEY_VALUE("scale_s", scale_s);
ADD_KEY_VALUE("p_drop", p_drop);
ADD_KEY_VALUE("lse", lse);
-ADD_KEY_VALUE("squant", squant);
+ADD_KEY_VALUE("qscale", qscale);
ADD_KEY_VALUE("bias", bias);
ADD_KEY_VALUE("vlayout", vlayout);
ADD_KEY_VALUE("verification", pass ? "pass" : "fail");