Revert "Revert "Fp8 block scale quantization for fmha fwd (#3330)" (#3633)" (#3635)

This reverts commit de5a1d730d.

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
This commit is contained in:
ltqin
2026-01-24 01:03:22 +08:00
committed by GitHub
parent 2e08a7e5ab
commit 67f0b74ec6
14 changed files with 667 additions and 84 deletions

View File

@@ -13,6 +13,7 @@ enum class quant_scale_enum
{
// Selects which quantization-scale mode a quant_scale_info carries.
// Explicit value 0. NOTE(review): presumably the mode emitted as "n" —
// the branch condition for "n" is not visible in this hunk; confirm.
no_scale = 0,
// Explicit value 1; quant_scale_info writes this mode to the stream as "pt".
pertensor = 1,
// No explicit initializer, so the value is one past pertensor, i.e. 2.
// quant_scale_info writes it as "bs", and decode() accepts "bs" or "2".
blockscale,
};
struct quant_scale_info
@@ -25,6 +26,8 @@ struct quant_scale_info
os << "n";
else if(type == quant_scale_enum::pertensor)
os << "pt";
else if(type == quant_scale_enum::blockscale)
os << "bs";
}
static quant_scale_info decode(std::string str)
@@ -38,6 +41,10 @@ struct quant_scale_info
{
info.type = quant_scale_enum::pertensor;
}
else if(str == "bs" || str == "2")
{
info.type = quant_scale_enum::blockscale;
}
else
{
throw std::invalid_argument("invalid quant scale value: " + str);