Revert "Revert " Fp8 block scale quantization for fmha fwd (#3330)" (#3633)" (#3635)

This reverts commit de5a1d730d.

Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
This commit is contained in:
ltqin
2026-01-24 01:03:22 +08:00
committed by GitHub
parent 2e08a7e5ab
commit 67f0b74ec6
14 changed files with 667 additions and 84 deletions

View File

@@ -12,6 +12,7 @@ enum class BlockAttentionQuantScaleEnum
{
NO_SCALE = 0,
PERTENSOR = 1,
BLOCKSCALE,
};
template <BlockAttentionQuantScaleEnum>
@@ -27,5 +28,10 @@ struct BlockAttentionQuantScaleEnumToStr<BlockAttentionQuantScaleEnum::PERTENSOR
{
static constexpr const char* name = "pertensor";
};
/// Explicit specialization that pairs BlockAttentionQuantScaleEnum::BLOCKSCALE
/// with its textual name.
template <>
struct BlockAttentionQuantScaleEnumToStr<BlockAttentionQuantScaleEnum::BLOCKSCALE>
{
    /// String identifier exposed for the BLOCKSCALE enumerator.
    static constexpr const char* name = "blockscale";
};
} // namespace ck_tile