fp8 fmha async pipeline (#3339)

* replace qr with async pipeline

* Add fp8fp32 to DTYPE_BITS

* Add kAlignmentRandVal to avoid a compile failure

* format

---------

Co-authored-by: Thomas Ning <Thomas.Ning@amd.com>
This commit is contained in:
rocking
2025-12-04 12:18:25 +08:00
committed by GitHub
parent 4baa4c9fae
commit eb7f617713
2 changed files with 18 additions and 7 deletions

View File

@@ -87,6 +87,8 @@ struct BlockFmhaBatchPrefillPipelineQRKSVSAsync
// Compile-time alignment value for O, queried from the pipeline Policy for this Problem.
static constexpr index_t kAlignmentO = Policy::template GetAlignmentO<Problem>();
// Compile-time alignment value for Bias: forced to 1 whenever kPadSeqLenK is set,
// otherwise the value reported by the Policy is used.
// NOTE(review): presumably padding along seqlen_k rules out wider aligned accesses — confirm.
static constexpr index_t kAlignmentBias =
kPadSeqLenK ? 1 : Policy::template GetAlignmentBias<Problem>();
// Compile-time alignment value for RandVal, selected with the same kPadSeqLenK rule
// as kAlignmentBias (1 when kPadSeqLenK is set, Policy-provided value otherwise).
static constexpr index_t kAlignmentRandVal =
kPadSeqLenK ? 1 : Policy::template GetAlignmentRandVal<Problem>();
#if CK_TILE_FMHA_FWD_FAST_EXP2
static constexpr auto R_LOG2E = 1.0 / log2e_v<SaccDataType>;