mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 14:59:17 +00:00
MX GEMM - FP6 Example (#2419)
Adds support for the MX FP6 data type in the MX GEMM block pipeline v1 and provides an example of the MX FP6 GEMM algorithm.

---------

Co-authored-by: OscarXu <huaiguxu@amd.com>
Co-authored-by: aska-0096 <haocwang@amd.com>
Co-authored-by: mtgu0705 <mtgu@amd.com>
Co-authored-by: Your Name <you@example.com>
Co-authored-by: lalala-sh <Jiaxing.Wen@amd.com>
Co-authored-by: valarLip <340077269@qq.com>
Co-authored-by: Ding, Yi <yi.ding@amd.com>
Co-authored-by: feifei14119 <feiw@amd.com>
Co-authored-by: Lin, Qun <qlin@amd.com>
Co-authored-by: joye <joye@amd.com>
This commit is contained in:
committed by
GitHub
parent
bfe573d3ba
commit
054f85ab7c
@@ -66,9 +66,12 @@ struct BlockwiseGemmXdlops_mx_pipeline_base
|
||||
static constexpr index_t AMmaKStride = KPack;
|
||||
static constexpr index_t BMmaKStride = KPack;
|
||||
|
||||
//> store rows/cols into thread registers in chunks of 16
|
||||
//> e.g. [k0,...,k15,k64,...,k79] or [k0,...,k15,k32,...,k47]
|
||||
static constexpr index_t KThreadChunk = 16 / sizeof(ComputeTypeA);
|
||||
// store rows/cols into thread registers in chunks of 16 for FP8
|
||||
// e.g. [k0,...,k15,k64,...,k79] or [k0,...,k15,k32,...,k47]
|
||||
// or in chunks of (32 / APackedSize) for FP6 and FP4
|
||||
static constexpr index_t KThreadChunk = (APackedSize == 1) ? 16 : 32 / APackedSize;
|
||||
|
||||
static_assert(APackedSize == BPackedSize, "APackedSize must be equal to BPackedSize for now");
|
||||
|
||||
static constexpr index_t KPerThread = KPerBlock / xdlops_gemm.K0PerXdlops;
|
||||
static constexpr index_t KRepeat = KPerThread / KPack;
|
||||
|
||||
Reference in New Issue
Block a user