mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 13:41:24 +00:00
Mx fp6 flatmm (#3601)
* add fp6 data-type and support sync/async dwordx3 load/store * clang-format * pre-commit * 1st commit * default mnk pass ut * fix a distrubution * fix * fix bdram distr * update * pass ut * improve perf * update * clean code * resolve copilot comment * reslove comment * clang-format --------- Co-authored-by: ZheWang <zhewan@amd.com>
This commit is contained in:
@@ -179,10 +179,11 @@ auto preShuffleWeight(ck_tile::HostTensor<dtype>& src)
|
||||
const int K = src_lengths[0];
|
||||
const int N = src_lengths[1];
|
||||
constexpr int packed_size = ck_tile::numeric_traits<dtype>::PackedSize;
|
||||
int KPack = 16 * packed_size; // fp4:32 or fp8:16
|
||||
int NLane = N_Warp_Tile;
|
||||
int KLane = 64 / NLane;
|
||||
int K0 = K / (KLane * KPack);
|
||||
int KPack =
|
||||
std::is_same_v<dtype, ck_tile::pk_fp6x16_t> ? 32 : 16 * packed_size; // fp4/fp6:32 or fp8:16
|
||||
int NLane = N_Warp_Tile;
|
||||
int KLane = 64 / NLane;
|
||||
int K0 = K / (KLane * KPack);
|
||||
|
||||
ck_tile::HostTensor<dtype> shuffled(ck_tile::HostTensorDescriptor({N * K}, {1}));
|
||||
|
||||
@@ -295,7 +296,14 @@ int run_mx_flatmm_example(int argc, char* argv[])
|
||||
}
|
||||
else if(mx_prec == "fp6" || mx_prec == "fp6xfp6")
|
||||
{
|
||||
throw std::runtime_error("fp6xfp6 is not supported.");
|
||||
if(persistent_opt == 0)
|
||||
return run_mx_flatmm_with_layouts<ck_tile::pk_fp6x16_t,
|
||||
ck_tile::pk_fp6x16_t,
|
||||
ck_tile::fp16_t,
|
||||
MXfp6_FlatmmConfig16,
|
||||
false>(argc, argv, Row{}, Col{}, Row{});
|
||||
else
|
||||
throw std::runtime_error("Only support non-persistent kernel now!");
|
||||
}
|
||||
else if(mx_prec == "fp8" || mx_prec == "fp8xfp8")
|
||||
{
|
||||
|
||||
Reference in New Issue
Block a user