GemmGemm TNNT instances (#399)

* add gemm_gemm TNNT instance

* sanitize Gemm1KPack

* disable instances that failed validation on mi100

[ROCm/composable_kernel commit: fe52c94c98]
This commit is contained in:
Anthony Chang
2022-09-07 02:38:01 +08:00
committed by GitHub
parent 5643625481
commit 2d119fda7b
6 changed files with 107 additions and 3 deletions

View File

@@ -602,8 +602,9 @@ struct GridwiseBatchedGemmGemm_Xdl_CShuffle
static_cast<FloatAB*>(p_shared) + SharedMemTrait::b1_block_space_offset,
b1_block_desc_bk0_n_bk1.GetElementSpaceSize());
+// selected_mfma.k_per_blk <= B1K1 <= selected_mfma.group_size
 constexpr index_t Gemm1KPack = math::max(
-math::lcm(MfmaSelector<FloatAB, MPerXdl, NPerXdl>::selected_mfma.group_size, B1K1),
+math::gcd(MfmaSelector<FloatAB, MPerXdl, NPerXdl>::selected_mfma.group_size, B1K1),
MfmaSelector<FloatAB, MPerXdl, NPerXdl>::selected_mfma.k_per_blk);
auto gemm1_blockwise_gemm = BlockwiseGemmXdlops_v2<

View File

@@ -608,8 +608,9 @@ struct GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle
static_cast<FloatAB*>(p_shared) + SharedMemTrait::b1_block_space_offset,
b1_block_desc_bk0_n_bk1.GetElementSpaceSize());
+// selected_mfma.k_per_blk <= B1K1 <= selected_mfma.group_size
 constexpr index_t Gemm1KPack = math::max(
-math::lcm(MfmaSelector<FloatAB, MPerXdl, NPerXdl>::selected_mfma.group_size, B1K1),
+math::gcd(MfmaSelector<FloatAB, MPerXdl, NPerXdl>::selected_mfma.group_size, B1K1),
MfmaSelector<FloatAB, MPerXdl, NPerXdl>::selected_mfma.k_per_blk);
auto gemm1_blockwise_gemm = BlockwiseGemmXdlops_v2<