From ca47a6cfe2f42a777968ad3f69ad66d21d0a6569 Mon Sep 17 00:00:00 2001 From: rocking5566 Date: Tue, 25 Jan 2022 12:44:13 +0800 Subject: [PATCH] Do not hardcode the function parameter, use template instead. (#72) * Do not hardcode the function parameter, use template instead. * [What] Remove AThreadTransferSrcResetCoordinateAfterRun and BThreadTransferSrcResetCoordinateAfterRun in host API [Why] "C_Shuffle" version is supposed to be similar to the vanilla one * Fix typo Let DeviceGemmXdl_C_Shuffle use kernel_gemm_xdlops_v3r1 --- .../include/device_gemm_xdl_c_shuffle.hpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/device_operation/include/device_gemm_xdl_c_shuffle.hpp b/device_operation/include/device_gemm_xdl_c_shuffle.hpp index 2c70e955d7..da19b5ec4f 100644 --- a/device_operation/include/device_gemm_xdl_c_shuffle.hpp +++ b/device_operation/include/device_gemm_xdl_c_shuffle.hpp @@ -156,20 +156,20 @@ struct DeviceGemmXdl_C_Shuffle MXdlPerWave, NXdlPerWave, ABlockTransferThreadClusterLengths_K0_M_K1, - Sequence<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder, - Sequence<1, 0, 2>, // ABlockTransferSrcAccessOrder, - 2, // ABlockTransferSrcVectorDim, + ABlockTransferThreadClusterArrangeOrder, + ABlockTransferSrcAccessOrder, + ABlockTransferSrcVectorDim, ABlockTransferSrcScalarPerVector, ABlockTransferDstScalarPerVector_K1, - false, // AThreadTransferSrcResetCoordinateAfterRun, + false, ABlockLdsAddExtraM, BBlockTransferThreadClusterLengths_K0_N_K1, - Sequence<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder, - Sequence<1, 0, 2>, // BBlockTransferSrcAccessOrder, - 2, // BBlockTransferSrcVectorDim, + BBlockTransferThreadClusterArrangeOrder, + BBlockTransferSrcAccessOrder, + BBlockTransferSrcVectorDim, BBlockTransferSrcScalarPerVector, BBlockTransferDstScalarPerVector_K1, - false, // BThreadTransferSrcResetCoordinateAfterRun, + false, BBlockLdsAddExtraN, CShuffleMXdlPerWavePerShuffle, CShuffleNXdlPerWavePerShuffle, @@ -317,7 +317,7 @@ struct DeviceGemmXdl_C_Shuffle } else { - const auto kernel = kernel_gemm_xdlops_v2r3< + const auto kernel = kernel_gemm_xdlops_v3r1< GridwiseGemm, ADataType, // TODO: distiguish A/B datatype CDataType,