Do not hardcode the function parameter, use template instead. (#72)

* Do not hardcode the function parameter, use template instead.

* [What] Remove AThreadTransferSrcResetCoordinateAfterRun and BThreadTransferSrcResetCoordinateAfterRun in host API
[Why] "C_Shuffle" version is supposed to be similar to the vanilla one

* Fix typo
Let DeviceGemmXdl_C_Shuffle use kernel_gemm_xdlops_v3r1
This commit is contained in:
rocking5566
2022-01-25 12:44:13 +08:00
committed by GitHub
parent 4d40b1974e
commit ca47a6cfe2

View File

@@ -156,20 +156,20 @@ struct DeviceGemmXdl_C_Shuffle
MXdlPerWave,
NXdlPerWave,
ABlockTransferThreadClusterLengths_K0_M_K1,
Sequence<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder,
Sequence<1, 0, 2>, // ABlockTransferSrcAccessOrder,
2, // ABlockTransferSrcVectorDim,
ABlockTransferThreadClusterArrangeOrder,
ABlockTransferSrcAccessOrder,
ABlockTransferSrcVectorDim,
ABlockTransferSrcScalarPerVector,
ABlockTransferDstScalarPerVector_K1,
false, // AThreadTransferSrcResetCoordinateAfterRun,
false,
ABlockLdsAddExtraM,
BBlockTransferThreadClusterLengths_K0_N_K1,
Sequence<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder,
Sequence<1, 0, 2>, // BBlockTransferSrcAccessOrder,
2, // BBlockTransferSrcVectorDim,
BBlockTransferThreadClusterArrangeOrder,
BBlockTransferSrcAccessOrder,
BBlockTransferSrcVectorDim,
BBlockTransferSrcScalarPerVector,
BBlockTransferDstScalarPerVector_K1,
false, // BThreadTransferSrcResetCoordinateAfterRun,
false,
BBlockLdsAddExtraN,
CShuffleMXdlPerWavePerShuffle,
CShuffleNXdlPerWavePerShuffle,
@@ -317,7 +317,7 @@ struct DeviceGemmXdl_C_Shuffle
}
else
{
const auto kernel = kernel_gemm_xdlops_v2r3<
const auto kernel = kernel_gemm_xdlops_v3r1<
GridwiseGemm,
ADataType, // TODO: distiguish A/B datatype
CDataType,