mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 12:59:49 +00:00
Merge commit '2444c448955897918d0f724b2bb49fb630a540e6' into develop
This commit is contained in:
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@@ -48,10 +48,10 @@ using DeviceCGemmInstance = ck::tensor_operation::device::DeviceCGemm_4Gemm_Xdl_
|
||||
16, // index_t KPerBlock
|
||||
4, // index_t AK1
|
||||
4, // index_t BK1
|
||||
32, // index_t MPerXDL
|
||||
32, // index_t NPerXDL
|
||||
4, // index_t MXdlPerWave
|
||||
2, // index_t NXdlPerWave
|
||||
16, // index_t MPerXDL
|
||||
16, // index_t NPerXDL
|
||||
8, // index_t MXdlPerWave
|
||||
4, // index_t NXdlPerWave
|
||||
S<4, 64, 1>, // typename ABlockTransferThreadClusterLengths_AK0_M_AK1
|
||||
S<1, 0, 2>, // typename ABlockTransferThreadClusterArrangeOrder
|
||||
S<1, 0, 2>, // typename ABlockTransferSrcAccessOrder
|
||||
@@ -69,11 +69,16 @@ using DeviceCGemmInstance = ck::tensor_operation::device::DeviceCGemm_4Gemm_Xdl_
|
||||
1, // index_t CShuffleMXdlPerWavePerShuffle
|
||||
1, // index_t CShuffleNXdlPerWavePerShuffle
|
||||
S<1, 16, 1, 16>, // typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
4>; // index_t CShuffleBlockTransferScalarPerVector_NPerBlock
|
||||
2>; // index_t CShuffleBlockTransferScalarPerVector_NPerBlock
|
||||
// clang-format on
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
if(ck::is_gfx11_supported() || ck::is_gfx12_supported())
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool do_verification = true;
|
||||
int init_method = 1;
|
||||
bool time_kernel = false;
|
||||
@@ -87,25 +92,25 @@ int main(int argc, char* argv[])
|
||||
ck::index_t StrideB = 4096;
|
||||
ck::index_t StrideC = 4096;
|
||||
|
||||
if(argc == 4)
|
||||
if(argc == 1)
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = std::stoi(argv[3]);
|
||||
// use default case
|
||||
}
|
||||
else if(argc == 10)
|
||||
else if(argc == 4 || argc == 10)
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = std::stoi(argv[3]);
|
||||
if(argc == 10)
|
||||
{
|
||||
M = std::stoi(argv[4]);
|
||||
N = std::stoi(argv[5]);
|
||||
K = std::stoi(argv[6]);
|
||||
|
||||
M = std::stoi(argv[4]);
|
||||
N = std::stoi(argv[5]);
|
||||
K = std::stoi(argv[6]);
|
||||
|
||||
StrideA = std::stoi(argv[7]);
|
||||
StrideB = std::stoi(argv[8]);
|
||||
StrideC = std::stoi(argv[9]);
|
||||
StrideA = std::stoi(argv[7]);
|
||||
StrideB = std::stoi(argv[8]);
|
||||
StrideC = std::stoi(argv[9]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -114,7 +119,7 @@ int main(int argc, char* argv[])
|
||||
<< "arg3: run kernel # of times (>1)\n"
|
||||
<< "arg4 to 9: M (256x), N(128x), K(32x), StrideA, StrideB, StrideC\n"
|
||||
<< std::endl;
|
||||
exit(0);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return !run_cgemm_xdl<ADataType,
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
||||
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
|
||||
@@ -48,10 +48,10 @@ using DeviceCGemmInstance = ck::tensor_operation::device::DeviceCGemm_4Gemm_Xdl_
|
||||
64, // index_t KPerBlock
|
||||
16, // index_t AK1
|
||||
16, // index_t BK1
|
||||
32, // index_t MPerXDL
|
||||
32, // index_t NPerXDL
|
||||
4, // index_t MXdlPerWave
|
||||
2, // index_t NXdlPerWave
|
||||
16, // index_t MPerXDL
|
||||
16, // index_t NPerXDL
|
||||
8, // index_t MXdlPerWave
|
||||
4, // index_t NXdlPerWave
|
||||
S<4, 64, 1>, // typename ABlockTransferThreadClusterLengths_AK0_M_AK1
|
||||
S<1, 0, 2>, // typename ABlockTransferThreadClusterArrangeOrder
|
||||
S<1, 0, 2>, // typename ABlockTransferSrcAccessOrder
|
||||
@@ -68,8 +68,8 @@ using DeviceCGemmInstance = ck::tensor_operation::device::DeviceCGemm_4Gemm_Xdl_
|
||||
1, // index_t BBlockLdsExtraN
|
||||
1, // index_t CShuffleMXdlPerWavePerShuffle
|
||||
1, // index_t CShuffleNXdlPerWavePerShuffle
|
||||
S<1, 64, 1, 4>, // typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
16>; // index_t CShuffleBlockTransferScalarPerVector_NPerBlock
|
||||
S<1, 32, 1, 8>, // typename CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock
|
||||
4>; // index_t CShuffleBlockTransferScalarPerVector_NPerBlock
|
||||
// clang-format on
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
@@ -87,25 +87,25 @@ int main(int argc, char* argv[])
|
||||
ck::index_t StrideB = 4096;
|
||||
ck::index_t StrideC = 4096;
|
||||
|
||||
if(argc == 4)
|
||||
if(argc == 1)
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = std::stoi(argv[3]);
|
||||
// use default case
|
||||
}
|
||||
else if(argc == 10)
|
||||
else if(argc == 4 || argc == 10)
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = std::stoi(argv[3]);
|
||||
if(argc == 10)
|
||||
{
|
||||
M = std::stoi(argv[4]);
|
||||
N = std::stoi(argv[5]);
|
||||
K = std::stoi(argv[6]);
|
||||
|
||||
M = std::stoi(argv[4]);
|
||||
N = std::stoi(argv[5]);
|
||||
K = std::stoi(argv[6]);
|
||||
|
||||
StrideA = std::stoi(argv[7]);
|
||||
StrideB = std::stoi(argv[8]);
|
||||
StrideC = std::stoi(argv[9]);
|
||||
StrideA = std::stoi(argv[7]);
|
||||
StrideB = std::stoi(argv[8]);
|
||||
StrideC = std::stoi(argv[9]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -114,7 +114,7 @@ int main(int argc, char* argv[])
|
||||
<< "arg3: run kernel # of times (>1)\n"
|
||||
<< "arg4 to 9: M (256x), N(128x), K(32x), StrideA, StrideB, StrideC\n"
|
||||
<< std::endl;
|
||||
exit(0);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
return !run_cgemm_xdl<ADataType,
|
||||
|
||||
Reference in New Issue
Block a user