[CK] Add command option instance_index and param_mask to run partial ck test (#2889)

* [CK] Add command option instance_index and param_mask to run partial ck test

Many CK tests are instance tests: they loop over every instance in the instance library, which often makes them run out of time on a simulator/emulator.
This PR adds the command options instance_index and param_mask to reduce the workload of instance tests:

instance_index: run only the single available instance with the specified index.
param_mask: filter the embedded parameter sets with the specified bitmask.
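
As a rough sketch of how param_mask behaves in the updated tests (an illustrative model only, not code from this commit; the params vector and the printout are invented for the example), bit i of the mask enables the i-th embedded parameter set, so a mask of 0x5 runs sets 0 and 2:

#include <cstdio>
#include <vector>

int main()
{
    // Four embedded parameter sets, standing in for the test's params vector.
    std::vector<int> params = {10, 20, 30, 40};
    long param_mask = 0x5; // binary 0101: keep sets 0 and 2 only

    for(std::size_t i = 0; i < params.size(); i++)
    {
        if((param_mask & (1 << i)) == 0)
        {
            continue; // same bit test the tests use to skip a parameter set
        }
        std::printf("running parameter set %zu (value %d)\n", i, params[i]);
    }
    return 0;
}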

* fix CI error

* fix clang-format

---------

Co-authored-by: illsilin_amdeng <Illia.Silin@amd.com>

[ROCm/composable_kernel commit: e78a897ec0]
linqunAMD authored on 2025-09-30 23:24:40 +08:00, committed by GitHub
parent 780456f1ce, commit 6c4ff0b062
113 changed files with 2804 additions and 704 deletions

View File

@@ -100,13 +100,13 @@ int main(int argc, char* argv[])
const std::array<int, 2> reduceDims = {3, 4};
// const std::array<int, 3> invariantDims = {0, 1, 2};
const std::vector<size_t> inLengths_1 = {64, 320, 80, 4, 128};
std::vector<size_t> inLengths_1 = {64, 320, 80, 4, 128};
// input lengths of the second reduction, which is also the output lengths of the first
// reduction
const std::vector<size_t> inLengths_2 = {64, 320, 80, 4};
std::vector<size_t> inLengths_2 = {64, 320, 80, 4};
const std::vector<size_t> outLengths = {64, 320, 80};
std::vector<size_t> outLengths = {64, 320, 80};
if(argc == 1)
{
@@ -114,11 +114,26 @@ int main(int argc, char* argv[])
init_method = 2;
time_kernel = true;
}
else if(argc == 4)
else if((argc == 4) || (argc == 9))
{
do_verify = static_cast<bool>(argv[1]);
init_method = atoi(argv[2]);
time_kernel = static_cast<bool>(atoi(argv[3]));
if(argc == 9)
{
inLengths_1[0] = atoi(argv[4]);
inLengths_1[1] = atoi(argv[5]);
inLengths_1[2] = atoi(argv[6]);
inLengths_1[3] = atoi(argv[7]);
inLengths_1[4] = atoi(argv[8]);
inLengths_2[0] = inLengths_1[0];
inLengths_2[1] = inLengths_1[1];
inLengths_2[2] = inLengths_1[2];
inLengths_2[3] = inLengths_1[3];
outLengths[0] = inLengths_1[0];
outLengths[1] = inLengths_1[1];
outLengths[2] = inLengths_1[2];
}
}
else
{

View File

@@ -50,14 +50,14 @@ template<> struct emb_kernel<ck::half_t, 8192> { using kernel_type = DeviceInsta
// clang-format on
int main()
int main(int argc, char* argv[])
{
bool time_kernel = true;
constexpr auto num_rows = 65536;
constexpr auto dims = ck::Sequence<256, 512, 768, 1024, 1536, 2048, 4096, 8192>{};
// constexpr auto dims = ck::Sequence<256, 512>{};
constexpr auto index_length = 2048;
ck::index_t num_rows = 65536;
constexpr auto dims = ck::Sequence<256, 512, 768, 1024, 1536, 2048, 4096, 8192>{};
ck::index_t index_length = 2048;
ck::index_t dim_mask = 0xffff;
constexpr AccDataType epsilon = 1e-4;
auto f_host_tensor_desc_1d = [](std::size_t len_) { return HostTensorDescriptor({len_}); };
@@ -73,121 +73,140 @@ int main()
BetaDataType,
AccDataType,
OutType>;
if(argc == 1)
{
// Use default value
}
else if(argc == 4)
{
num_rows = atoi(argv[1]);
dim_mask = strtol(argv[2], nullptr, 0);
index_length = atoi(argv[3]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1-3: num_rows dim_mask index_length" << std::endl;
}
ck::static_for<0, dims.Size(), 1>{}([&](auto I) {
std::srand(std::time(nullptr));
constexpr auto current_dim = dims.At(I);
Tensor<EmbType> emb_a(f_host_tensor_desc_2d(num_rows, current_dim));
Tensor<EmbType> emb_b(f_host_tensor_desc_2d(num_rows, current_dim));
Tensor<EmbType> emb_c(f_host_tensor_desc_2d(num_rows, current_dim));
Tensor<IndexType> index_a(f_host_tensor_desc_1d(index_length));
Tensor<IndexType> index_b(f_host_tensor_desc_1d(index_length));
Tensor<IndexType> index_c(f_host_tensor_desc_1d(index_length));
Tensor<GammaDataType> gamma(f_host_tensor_desc_1d(current_dim));
Tensor<BetaDataType> beta(f_host_tensor_desc_1d(current_dim));
Tensor<OutType> out(f_host_tensor_desc_2d(index_length, current_dim));
emb_a.GenerateTensorValue(GeneratorTensor_3<EmbType>{0.0, 1.0});
emb_b.GenerateTensorValue(GeneratorTensor_3<EmbType>{0.0, 1.0});
emb_c.GenerateTensorValue(GeneratorTensor_3<EmbType>{0.0, 1.0});
index_a.GenerateTensorValue(GeneratorTensor_2<IndexType>{0, num_rows});
index_b.GenerateTensorValue(GeneratorTensor_2<IndexType>{0, num_rows});
index_c.GenerateTensorValue(GeneratorTensor_2<IndexType>{0, num_rows});
gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
beta.GenerateTensorValue(GeneratorTensor_3<BetaDataType>{0.0, 1.0});
DeviceMem emb_a_dev(sizeof(EmbType) * emb_a.mDesc.GetElementSpaceSize());
DeviceMem emb_b_dev(sizeof(EmbType) * emb_b.mDesc.GetElementSpaceSize());
DeviceMem emb_c_dev(sizeof(EmbType) * emb_c.mDesc.GetElementSpaceSize());
DeviceMem index_a_dev(sizeof(IndexType) * index_a.mDesc.GetElementSpaceSize());
DeviceMem index_b_dev(sizeof(IndexType) * index_b.mDesc.GetElementSpaceSize());
DeviceMem index_c_dev(sizeof(IndexType) * index_c.mDesc.GetElementSpaceSize());
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
DeviceMem out_dev(sizeof(OutType) * out.mDesc.GetElementSpaceSize());
emb_a_dev.ToDevice(emb_a.mData.data());
emb_b_dev.ToDevice(emb_b.mData.data());
emb_c_dev.ToDevice(emb_c.mData.data());
index_a_dev.ToDevice(index_a.mData.data());
index_b_dev.ToDevice(index_b.mData.data());
index_c_dev.ToDevice(index_c.mData.data());
gamma_dev.ToDevice(gamma.mData.data());
beta_dev.ToDevice(beta.mData.data());
auto device_instance = typename emb_kernel<EmbType, current_dim>::kernel_type{};
auto argument_ptr = device_instance.MakeArgumentPointer(
out_dev.GetDeviceBuffer(),
{ck::type_convert<EmbType*>(emb_a_dev.GetDeviceBuffer()),
ck::type_convert<EmbType*>(emb_b_dev.GetDeviceBuffer()),
ck::type_convert<EmbType*>(emb_c_dev.GetDeviceBuffer())},
{ck::type_convert<IndexType*>(index_a_dev.GetDeviceBuffer()),
ck::type_convert<IndexType*>(index_b_dev.GetDeviceBuffer()),
ck::type_convert<IndexType*>(index_c_dev.GetDeviceBuffer())},
gamma_dev.GetDeviceBuffer(),
beta_dev.GetDeviceBuffer(),
current_dim,
index_length,
epsilon,
EmbElementwiseOperation{});
std::cout << "Dim:" << current_dim << ", kernel:" << device_instance.GetTypeString()
<< std::endl
<< std::flush;
bool is_supported = device_instance.IsSupportedArgument(argument_ptr.get());
if(!is_supported)
if(dim_mask & (1 << I.value))
{
std::cout << "Runtime parameters are not supported" << std::endl;
return;
std::srand(std::time(nullptr));
constexpr auto current_dim = dims.At(I);
Tensor<EmbType> emb_a(f_host_tensor_desc_2d(num_rows, current_dim));
Tensor<EmbType> emb_b(f_host_tensor_desc_2d(num_rows, current_dim));
Tensor<EmbType> emb_c(f_host_tensor_desc_2d(num_rows, current_dim));
Tensor<IndexType> index_a(f_host_tensor_desc_1d(index_length));
Tensor<IndexType> index_b(f_host_tensor_desc_1d(index_length));
Tensor<IndexType> index_c(f_host_tensor_desc_1d(index_length));
Tensor<GammaDataType> gamma(f_host_tensor_desc_1d(current_dim));
Tensor<BetaDataType> beta(f_host_tensor_desc_1d(current_dim));
Tensor<OutType> out(f_host_tensor_desc_2d(index_length, current_dim));
emb_a.GenerateTensorValue(GeneratorTensor_3<EmbType>{0.0, 1.0});
emb_b.GenerateTensorValue(GeneratorTensor_3<EmbType>{0.0, 1.0});
emb_c.GenerateTensorValue(GeneratorTensor_3<EmbType>{0.0, 1.0});
index_a.GenerateTensorValue(GeneratorTensor_2<IndexType>{0, num_rows});
index_b.GenerateTensorValue(GeneratorTensor_2<IndexType>{0, num_rows});
index_c.GenerateTensorValue(GeneratorTensor_2<IndexType>{0, num_rows});
gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
beta.GenerateTensorValue(GeneratorTensor_3<BetaDataType>{0.0, 1.0});
DeviceMem emb_a_dev(sizeof(EmbType) * emb_a.mDesc.GetElementSpaceSize());
DeviceMem emb_b_dev(sizeof(EmbType) * emb_b.mDesc.GetElementSpaceSize());
DeviceMem emb_c_dev(sizeof(EmbType) * emb_c.mDesc.GetElementSpaceSize());
DeviceMem index_a_dev(sizeof(IndexType) * index_a.mDesc.GetElementSpaceSize());
DeviceMem index_b_dev(sizeof(IndexType) * index_b.mDesc.GetElementSpaceSize());
DeviceMem index_c_dev(sizeof(IndexType) * index_c.mDesc.GetElementSpaceSize());
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
DeviceMem out_dev(sizeof(OutType) * out.mDesc.GetElementSpaceSize());
emb_a_dev.ToDevice(emb_a.mData.data());
emb_b_dev.ToDevice(emb_b.mData.data());
emb_c_dev.ToDevice(emb_c.mData.data());
index_a_dev.ToDevice(index_a.mData.data());
index_b_dev.ToDevice(index_b.mData.data());
index_c_dev.ToDevice(index_c.mData.data());
gamma_dev.ToDevice(gamma.mData.data());
beta_dev.ToDevice(beta.mData.data());
auto device_instance = typename emb_kernel<EmbType, current_dim>::kernel_type{};
auto argument_ptr = device_instance.MakeArgumentPointer(
out_dev.GetDeviceBuffer(),
{ck::type_convert<EmbType*>(emb_a_dev.GetDeviceBuffer()),
ck::type_convert<EmbType*>(emb_b_dev.GetDeviceBuffer()),
ck::type_convert<EmbType*>(emb_c_dev.GetDeviceBuffer())},
{ck::type_convert<IndexType*>(index_a_dev.GetDeviceBuffer()),
ck::type_convert<IndexType*>(index_b_dev.GetDeviceBuffer()),
ck::type_convert<IndexType*>(index_c_dev.GetDeviceBuffer())},
gamma_dev.GetDeviceBuffer(),
beta_dev.GetDeviceBuffer(),
current_dim,
index_length,
epsilon,
EmbElementwiseOperation{});
std::cout << "Dim:" << current_dim << ", kernel:" << device_instance.GetTypeString()
<< std::endl
<< std::flush;
bool is_supported = device_instance.IsSupportedArgument(argument_ptr.get());
if(!is_supported)
{
std::cout << "Runtime parameters are not supported" << std::endl;
return;
}
auto invoker_ptr = device_instance.MakeInvokerPointer();
float time_ms =
invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
bool pass = true;
{
Tensor<OutType> out_from_dev(f_host_tensor_desc_2d(index_length, current_dim));
ReferenceInstance ref;
auto ref_argument = ref.MakeArgument(out,
emb_a,
emb_b,
emb_c,
index_a,
index_b,
index_c,
gamma,
beta,
num_rows,
current_dim,
index_length,
epsilon);
auto ref_invoker = ref.MakeInvoker();
ref_invoker.Run(ref_argument);
out_dev.FromDevice(out_from_dev.mData.data());
pass &=
ck::utils::check_err(out_from_dev, out, "Error: Incorrect results", 1e-3, 1e-3);
}
double total_read = current_dim * index_length * 3 * sizeof(EmbType) +
current_dim * sizeof(GammaDataType) +
current_dim * sizeof(BetaDataType);
double total_write = current_dim * index_length * sizeof(OutType);
double gbps = (total_read + total_write) / time_ms / 1e6;
std::cout << ", total bytes:" << (total_read + total_write) << ", time:" << time_ms
<< ", gbps:" << gbps << ", valid:" << (pass ? "y" : "n") << std::endl
<< std::flush;
}
auto invoker_ptr = device_instance.MakeInvokerPointer();
float time_ms = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr, time_kernel});
bool pass = true;
{
Tensor<OutType> out_from_dev(f_host_tensor_desc_2d(index_length, current_dim));
ReferenceInstance ref;
auto ref_argument = ref.MakeArgument(out,
emb_a,
emb_b,
emb_c,
index_a,
index_b,
index_c,
gamma,
beta,
num_rows,
current_dim,
index_length,
epsilon);
auto ref_invoker = ref.MakeInvoker();
ref_invoker.Run(ref_argument);
out_dev.FromDevice(out_from_dev.mData.data());
pass &= ck::utils::check_err(out_from_dev, out, "Error: Incorrect results", 1e-3, 1e-3);
}
double total_read = current_dim * index_length * 3 * sizeof(EmbType) +
current_dim * sizeof(GammaDataType) +
current_dim * sizeof(BetaDataType);
double total_write = current_dim * index_length * sizeof(OutType);
double gbps = (total_read + total_write) / time_ms / 1e6;
std::cout << ", total bytes:" << (total_read + total_write) << ", time:" << time_ms
<< ", gbps:" << gbps << ", valid:" << (pass ? "y" : "n") << std::endl
<< std::flush;
});
return 0;

View File

@@ -68,6 +68,24 @@ int main(int argc, char* argv[])
}
std::vector<std::size_t> nchw = {16, 128, 32, 64};
if(argc == 1)
{
// use default case
}
else if(argc == 5)
{
nchw[0] = std::stoi(argv[1]);
nchw[1] = std::stoi(argv[2]);
nchw[2] = std::stoi(argv[3]);
nchw[3] = std::stoi(argv[4]);
}
else
{
std::cerr << "arg1 to 4: N, C, H, W" << std::endl;
return 1;
}
std::array<ck::index_t, 4> ab_lengths;
std::array<ck::index_t, 4> ab_strides = {static_cast<int>(nchw[1] * nchw[2] * nchw[3]),
static_cast<int>(nchw[2] * nchw[3]),

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
@@ -98,8 +98,23 @@ int main(int argc, char* argv[])
exit(0);
}
ck::index_t M = 48 * 256;
ck::index_t N = 1024;
ck::index_t M = 48 * 256;
ck::index_t N = 1024;
if(argc == 1)
{
// use default case
}
else if(argc == 3)
{
M = std::stoi(argv[1]);
N = std::stoi(argv[2]);
}
else
{
std::cerr << "arg1 to 2: M, N" << std::endl;
return 1;
}
ck::index_t Stride = N;
auto f_host_tensor_descriptor1d = [](std::size_t len, std::size_t stride) {

View File

@@ -100,7 +100,7 @@ using GammaBetaDeviceInstance = ck::tensor_operation::device::DeviceNormalizatio
4, // DGammaDstVectorSize
4>; // DBetaDstVectorSize
int main()
int main(int argc, char* argv[])
{
bool time_kernel = false;
@@ -110,6 +110,25 @@ int main()
ck::index_t G = 32;
ck::index_t C = 64;
if(argc == 1)
{
// use default case
}
else if(argc == 6)
{
N = std::stoi(argv[1]);
H = std::stoi(argv[2]);
W = std::stoi(argv[3]);
G = std::stoi(argv[4]);
C = std::stoi(argv[5]);
}
else
{
std::cerr << "arg1 to 5: N, H, W, G, C" << std::endl;
return 1;
}
Tensor<DYDataType> dy({N, H, W, G, C});
Tensor<XDataType> x({N, H, W, G, C});
Tensor<GammaDataType> gamma({G, C});

View File

@@ -39,7 +39,8 @@ bool profile_avg_pool2d_bwd_impl(int do_verification,
std::vector<index_t> window_strides,
std::vector<index_t> window_dilations,
std::vector<index_t> input_left_pads,
std::vector<index_t> input_right_pads)
std::vector<index_t> input_right_pads,
index_t instance_index = -1)
{
constexpr index_t InOutRank = 4;
constexpr index_t WindowRank = 2;
@@ -166,6 +167,11 @@ bool profile_avg_pool2d_bwd_impl(int do_verification,
{
++num_kernel;
instance_found = true;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -249,7 +255,11 @@ bool profile_avg_pool2d_bwd_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "avg_pool2d_bwd_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass && instance_found;
}
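
To make the skip condition above concrete: num_kernel counts only the instances whose arguments are supported, so instance_index selects among supported instances, zero-based, while num_kernel itself is a 1-based running count. A minimal standalone sketch (the supported flags and printout are invented for illustration; this is not code from the commit):

#include <cstdio>

int main()
{
    const bool supported[5] = {true, false, true, true, false};
    int instance_index = 1; // request the second supported instance
    int num_kernel     = 0; // counts supported instances, 1-based

    for(int i = 0; i < 5; i++)
    {
        if(!supported[i])
        {
            continue; // unsupported instances never advance num_kernel
        }
        ++num_kernel;
        if((instance_index != -1) && (instance_index + 1 != num_kernel))
        {
            continue; // not the requested instance; skip it
        }
        std::printf("profiling op %d (supported instance #%d)\n", i, num_kernel);
    }
    return 0;
}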

View File

@@ -48,7 +48,8 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
std::vector<index_t> window_strides,
std::vector<index_t> window_dilations,
std::vector<index_t> input_left_pads,
std::vector<index_t> input_right_pads)
std::vector<index_t> input_right_pads,
index_t instance_index = -1)
{
constexpr index_t InOutRank = 5;
constexpr index_t WindowRank = 3;
@@ -166,6 +167,11 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -246,7 +252,11 @@ bool profile_avg_pool3d_bwd_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "avg_pool3d_bwd_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -49,10 +49,10 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
int O,
int G0,
int G1,
float alpha = -1.f)
float alpha = -1.f,
int instance_index = -1)
{
using PassThrough = tensor_operation::element_wise::PassThrough;
using ScaleAdd = tensor_operation::element_wise::ScaleAdd;
using AElementOp = PassThrough;
@@ -277,7 +277,7 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
{
@@ -314,6 +314,13 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
std::string op_name = op_ptr->GetTypeString();
float ave_time =
@@ -392,6 +399,11 @@ bool profile_batched_gemm_bias_softmax_gemm_permute_impl(bool do_verification,
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
if(instance_index != -1)
{
std::cout << "batched_gemm_bias_softmax_gemm_permute_instance (" << instance_index << "/"
<< num_kernel << "): Passed" << std::endl;
}
return pass;
}

View File

@@ -47,7 +47,8 @@ bool profile_batched_gemm_impl(int do_verification,
int BatchStrideA,
int BatchStrideB,
int BatchStrideC,
int BatchCount)
int BatchCount,
int instance_index = -1)
{
bool pass = true;
@@ -138,6 +139,7 @@ bool profile_batched_gemm_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
@@ -203,6 +205,12 @@ bool profile_batched_gemm_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
// re-init C to zero before profiling next kernel
c_device_buf.SetZero();
@@ -259,6 +267,11 @@ bool profile_batched_gemm_impl(int do_verification,
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
if(instance_index != -1)
{
std::cout << "batched_gemm_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return pass;
}

View File

@@ -40,19 +40,19 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
int N,
int K,
int O,
int BatchCount = 1,
int StrideA = -1,
int StrideB0 = -1,
int StrideB1 = -1,
int StrideC = -1,
int BatchStrideA = -1,
int BatchStrideB0 = -1,
int BatchStrideB1 = -1,
int BatchStrideC = -1,
float alpha = -1.f)
int BatchCount = 1,
int StrideA = -1,
int StrideB0 = -1,
int StrideB1 = -1,
int StrideC = -1,
int BatchStrideA = -1,
int BatchStrideB0 = -1,
int BatchStrideB1 = -1,
int BatchStrideC = -1,
float alpha = -1.f,
int instance_index = -1)
{
using Row = tensor_layout::gemm::RowMajor;
using Col = tensor_layout::gemm::ColumnMajor;
using PassThrough = tensor_operation::element_wise::PassThrough;
@@ -253,7 +253,7 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
{
@@ -285,6 +285,13 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
std::string op_name = op_ptr->GetTypeString();
float ave_time =
@@ -341,7 +348,11 @@ bool profile_batched_gemm_softmax_gemm_impl(bool do_verification,
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
if(instance_index != -1)
{
std::cout << "batched_gemm_softmax_gemm_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -48,10 +48,10 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
int O,
int G0,
int G1,
float alpha = -1.f)
float alpha = -1.f,
int instance_index = -1)
{
using PassThrough = tensor_operation::element_wise::PassThrough;
using Scale = tensor_operation::element_wise::Scale;
using AElementOp = PassThrough;
@@ -254,6 +254,7 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
@@ -287,6 +288,13 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
std::string op_name = op_ptr->GetTypeString();
float ave_time =
@@ -362,7 +370,11 @@ bool profile_batched_gemm_softmax_gemm_permute_impl(bool do_verification,
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_op_name << std::endl;
if(instance_index != -1)
{
std::cout << "batched_gemm_softmax_gemm_permute_instance (" << instance_index << "/"
<< num_kernel << "): Passed" << std::endl;
}
return pass;
}

View File

@@ -34,7 +34,8 @@ bool profile_batchnorm_backward_impl(bool do_verification,
const std::vector<size_t> inOutLengths,
const std::vector<int> reduceDims,
bool haveSavedMeanInvVar,
double epsilon)
double epsilon,
index_t instance_index = -1)
{
if(inOutLengths.size() != Rank || reduceDims.size() != NumBatchNormReduceDim)
{
@@ -293,6 +294,11 @@ bool profile_batchnorm_backward_impl(bool do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -382,7 +388,11 @@ bool profile_batchnorm_backward_impl(bool do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if (instance_index != -1)
{
std::cout << "batchnorm_backward_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -35,7 +35,8 @@ bool profile_batchnorm_forward_impl(int do_verification,
bool updateMovingAverage,
bool saveMeanAndInvVariance,
double averageFactor,
double epsilon)
double epsilon,
index_t instance_index = -1)
{
if(inOutLengths.size() != Rank || reduceDims.size() != NumBatchNormReduceDim)
{
@@ -287,6 +288,11 @@ bool profile_batchnorm_forward_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -404,7 +410,11 @@ bool profile_batchnorm_forward_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "batchnorm_forward_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -32,7 +32,8 @@ bool profile_batchnorm_infer_impl(int do_verification,
bool time_kernel,
const std::vector<size_t> inOutLengths,
const std::vector<int> reduceDims,
double epsilon)
double epsilon,
index_t instance_index = -1)
{
if(inOutLengths.size() != Rank || reduceDims.size() != NumBatchNormReduceDim)
{
@@ -253,6 +254,11 @@ bool profile_batchnorm_infer_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -327,7 +333,11 @@ bool profile_batchnorm_infer_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if (instance_index != -1)
{
std::cout << "batchnorm_infer_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -54,7 +54,8 @@ int profile_contraction_impl(ck::index_t do_verification,
const std::vector<ck::index_t>& StridesA, // [M0, M1, K0, K1]
const std::vector<ck::index_t>& StridesB, // [N0, N1, K0, K1]
const std::vector<ck::index_t>& StridesE, // [M0, M1, N0, N1]
const std::vector<ck::index_t>& StridesD) // [M0, M1, N0, N1]
const std::vector<ck::index_t>& StridesD, // [M0, M1, N0, N1]
int instance_index = -1)
{
bool pass = true;
@@ -197,7 +198,7 @@ int profile_contraction_impl(ck::index_t do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
for(auto& op_ptr : op_ptrs)
{
@@ -256,6 +257,12 @@ int profile_contraction_impl(ck::index_t do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
// re-init C to zero before profiling next kernel
e_device_buf.SetZero();
@@ -376,6 +383,11 @@ int profile_contraction_impl(ck::index_t do_verification,
<< " ms, " << best_tflops << " TFlops, " << best_gb_per_sec << " GB/s, "
<< best_op_name << std::endl;
if(instance_index != -1)
{
std::cout << "contraction_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return pass;
}

View File

@@ -58,7 +58,8 @@ bool profile_conv_bwd_data_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
const ck::utils::conv::ConvParam& conv_param,
int instance_index = -1)
{
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
@@ -174,7 +175,7 @@ bool profile_conv_bwd_data_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device Conv instances
bool pass = true;
@@ -200,6 +201,12 @@ bool profile_conv_bwd_data_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
// for conv bwd data, some input tensor element are zero, but not written by kernel,
// need to set zero
in_device_buf.SetZero();
@@ -263,7 +270,11 @@ bool profile_conv_bwd_data_impl(int do_verification,
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << std::endl;
if(instance_index != -1)
{
std::cout << "conv_bwd_data_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -36,7 +36,8 @@ bool profile_conv_fwd_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
const ck::utils::conv::ConvParam& conv_param,
int instance_index = -1)
{
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
@@ -156,7 +157,7 @@ bool profile_conv_fwd_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
bool pass = true;
@@ -182,6 +183,12 @@ bool profile_conv_fwd_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
// re-init output to zero before profiling next kernel
out_device_buf.SetZero();
@@ -236,7 +243,11 @@ bool profile_conv_fwd_impl(int do_verification,
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << std::endl;
if(instance_index != -1)
{
std::cout << "conv_fwd_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return pass;
}

View File

@@ -122,7 +122,8 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
const ck::utils::conv::ConvParam& conv_param,
index_t instance_index = -1)
{
const ck::index_t NDoHoWo =
conv_param.N_ *
@@ -226,7 +227,7 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
// profile device op instances
bool pass = true;
bool is_supporting_instance = false;
index_t num_kernel = 0;
for(auto& op_ptr : op_ptrs)
{
auto argument_ptr = op_ptr->MakeArgumentPointer(
@@ -247,6 +248,12 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
is_supporting_instance = true;
// re-init output to zero before profiling next kernel
out_device_buf.SetZero();
@@ -291,6 +298,11 @@ bool profile_conv_tensor_rearrange_impl(int do_verification,
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\nGB/s: " << best_gb_per_sec << std::endl;
if(instance_index != -1)
{
std::cout << "conv_tensor_rearrange_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return is_supporting_instance && pass;
}

View File

@@ -49,7 +49,8 @@ bool profile_elementwise_layernorm_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
std::vector<index_t> length)
std::vector<index_t> length,
index_t instance_index = -1)
{
using Add = ck::tensor_operation::element_wise::Add;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
@@ -199,6 +200,11 @@ bool profile_elementwise_layernorm_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -270,6 +276,11 @@ bool profile_elementwise_layernorm_impl(int do_verification,
return false;
}
if(instance_index != -1)
{
std::cout << "elementwise_layernorm_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -70,7 +70,8 @@ bool profile_gemm_reduce_impl(int do_verification,
int K,
int StrideA,
int StrideB,
int StrideC)
int StrideC,
int instance_index = -1)
{
bool pass = true;
@@ -249,7 +250,7 @@ bool profile_gemm_reduce_impl(int do_verification,
float best_ave_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device GEMM instances
for(auto& gemm_ptr : gemm_ptrs)
{
@@ -275,6 +276,12 @@ bool profile_gemm_reduce_impl(int do_verification,
if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
// init DO, D1 to 0
reduce0_device_buf.SetZero();
reduce1_device_buf.SetZero();
@@ -345,7 +352,11 @@ bool profile_gemm_reduce_impl(int do_verification,
std::cout << "Best Perf: " << best_ave_time << " ms, " << best_tflops << " TFlops, "
<< best_gb_per_sec << " GB/s, " << best_gemm_name << std::endl;
if(instance_index != -1)
{
std::cout << "gemm_reduce_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return pass;
}

View File

@@ -44,7 +44,8 @@ bool profile_gemm_splitk_impl(int do_verification,
int StrideC,
int KBatch,
int n_warmup,
int n_iter)
int n_iter,
int instance_index = -1)
{
bool pass = true;
@@ -141,6 +142,7 @@ bool profile_gemm_splitk_impl(int do_verification,
float best_tflops = 0;
float best_gb_per_sec = 0;
float best_kbatch = 0;
int num_kernel = 0;
// profile device GEMM instances
for(auto& op_ptr : op_ptrs)
@@ -175,7 +177,12 @@ bool profile_gemm_splitk_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
// re-init C to zero before profiling next kernel
c_device_buf.SetZero();
@@ -294,7 +301,11 @@ bool profile_gemm_splitk_impl(int do_verification,
<< " StrideB = " << StrideB << " StrideC = " << StrideC << " KBatch = " << best_kbatch
<< " : " << best_ave_time << " ms, " << best_tflops << " TFlops, " << best_gb_per_sec
<< " GB/s, " << best_op_name << std::endl;
if(instance_index != -1)
{
std::cout << "gemm_splitk_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return pass;
}

View File

@@ -35,7 +35,8 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param,
ck::index_t split_k = 1)
ck::index_t split_k = 1,
index_t instance_index = -1)
{
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
@@ -123,9 +124,9 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
ck::index_t best_split_k = 1;
// profile device op instances
bool pass = true;
auto run_impl = [&](auto& op_ptr, auto& argument_ptr, const index_t& split_k_for_run) {
bool pass = true;
index_t num_kernel = 0;
auto run_impl = [&](auto& op_ptr, auto& argument_ptr, const index_t& split_k_for_run) {
// workspace_sz will be equal to 0 for other layout than NGCHW
const std::size_t workspace_sz = op_ptr->GetWorkSpaceSize(argument_ptr.get());
DeviceMem workspace_dev(workspace_sz);
@@ -133,6 +134,12 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
return;
}
std::string op_name = op_ptr->GetTypeString();
auto invoker_ptr = op_ptr->MakeInvokerPointer();
@@ -165,8 +172,8 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
in_device_buf.FromDevice(in_device.mData.data());
using ComputeType = std::conditional_t<sizeof(OutDataType) < sizeof(WeiDataType),
OutDataType,
WeiDataType>;
OutDataType,
WeiDataType>;
using AccDataType =
std::conditional_t<std::is_same_v<ComputeType, int8_t>, int32_t, float>;
const index_t num_accums = conv_param.K_;
@@ -297,6 +304,11 @@ bool profile_grouped_conv_bwd_data_impl(int do_verification,
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << ", SplitK " << best_split_k << std::endl;
if(instance_index != -1)
{
std::cout << "grouped_conv_bwd_data_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -41,7 +41,8 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param,
const std::string& split_k)
const std::string& split_k,
index_t instance_index = -1)
{
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
@@ -187,6 +188,7 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
}
}
index_t num_kernel = 0;
for(auto& op_ptr : op_ptrs)
{
for(std::size_t split_k_id = 0; split_k_id < split_k_list.size(); split_k_id++)
@@ -226,6 +228,12 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
std::string op_name = op_ptr->GetTypeString();
@@ -326,6 +334,11 @@ bool profile_grouped_conv_bwd_weight_impl(int do_verification,
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << ", SplitK " << best_split_k << std::endl;
if(instance_index != -1)
{
std::cout << "grouped_conv_bwd_weight_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return all_pass;
}

View File

@@ -126,7 +126,8 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
const ck::utils::conv::ConvParam& conv_param,
int instance_index = -1)
{
const float floor = 0.f;
const float ceil = 2048.f;
@@ -295,6 +296,7 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
bool pass = true;
@@ -307,6 +309,13 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
std::cout << op_ptr->GetTypeString() << " skipped" << std::endl;
return;
}
// re-init output to zero before profiling next kernel
out_device_buf.SetZero();
@@ -420,7 +429,11 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << std::endl;
if(instance_index != -1)
{
std::cout << "grouped_conv_fwd_bias_bnorm_clamp_instance (" << instance_index << "/"
<< num_kernel << "): Passed" << std::endl;
}
return pass;
}

View File

@@ -64,7 +64,8 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param)
const ck::utils::conv::ConvParam& conv_param,
int instance_index = -1)
{
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
@@ -194,7 +195,7 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
int num_kernel = 0;
// profile device op instances
bool pass = true;
@@ -206,6 +207,13 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
std::cout << op_ptr->GetTypeString() << " skipped" << std::endl;
return;
}
// re-init output to zero before profiling next kernel
out_device_buf.SetZero();
@@ -317,7 +325,11 @@ bool profile_grouped_conv_fwd_bias_clamp_impl(int do_verification,
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << std::endl;
if(instance_index != -1)
{
std::cout << "grouped_conv_fwd_bias_clamp_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -42,7 +42,8 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
bool do_log,
bool time_kernel,
const ck::utils::conv::ConvParam& conv_param,
const OutElementOp out_element_op = OutElementOp{})
const OutElementOp out_element_op = OutElementOp{},
index_t instance_index = -1)
{
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
@@ -144,7 +145,7 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
float best_avg_time = 0;
float best_tflops = 0;
float best_gb_per_sec = 0;
index_t num_kernel = 0;
// profile device op instances
bool pass = true;
@@ -156,6 +157,13 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
return;
}
std::string op_name = op_ptr->GetTypeString();
auto invoker_ptr = op_ptr->MakeInvokerPointer();
@@ -253,7 +261,11 @@ bool profile_grouped_conv_fwd_impl(int do_verification,
std::cout << "Best configuration parameters:" << "\nname: " << best_op_name
<< "\navg_time: " << best_avg_time << "\ntflops: " << best_tflops
<< "\nGB/s: " << best_gb_per_sec << std::endl;
if(instance_index != -1)
{
std::cout << "grouped_conv_fwd_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass;
}

View File

@@ -44,7 +44,8 @@ bool profile_grouped_gemm_impl(int do_verification,
const std::vector<int>& StrideCs,
const std::vector<int>& kbatches = {},
int n_warmup = 1,
int n_iter = 10)
int n_iter = 10,
int instance_index = -1)
{
bool pass = true;
// TODO: Fixme - we do not pass compute data type here but need it
@@ -195,8 +196,8 @@ bool profile_grouped_gemm_impl(int do_verification,
float best_tflops = 0;
float best_gb_per_sec = 0;
float best_kbatch = 0;
auto p_ds = std::vector<std::array<const void*, 0>>{};
int num_kernel = 0;
auto p_ds = std::vector<std::array<const void*, 0>>{};
if(do_verification)
{
@@ -279,6 +280,13 @@ bool profile_grouped_gemm_impl(int do_verification,
if(gemm_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
for(std::size_t i = 0; i < gemm_descs.size(); i++)
c_device_buf[i]->SetZero();
@@ -371,7 +379,11 @@ bool profile_grouped_gemm_impl(int do_verification,
<< best_gb_per_sec << " GB/s, " << best_gemm_name << ", KBatch = " << best_kbatch
<< std::endl;
}
if(instance_index != -1)
{
std::cout << "grouped_gemm_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return pass;
}

View File

@@ -26,7 +26,8 @@ bool profile_groupnorm_bwd_data_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
std::vector<index_t> length)
std::vector<index_t> length,
index_t instance_index = -1)
{
// we don't need DGamma and DBeta here, just for reference class
using DGammaDataType = DXDataType;
@@ -162,6 +163,11 @@ bool profile_groupnorm_bwd_data_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -242,7 +248,11 @@ bool profile_groupnorm_bwd_data_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "groupnorm_bwd_data_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -29,7 +29,8 @@ bool profile_groupnorm_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
std::vector<index_t> length)
std::vector<index_t> length,
index_t instance_index = -1)
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
@@ -178,6 +179,11 @@ bool profile_groupnorm_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -267,6 +273,12 @@ bool profile_groupnorm_impl(int do_verification,
return false;
}
if(instance_index != -1)
{
std::cout << "groupnorm_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return true;
}

View File

@@ -27,7 +27,8 @@ bool profile_layernorm_bwd_data_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
std::vector<index_t> length)
std::vector<index_t> length,
index_t instance_index = -1)
{
// we don't need DGamma and DBeta here, just for reference class
using DGammaDataType = DXDataType;
@@ -167,6 +168,11 @@ bool profile_layernorm_bwd_data_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -247,7 +253,11 @@ bool profile_layernorm_bwd_data_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "layernorm_bwd_data_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -27,7 +27,8 @@ bool profile_layernorm_bwd_gamma_beta_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
std::vector<index_t> length)
std::vector<index_t> length,
index_t instance_index = -1)
{
// we don't need GammaDataType and DXDataType here, just for reference class
using GammaDataType = DYDataType;
@@ -178,6 +179,11 @@ bool profile_layernorm_bwd_gamma_beta_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -255,7 +261,11 @@ bool profile_layernorm_bwd_gamma_beta_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "layernorm_bwd_gamma_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -28,7 +28,8 @@ bool profile_layernorm_impl(int do_verification,
int init_method,
bool do_log,
bool time_kernel,
std::vector<index_t> length)
std::vector<index_t> length,
index_t instance_index = -1)
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
@@ -188,6 +189,11 @@ bool profile_layernorm_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -286,6 +292,12 @@ bool profile_layernorm_impl(int do_verification,
return false;
}
if(instance_index != -1)
{
std::cout << "layernorm_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return true;
}

View File

@@ -34,7 +34,8 @@ bool profile_max_pool2d_bwd_impl(int do_verification,
std::vector<index_t> window_strides,
std::vector<index_t> window_dilations,
std::vector<index_t> input_left_pads,
std::vector<index_t> input_right_pads)
std::vector<index_t> input_right_pads,
index_t instance_index = -1)
{
// AtomicAdd only support f32 for now. ComputeDataType must be float32
using ComputeDataType = float;
@@ -199,6 +200,11 @@ bool profile_max_pool2d_bwd_impl(int do_verification,
{
++num_kernel;
instance_found = true;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -289,7 +295,11 @@ bool profile_max_pool2d_bwd_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "max_pool2d_bwd_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return pass && instance_found;
}

View File

@@ -34,7 +34,8 @@ bool profile_max_pool3d_bwd_impl(int do_verification,
std::vector<index_t> window_strides,
std::vector<index_t> window_dilations,
std::vector<index_t> input_left_pads,
std::vector<index_t> input_right_pads)
std::vector<index_t> input_right_pads,
index_t instance_index = -1)
{
// AtomicAdd only support f32 for now. ComputeDataType must be float32
using ComputeDataType = float;
@@ -193,6 +194,11 @@ bool profile_max_pool3d_bwd_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -281,7 +287,11 @@ bool profile_max_pool3d_bwd_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "max_pool3d_bwd_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -35,7 +35,8 @@ bool profile_pool2d_fwd_impl(int do_verification,
std::vector<index_t> window_strides,
std::vector<index_t> window_dilations,
std::vector<index_t> input_left_pads,
std::vector<index_t> input_right_pads)
std::vector<index_t> input_right_pads,
index_t instance_index = -1)
{
constexpr index_t InOutRank = 4;
constexpr index_t WindowRank = 2;
@@ -171,6 +172,11 @@ bool profile_pool2d_fwd_impl(int do_verification,
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -268,7 +274,11 @@ bool profile_pool2d_fwd_impl(int do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "max_pool2d_fwd_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -46,7 +46,9 @@ template <typename InDataType,
ck::ReduceTensorOp ReduceOpId,
bool PropagateNan,
bool OutputIndex>
bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams& kernel_params)
bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params,
PoolFwdKernelParams& kernel_params,
index_t instance_index = -1)
{
constexpr index_t InOutRank = 5;
constexpr index_t WindowRank = 3;
@@ -199,6 +201,11 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams&
if(inst_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
else
{
@@ -328,7 +335,11 @@ bool profile_pool3d_fwd_impl(PoolFwdInputParams& in_params, PoolFwdKernelParams&
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
}
if(instance_index != -1)
{
std::cout << "max_pool3d_fwd_instance (" << instance_index << "/" << num_kernel
<< "): Passed" << std::endl;
}
return true;
}

View File

@@ -144,7 +144,8 @@ bool profile_reduce_impl_impl(bool do_verification,
const std::vector<size_t>& inLengths,
const std::array<int, NumReduceDim>& reduceDims,
float alpha,
float beta)
float beta,
index_t instance_index = -1)
{
using namespace ck::tensor_operation::device;
using namespace ck::tensor_operation::device::instance;
@@ -373,7 +374,14 @@ bool profile_reduce_impl_impl(bool do_verification,
if(!reduce_ptr->IsSupportedArgument(argument_ptr.get()))
continue;
else
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
std::string reduce_name = reduce_ptr->GetTypeString();
@@ -452,7 +460,11 @@ bool profile_reduce_impl_impl(bool do_verification,
std::cout << "Error: No kernel is applicable" << std::endl;
return false;
};
if(instance_index != -1)
{
std::cout << "reduce_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return pass;
};
@@ -467,7 +479,8 @@ bool profile_reduce_impl(bool do_verification,
bool PropagateNan,
bool UseIndex,
float alpha,
float beta)
float beta,
index_t instance_index = -1)
{
bool matched = false;
bool pass = true;
@@ -505,7 +518,8 @@ bool profile_reduce_impl(bool do_verification,
inLengths,
arrReduceDims,
alpha,
beta);
beta,
instance_index);
matched = true;
});

View File

@@ -53,7 +53,8 @@ bool profile_softmax_impl(int do_verification,
std::vector<index_t> in_strides,
std::vector<index_t> reduce_dims,
double alpha,
double beta)
double beta,
index_t instance_index = -1)
{
if(Rank != in_length.size())
{
@@ -124,7 +125,7 @@ bool profile_softmax_impl(int do_verification,
float best_avg_time = std::numeric_limits<float>::max();
float best_gb_per_sec = 0;
std::vector<bool> instance_pass;
index_t num_kernel = 0;
for(auto& inst_ptr : instances)
{
auto argument_ptr = inst_ptr->MakeArgumentPointer(in_tensor_lengths,
@@ -146,6 +147,15 @@ bool profile_softmax_impl(int do_verification,
instance_pass.push_back(true);
continue;
}
else
{
num_kernel++;
if((instance_index != -1) && (instance_index + 1 != num_kernel))
{
// skip test if instance_index is specified
continue;
}
}
out_dev.ToDevice(prior_out.data());
auto invoker_ptr = inst_ptr->MakeInvokerPointer();
@@ -216,6 +226,11 @@ bool profile_softmax_impl(int do_verification,
std::cout << "alpha = " << alpha << ", " << "beta = " << beta << ", " << best_avg_time
<< " ms, " << best_gb_per_sec << " GB/s, " << best_instance_name << std::endl;
}
if(instance_index != -1)
{
std::cout << "reduce_instance (" << instance_index << "/" << num_kernel << "): Passed"
<< std::endl;
}
return std::all_of(
std::begin(instance_pass), std::end(instance_pass), [](bool p) { return p; });
}

View File

@@ -12,7 +12,8 @@
#include "profiler/profile_batched_gemm_impl.hpp"
#include "ck/library/tensor_operation_instance/gpu/batched_gemm.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
struct GemmParams
{
ck::index_t M;
@@ -37,96 +38,153 @@ class TestBatchedGemm : public ::testing::Test
using namespace ck::tensor_operation::device;
bool pass = true;
for(auto& param : params)
for(size_t i = 0; i < params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = params[i];
const auto M = param.M;
const auto N = param.N;
const auto K = param.K;
const auto BatchCount = param.BatchCount;
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, K, N, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
K,
N,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, K, K, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
K,
K,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, M, N, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
M,
N,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, M, K, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
M,
K,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -191,3 +249,20 @@ TEST_F(TestBatchedGemm, fp16)
// this->template Run<float>();
// }
// #endif
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}
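
As a usage note for this main(): running the test binary with no arguments keeps the default full sweep, while passing two arguments (param_mask, then instance_index) restricts it; for example, 0x1 0 would exercise only the first parameter set against only the first supported instance, and -1 for instance_index keeps all instances. The concrete invocation is an illustration; the binary name depends on the build.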

View File

@@ -13,6 +13,9 @@
#include "ck/library/tensor_operation_instance/gpu/batched_gemm.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
struct GemmParams
{
ck::index_t M;
@@ -37,96 +40,153 @@ class TestBatchedGemm : public ::testing::Test
using namespace ck::tensor_operation::device;
bool pass = true;
for(auto& param : params)
for(size_t i = 0; i < params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = params[i];
const auto M = param.M;
const auto N = param.N;
const auto K = param.K;
const auto BatchCount = param.BatchCount;
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, K, N, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
K,
N,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, K, K, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Row,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Row,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
K,
K,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, M, N, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Row,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Row,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
M,
N,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
pass =
pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true, 1, false, 1, M, N, K, M, K, N, M * K, K * N, M * N, BatchCount);
pass = pass && ck::profiler::profile_batched_gemm_impl<DataType,
DataType,
DataType,
Col,
Col,
Row,
PassThrough,
PassThrough,
PassThrough,
DeviceBatchedGemm<Col,
Col,
Row,
DataType,
DataType,
DataType,
PassThrough,
PassThrough,
PassThrough>>(
true,
1,
false,
1,
M,
N,
K,
M,
K,
N,
M * K,
K * N,
M * N,
BatchCount,
instance_index);
}
EXPECT_TRUE(pass);
}
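
The gating pattern added to these loops treats param_mask as a bitset over parameter sets: bit i must be set for params[i] to run. A self-contained sketch of just that predicate (mask value made up for illustration):

#include <iostream>

int main()
{
    const long param_mask = 0x5; // binary 101: keep parameter sets 0 and 2
    for(int i = 0; i < 4; i++)
    {
        if((param_mask & (1 << i)) == 0)
        {
            continue; // masked out, exactly as in the test loops above
        }
        std::cout << "running parameter set " << i << std::endl; // 0 and 2
    }
    return 0;
}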
@@ -183,3 +243,20 @@ TEST_F(TestBatchedGemm, fp32)
this->template Run<float>();
}
#endif
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -7,6 +7,8 @@
#include "profiler/profile_batched_gemm_impl.hpp"
#include "ck/library/tensor_operation_instance/gpu/batched_gemm_multi_d.hpp"
static ck::index_t instance_index = -1;
namespace {
using F16 = ck::half_t;
@@ -70,7 +72,8 @@ class TestBatchedGemmMultiD : public ::testing::Test
M * K,
K * N,
M * N,
BatchCount);
BatchCount,
instance_index);
EXPECT_TRUE(pass);
}
};
@@ -88,3 +91,18 @@ TYPED_TEST(TestBatchedGemmMultiD, f16) { this->template Run<F16>(); }
#ifdef CK_ENABLE_INT8
TYPED_TEST(TestBatchedGemmMultiD, int8) { this->template Run<int8_t>(); }
#endif
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 2)
{
instance_index = atoi(argv[1]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1: instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -4,6 +4,9 @@
#include "gtest/gtest.h"
#include "test_batched_gemm_softmax_gemm_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
template <typename Tuple>
class TestBatchedGemmSoftmaxGemmFP16 : public TestBatchedGemmSoftmaxGemm<Tuple>
{
@@ -174,3 +177,20 @@ TYPED_TEST(TestBatchedGemmSoftmaxGemmFP16, AdhocTest)
};
this->Run();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -9,6 +9,9 @@
#include "profiler/profile_batched_gemm_softmax_gemm_impl.hpp"
using ck::tensor_operation::device::GemmSpecialization;
extern ck::index_t param_mask;
extern ck::index_t instance_index;
template <ck::index_t N>
using I = ck::Number<N>;
@@ -57,15 +60,38 @@ struct TestBatchedGemmSoftmaxGemm : public ::testing::Test
B1Layout,
CLayout,
MaskingType::value>(
verify_, 1, false, bench_, M, N, K, O, BatchCount);
verify_,
1,
false,
bench_,
M,
N,
K,
O,
BatchCount,
-1, // StrideA
-1, // StrideB0
-1, // StrideB1
-1, // StrideC
-1, // BatchStrideA
-1, // BatchStrideB0
-1, // BatchStrideB1
-1, // BatchStrideC
-1, // alpha
instance_index);
EXPECT_TRUE(pass);
}
void Run()
{
for(auto lengths : this->lengths_)
for(size_t i = 0; i < this->lengths_.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& lengths = this->lengths_[i];
int M = lengths[0];
int N = lengths[1];
int K = lengths[2];

View File

@@ -4,6 +4,8 @@
#include "gtest/gtest.h"
#include "test_batched_gemm_bias_softmax_gemm_permute_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
template <typename Tuple>
class TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16
: public TestBatchedGemmMaskingScaleSoftmaxGemmPermute<Tuple>
@@ -180,3 +182,20 @@ TYPED_TEST(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16, AdhocTest)
};
this->Run();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -4,6 +4,8 @@
#include "gtest/gtest.h"
#include "test_batched_gemm_softmax_gemm_permute_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
template <typename Tuple>
class TestBatchedGemmMaskingScaleSoftmaxGemmPermuteFP16
: public TestBatchedGemmMaskingScaleSoftmaxGemmPermute<Tuple>
@@ -180,3 +182,20 @@ TYPED_TEST(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteFP16, AdhocTest)
};
this->Run();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,7 +10,8 @@
#include "profiler/profile_batched_gemm_bias_softmax_gemm_permute_impl.hpp"
#include <hip/hip_runtime.h>
extern ck::index_t param_mask;
extern ck::index_t instance_index;
using ck::tensor_operation::device::GemmSpecialization;
using ck::tensor_operation::device::MaskingSpecialization;
using ck::tensor_operation::device::TensorSpecialization;
@@ -66,21 +67,26 @@ struct TestBatchedGemmMaskingScaleSoftmaxGemmPermute : public ::testing::Test
Acc0BiasDataType,
Acc1BiasDataType,
MaskingType::value>(
verify_, 2, false, bench_, M, N, K, O, G0, G1);
verify_, 2, false, bench_, M, N, K, O, G0, G1, -1, instance_index);
EXPECT_TRUE(pass);
}
void Run()
{
for(auto lengths : this->lengths_)
for(size_t i = 0; i < this->lengths_.size(); i++)
{
int M = lengths[0];
int N = lengths[1];
int K = lengths[2];
int O = lengths[3];
int G0 = lengths[4];
int G1 = lengths[5];
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& lengths = this->lengths_[i];
int M = lengths[0];
int N = lengths[1];
int K = lengths[2];
int O = lengths[3];
int G0 = lengths[4];
int G1 = lengths[5];
this->RunSingle(M, N, K, O, G0, G1);
}

View File

@@ -5,6 +5,8 @@
#include "test_batched_gemm_softmax_gemm_permute_util.hpp"
#include "test_batched_gemm_device_utils.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
template <typename Tuple>
class TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16
: public TestBatchedGemmMaskingScaleSoftmaxGemmPermute<Tuple>
@@ -228,3 +230,20 @@ TYPED_TEST(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteBF16, AdhocTest)
};
this->Run();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -5,6 +5,9 @@
#include "test_batched_gemm_softmax_gemm_permute_util.hpp"
#include "test_batched_gemm_device_utils.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
template <typename Tuple>
class TestBatchedGemmMaskingScaleSoftmaxGemmPermuteFP16
: public TestBatchedGemmMaskingScaleSoftmaxGemmPermute<Tuple>
@@ -191,3 +194,20 @@ TYPED_TEST(TestBatchedGemmMaskingScaleSoftmaxGemmPermuteFP16, AdhocTest)
};
this->Run();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -9,6 +9,8 @@
#include "ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_permute_xdl_cshuffle.hpp"
#include "profiler/profile_batched_gemm_softmax_gemm_permute_impl.hpp"
extern ck::index_t param_mask;
extern ck::index_t instance_index;
using ck::tensor_operation::device::GemmSpecialization;
using ck::tensor_operation::device::MaskingSpecialization;
using ck::tensor_operation::device::TensorSpecialization;
@@ -64,21 +66,26 @@ struct TestBatchedGemmMaskingScaleSoftmaxGemmPermute : public ::testing::Test
ck::Tuple<>,
ck::Tuple<>,
MaskingType::value>(
verify_, 2, false, bench_, M, N, K, O, G0, G1);
verify_, 2, false, bench_, M, N, K, O, G0, G1, -1, instance_index);
EXPECT_TRUE(pass);
}
void Run()
{
for(auto lengths : this->lengths_)
for(size_t i = 0; i < this->lengths_.size(); i++)
{
int M = lengths[0];
int N = lengths[1];
int K = lengths[2];
int O = lengths[3];
int G0 = lengths[4];
int G1 = lengths[5];
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& lengths = this->lengths_[i];
int M = lengths[0];
int N = lengths[1];
int K = lengths[2];
int O = lengths[3];
int G0 = lengths[4];
int G1 = lengths[5];
this->RunSingle(M, N, K, O, G0, G1);
}

View File

@@ -15,6 +15,9 @@ using F32 = float;
using BF16 = ck::bhalf_t;
using F64 = double;
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestBatchNormBwdRank4 : public ::testing::Test
{
@@ -37,33 +40,48 @@ class TestBatchNormBwdRank4 : public ::testing::Test
template <int NumReduceDim>
void Run()
{
for(auto& inOutLengths : list_of_lengths)
for(size_t i = 0; i < list_of_lengths.size(); i++)
{
bool pass = true;
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& inOutLengths = list_of_lengths[i];
bool pass = true;
EXPECT_FALSE(reduceDims.size() != NumReduceDim);
pass = pass && ck::profiler::profile_batchnorm_backward_impl<XDataType,
DxDataType,
DyDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(
true, 3, false, false, inOutLengths, reduceDims, true, epsilon);
pass =
pass &&
ck::profiler::profile_batchnorm_backward_impl<XDataType,
DxDataType,
DyDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(
true, 3, false, false, inOutLengths, reduceDims, true, epsilon, instance_index);
pass = pass && ck::profiler::profile_batchnorm_backward_impl<XDataType,
DxDataType,
DyDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(
true, 3, false, false, inOutLengths, reduceDims, false, epsilon);
pass =
pass && ck::profiler::profile_batchnorm_backward_impl<XDataType,
DxDataType,
DyDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(true,
3,
false,
false,
inOutLengths,
reduceDims,
false,
epsilon,
instance_index);
EXPECT_TRUE(pass);
}
@@ -103,3 +121,19 @@ TYPED_TEST(TestBatchNormBwdRank4, nchw)
this->reduceDims = {0, 2, 3};
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -16,6 +16,9 @@ using BF16 = ck::bhalf_t;
using I8 = int8_t;
using F64 = double;
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestBatchNormFwdRank4 : public ::testing::Test
{
@@ -38,9 +41,14 @@ class TestBatchNormFwdRank4 : public ::testing::Test
template <int NumReduceDim>
void Run()
{
for(auto& inOutLengths : list_of_lengths)
for(size_t i = 0; i < list_of_lengths.size(); i++)
{
bool pass = true;
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& inOutLengths = list_of_lengths[i];
bool pass = true;
EXPECT_FALSE(reduceDims.size() != NumReduceDim);
@@ -61,7 +69,8 @@ class TestBatchNormFwdRank4 : public ::testing::Test
true,
true,
epsilon,
averageFactor);
averageFactor,
instance_index);
pass =
pass && ck::profiler::profile_batchnorm_forward_impl<XDataType,
@@ -80,7 +89,8 @@ class TestBatchNormFwdRank4 : public ::testing::Test
false,
false,
epsilon,
averageFactor);
averageFactor,
instance_index);
EXPECT_TRUE(pass);
}
@@ -120,3 +130,19 @@ TYPED_TEST(TestBatchNormFwdRank4, nchw)
this->reduceDims = {0, 2, 3};
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,6 +10,9 @@
#include "profiler/profile_batchnorm_infer_impl.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
using F16 = ck::half_t;
using F32 = float;
using BF16 = ck::bhalf_t;
@@ -36,31 +39,38 @@ class TestBatchNormInferRank4 : public ::testing::Test
template <int NumReduceDim>
void Run()
{
for(auto& inOutLengths : list_of_lengths)
for(size_t i = 0; i < list_of_lengths.size(); i++)
{
bool pass = true;
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& inOutLengths = list_of_lengths[i];
bool pass = true;
EXPECT_FALSE(reduceDims.size() != NumReduceDim);
pass = pass && ck::profiler::profile_batchnorm_infer_impl<XDataType,
YDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(
true, 3, false, false, inOutLengths, reduceDims, epsilon);
pass = pass &&
ck::profiler::profile_batchnorm_infer_impl<XDataType,
YDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(
true, 3, false, false, inOutLengths, reduceDims, epsilon, instance_index);
pass = pass && ck::profiler::profile_batchnorm_infer_impl<XDataType,
YDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(
true, 3, false, false, inOutLengths, reduceDims, epsilon);
pass = pass &&
ck::profiler::profile_batchnorm_infer_impl<XDataType,
YDataType,
AccDataType,
ScaleDataType,
BiasDataType,
MeanVarDataType,
4,
NumReduceDim>(
true, 3, false, false, inOutLengths, reduceDims, epsilon, instance_index);
EXPECT_TRUE(pass);
}
@@ -100,3 +110,20 @@ TYPED_TEST(TestBatchNormInferRank4, nchw)
this->reduceDims = {0, 2, 3};
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -12,10 +12,11 @@
#include "profiler/profile_contraction_impl.hpp"
#include "profiler/profile_contraction_utils.hpp"
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
using F64 = double;
static ck::index_t instance_index = -1;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
using F64 = double;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
@@ -95,7 +96,8 @@ class TestContraction : public ::testing::Test
StridesA,
StridesB,
StridesC,
StridesD);
StridesD,
instance_index);
EXPECT_TRUE(pass);
}
}
@@ -219,3 +221,18 @@ TYPED_TEST(TestContractionScaleMixedPrecision, scale)
this->template Run<2>({{8, 16}, {1, 1}, {8, 16}});
this->template Run<2>({{1, 1}, {1, 1}, {1, 1}});
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 2)
{
instance_index = atoi(argv[1]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1: instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -11,6 +11,9 @@
#include "profiler/profile_conv_tensor_rearrange_impl.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestConvTensorRearrange : public ::testing::Test
{
@@ -25,18 +28,24 @@ class TestConvTensorRearrange : public ::testing::Test
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
pass = pass && ck::profiler::profile_conv_tensor_rearrange_impl<NDimSpatial,
ImLayout,
InDataType,
OutDataType,
ConvTensorRearrangeOp>(
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_conv_tensor_rearrange_impl<NDimSpatial,
ImLayout,
InDataType,
OutDataType,
ConvTensorRearrangeOp>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -157,3 +166,19 @@ TYPED_TEST(TestConvTensorRearrange3d, Test3D)
this->template Run<3, int8_t, int8_t>();
#endif
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -9,7 +9,8 @@
#include <gtest/gtest.h>
#include "profiler/profile_conv_bwd_data_impl.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestConvndBwdData : public ::testing::Test
{
@@ -20,10 +21,15 @@ class TestConvndBwdData : public ::testing::Test
template <ck::index_t NDimSpatial>
void Run()
{
for(auto& param : conv_params)
EXPECT_FALSE(conv_params.empty());
for(size_t i = 0; i < conv_params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
bool pass;
EXPECT_FALSE(conv_params.empty());
pass = ck::profiler::profile_conv_bwd_data_impl<
NDimSpatial,
ck::tuple_element_t<NDimSpatial - 1,
@@ -44,7 +50,8 @@ class TestConvndBwdData : public ::testing::Test
1, // init_method integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
EXPECT_TRUE(pass);
}
}
@@ -91,3 +98,19 @@ TYPED_TEST(TestConvndBwdData, Conv3dBwdData)
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,6 +10,8 @@
#include "profiler/profile_conv_fwd_impl.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestConvndFwd : public ::testing::Test
{
@@ -20,10 +22,15 @@ class TestConvndFwd : public ::testing::Test
template <ck::index_t NDimSpatial>
void Run()
{
for(auto& param : conv_params)
EXPECT_FALSE(conv_params.empty());
for(size_t i = 0; i < conv_params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
bool pass;
EXPECT_FALSE(conv_params.empty());
pass = ck::profiler::profile_conv_fwd_impl<
NDimSpatial,
ck::tuple_element_t<NDimSpatial - 1,
@@ -44,7 +51,8 @@ class TestConvndFwd : public ::testing::Test
1, // init_method integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
EXPECT_TRUE(pass);
}
}
@@ -90,3 +98,19 @@ TYPED_TEST(TestConvndFwd, Conv3dFwd)
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestElementwiseLayernorm : public ::testing::Test
{
@@ -25,15 +28,20 @@ class TestElementwiseLayernorm : public ::testing::Test
std::vector<std::vector<ck::index_t>> lengths = {
{1, 1}, {25, 16}, {39, 777}, {100, 200}, {1024, 1024}, {48 * 256, 2048}, {4096, 8192}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& length = lengths[i];
bool success = ck::profiler::profile_elementwise_layernorm_impl<ADataType,
BDataType,
GammaDataType,
BetaDataType,
AccDataType,
YDataType>(
true, 2, false, false, length);
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -45,3 +53,19 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestElementwiseLayernorm, KernelTypes);
TYPED_TEST(TestElementwiseLayernorm, Test_FP16) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -31,4 +31,4 @@ using AccDataType = float;
#include "run_gemm_test.inc"
int main() { return run_gemm_test(); }
int main(int argc, char* argv[]) { return run_gemm_test(argc, argv); }

View File

@@ -31,4 +31,4 @@ using AccDataType = float;
#include "run_gemm_test.inc"
int main() { return run_gemm_test(); }
int main(int argc, char* argv[]) { return run_gemm_test(argc, argv); }

View File

@@ -31,4 +31,4 @@ using AccDataType = float;
#include "run_gemm_test.inc"
int main() { return run_gemm_test(); }
int main(int argc, char* argv[]) { return run_gemm_test(argc, argv); }

View File

@@ -31,4 +31,4 @@ using AccDataType = double;
#include "run_gemm_test.inc"
int main() { return run_gemm_test(); }
int main(int argc, char* argv[]) { return run_gemm_test(argc, argv); }

View File

@@ -31,4 +31,4 @@ using AccDataType = int32_t;
#include "run_gemm_test.inc"
int main() { return run_gemm_test(); }
int main(int argc, char* argv[]) { return run_gemm_test(argc, argv); }

View File

@@ -105,6 +105,7 @@ int main(int argc, char* argv[])
bool do_verification = true;
bool time_kernel = true;
int problem_index = -1;
if(argc == 1)
{
@@ -115,16 +116,28 @@ int main(int argc, char* argv[])
do_verification = std::stoi(argv[1]);
time_kernel = std::stoi(argv[2]);
}
else if(argc == 4)
{
do_verification = std::stoi(argv[1]);
time_kernel = std::stoi(argv[2]);
problem_index = std::stoi(argv[3]);
}
else
{
std::cerr << "arg1: verification (0=no, 1=yes)" << std::endl
<< "arg2: time kernel (0=no, 1=yes)" << std::endl;
<< "arg2: time kernel (0=no, 1=yes)" << std::endl
<< "arg3: problem index (0-35, -1 means all)" << std::endl;
return 0;
}
bool pass = true;
for(auto& p : problems)
for(size_t i = 0; i < problems.size(); i++)
{
if(problem_index != -1 && problem_index != static_cast<ck::index_t>(i))
{
continue;
}
auto& p = problems[i];
GemmParams& problem_size = std::get<0>(p);
const LayoutConfig& layout_config = std::get<1>(p);
const auto& factory = std::get<2>(p);
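
Unlike the mask-based tests, this example selects by direct index comparison: problem_index either is -1 (run everything) or must equal the loop index. A standalone sketch of the same control flow (data is illustrative):

#include <iostream>
#include <vector>

int main()
{
    std::vector<const char*> problems = {"p0", "p1", "p2"};
    const int problem_index = 1; // -1 would run everything
    for(size_t i = 0; i < problems.size(); i++)
    {
        if(problem_index != -1 && problem_index != static_cast<int>(i))
        {
            continue;
        }
        std::cout << problems[i] << std::endl; // prints only "p1"
    }
    return 0;
}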

View File

@@ -261,6 +261,44 @@ struct TestGemm
return true;
}
}
template <template <class...> class DeviceGemmPtr_,
typename ALayout,
typename BLayout,
typename CLayout,
typename ADataType,
typename BDataType,
typename CDataType,
typename AElementwiseOperation,
typename BElementwiseOperation,
typename CElementwiseOperation>
bool IsSupportedArgument(DeviceGemmPtr_<ALayout,
BLayout,
CLayout,
ADataType,
BDataType,
CDataType,
AElementwiseOperation,
BElementwiseOperation,
CElementwiseOperation>* gemmPtr,
const GemmParams& params = GemmParams{})
{
auto invoker_ptr = gemmPtr->MakeInvokerPointer();
auto argument_ptr = gemmPtr->MakeArgumentPointer(static_cast<ADataType*>(nullptr),
static_cast<BDataType*>(nullptr),
static_cast<CDataType*>(nullptr),
params.M,
params.N,
params.K,
params.StrideA,
params.StrideB,
params.StrideC,
AElementwiseOperation{},
BElementwiseOperation{},
CElementwiseOperation{});
return gemmPtr->IsSupportedArgument(argument_ptr.get());
}
};
} // namespace gemm_util
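
The new IsSupportedArgument helper builds the argument with null data pointers: only shapes, strides, and element ops are inspected at this stage, so a support query needs no allocation. A generic, hypothetical analogue of that dry-run idea (FakeInstance is not a CK type):

#include <iostream>

struct FakeInstance // stand-in for a CK device op; illustration only
{
    // only metadata is inspected, so the data pointer may legally be null
    bool IsSupportedArgument(const float* /*a*/, int M, int N) const
    {
        return (M % 8 == 0) && (N % 8 == 0);
    }
};

int main()
{
    FakeInstance inst;
    std::cout << inst.IsSupportedArgument(nullptr, 64, 64) << std::endl; // 1
    std::cout << inst.IsSupportedArgument(nullptr, 63, 64) << std::endl; // 0
    return 0;
}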

View File

@@ -1,13 +1,39 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
int run_gemm_test()
int run_gemm_test(int argc, char* argv[])
{
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
ck::gemm_util::GemmParams params;
ck::index_t instance_index = -1;
if(argc == 1)
{
// use default params
}
else if(argc == 4 || argc == 5)
{
params.M = atoi(argv[1]);
params.N = atoi(argv[2]);
params.K = atoi(argv[3]);
params.StrideA = params.M;
params.StrideB = params.N;
params.StrideC = params.K;
if(argc == 5)
{
instance_index = atoi(argv[4]);
}
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1-4: M N K instance_index(-1 means all)" << std::endl;
}
std::cout << "Params (M, N, K, index) " << params.M << " " << params.N << " " << params.K << " "
<< instance_index << std::endl;
auto test = [&](auto a_layout, auto b_layout, auto c_layout) {
bool pass = true;
@@ -24,10 +50,31 @@ int run_gemm_test()
const auto gemmPtrs =
ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
ck::index_t num_instance = 0;
for(auto& gemmPtr : gemmPtrs)
{
pass &= ck::gemm_util::TestGemm<AccDataType>{}(gemmPtr.get());
if(instance_index == -1)
{
pass &= ck::gemm_util::TestGemm<AccDataType>{}(gemmPtr.get(), params);
}
else
{
auto test_gemm = ck::gemm_util::TestGemm<AccDataType>{};
if(test_gemm.IsSupportedArgument(gemmPtr.get(), params))
{
if(num_instance == instance_index)
{
pass &= test_gemm(gemmPtr.get(), params);
}
num_instance++;
}
}
}
if(instance_index != -1)
{
std::cout << "TestGemm_instance (" << instance_index << "/" << num_instance
<< "): " << (pass ? "Passed" : "Failed") << std::endl;
}
return pass;
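
Note that num_instance above counts only instances that pass the support check, so instance_index is a zero-based ordinal over supported instances, not over the whole instance library. A self-contained analogue (hypothetical instance list):

#include <functional>
#include <iostream>
#include <vector>

int main()
{
    std::vector<std::function<bool()>> is_supported = {
        [] { return true; }, [] { return false; }, [] { return true; }};
    const int instance_index = 1; // second *supported* instance
    int num_instance = 0;
    for(auto& supported : is_supported)
    {
        if(!supported())
        {
            continue; // unsupported instances do not consume an ordinal
        }
        if(num_instance == instance_index)
        {
            std::cout << "running supported instance #" << num_instance
                      << std::endl; // fires for the third entry overall
        }
        num_instance++;
    }
    return 0;
}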

View File

@@ -4,9 +4,20 @@
#include <iostream>
#include "profiler/profile_gemm_reduce_impl.hpp"
int main()
static ck::index_t instance_index = -1;
int main(int argc, char** argv)
{
if(argc == 1) {}
else if(argc == 2)
{
instance_index = atoi(argv[1]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1: instance_index(-1 means all)" << std::endl;
}
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
@@ -19,22 +30,22 @@ int main()
pass = pass &&
ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Row, Row, Row>(
true, 1, false, false, M, N, K, K, N, N);
true, 1, false, false, M, N, K, K, N, N, instance_index);
pass = pass &&
ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Row, Col, Row>(
true, 1, false, false, M, N, K, K, K, N);
true, 1, false, false, M, N, K, K, K, N, instance_index);
pass = pass &&
ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Col, Row, Row>(
true, 1, false, false, M, N, K, M, N, N);
true, 1, false, false, M, N, K, M, N, N, instance_index);
pass = pass &&
ck::profiler::
profile_gemm_reduce_impl<ck::half_t, ck::half_t, ck::half_t, float, Col, Col, Row>(
true, 1, false, false, M, N, K, M, K, N);
true, 1, false, false, M, N, K, M, K, N, instance_index);
if(pass)
{

View File

@@ -15,6 +15,8 @@
#include "include/ck/utility/data_type.hpp"
#include "profiler/profile_gemm_splitk_impl.hpp"
extern ck::index_t param_mask;
extern ck::index_t instance_index;
namespace ck {
namespace test {
@@ -48,8 +50,13 @@ class TestGemmSplitK : public testing::Test
const int StrideB,
const int StrideC)
{
for(auto kb : k_batches_)
for(size_t i = 0; i < k_batches_.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto kb = k_batches_[i];
RunSingle(M, N, K, StrideA, StrideB, StrideC, kb);
}
}
@@ -82,7 +89,8 @@ class TestGemmSplitK : public testing::Test
StrideC,
kbatch,
n_warmup,
n_iter);
n_iter,
instance_index);
EXPECT_TRUE(pass);
}
};

View File

@@ -7,6 +7,9 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_splitk_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using F16 = ck::half_t;
using F32 = float;
@@ -64,3 +67,20 @@ TYPED_TEST_SUITE(TestGemmSplitK_KM_KN, KernelTypes);
TYPED_TEST_SUITE(TestGemmSplitK_KM_NK, KernelTypes);
#include "test_gemm_splitk_ut_cases.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -14,7 +14,8 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "include/ck/utility/data_type.hpp"
#include "profiler/profile_gemm_universal_impl.hpp"
extern ck::index_t param_mask;
extern ck::index_t instance_index;
namespace ck {
namespace test {
@@ -49,8 +50,13 @@ class TestGemmUniversal : public testing::Test
const int StrideB,
const int StrideC)
{
for(auto kb : k_batches_)
for(size_t i = 0; i < k_batches_.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto kb = k_batches_[i];
RunSingle(M, N, K, StrideA, StrideB, StrideC, kb);
}
}
@@ -84,7 +90,8 @@ class TestGemmUniversal : public testing::Test
StrideC,
kbatch,
n_warmup,
n_iter);
n_iter,
instance_index);
EXPECT_TRUE(pass);
}
};

View File

@@ -6,10 +6,11 @@
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
using I4 = ck::pk_i4_t;
using BF16 = ck::bhalf_t;
using F32 = float;
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using I4 = ck::pk_i4_t;
using BF16 = ck::bhalf_t;
using F32 = float;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
@@ -85,3 +86,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_KN, KernelTypes_KM_KN);
TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_NK, KernelTypes_KM_NK);
#include "test_gemm_universal_ut_cases_bf16.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -6,10 +6,11 @@
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
using I4 = ck::pk_i4_t;
using F8 = ck::f8_t;
using F16 = ck::half_t;
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using I4 = ck::pk_i4_t;
using F8 = ck::f8_t;
using F16 = ck::half_t;
using F32 = float;
@@ -99,3 +100,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_FP16_KM_NK, KernelTypes_KM_NK);
TYPED_TEST_SUITE(TestGemmUniversal_FP16_KM_KN, KernelTypes_KM_KN);
#include "test_gemm_universal_ut_cases_fp16.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -6,7 +6,8 @@
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
#if defined(CK_USE_WMMA_FP8)
using F8 = ck::f8_t;
@@ -59,3 +60,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_FP8_MK_NK, KernelTypes_MK_NK);
#include "test_gemm_universal_ut_cases_fp8.inc"
#endif // CK_USE_WMMA_FP8
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -6,9 +6,10 @@
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
using BF16 = ck::bhalf_t;
using F32 = float;
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using BF16 = ck::bhalf_t;
using F32 = float;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
@@ -80,3 +81,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_KN, KernelTypes_KM_KN);
TYPED_TEST_SUITE(TestGemmUniversal_BF16_KM_NK, KernelTypes_KM_NK);
#include "test_gemm_universal_ut_cases_bf16.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -6,9 +6,10 @@
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
using F8 = ck::f8_t;
using F16 = ck::half_t;
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using F8 = ck::f8_t;
using F16 = ck::half_t;
using F32 = float;
@@ -92,3 +93,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_FP16_KM_NK, KernelTypes_KM_NK);
TYPED_TEST_SUITE(TestGemmUniversal_FP16_KM_KN, KernelTypes_KM_KN);
#include "test_gemm_universal_ut_cases_fp16.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -6,11 +6,12 @@
#include "gtest/gtest.h"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_util.hpp"
using F8 = ck::f8_t;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using F8 = ck::f8_t;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F32 = float;
using Row = ck::tensor_layout::gemm::RowMajor;
using Col = ck::tensor_layout::gemm::ColumnMajor;
@@ -69,3 +70,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_FP8_MK_NK, KernelTypes_MK_NK);
#include "test_gemm_universal_ut_cases_fp8.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -15,6 +15,9 @@
#include "include/ck/utility/data_type.hpp"
#include "profiler/profile_gemm_universal_streamk_impl.hpp"
extern ck::index_t param_mask;
extern ck::index_t instance_index;
namespace ck {
namespace test {
@@ -56,8 +59,13 @@ class TestGemmUniversal_Streamk : public testing::Test
const int StrideB,
const int StrideC)
{
for(auto streamk_sel : streamk_sel_list)
for(size_t i = 0; i < streamk_sel_list.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto streamk_sel = streamk_sel_list[i];
RunSingle(M, N, K, StrideA, StrideB, StrideC, streamk_sel, -1);
}
}
@@ -93,7 +101,8 @@ class TestGemmUniversal_Streamk : public testing::Test
streamk_sel,
Grid_size,
n_warmup,
n_iter);
n_iter,
instance_index);
EXPECT_TRUE(pass);
}
};

View File

@@ -7,6 +7,9 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_streamk_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using BF16 = ck::bhalf_t;
using F32 = float;
@@ -83,3 +86,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_Streamk_BF16_KM_KN, KernelTypes_KM_KN);
TYPED_TEST_SUITE(TestGemmUniversal_Streamk_BF16_KM_NK, KernelTypes_KM_NK);
#include "test_gemm_universal_streamk_ut_cases_bf16.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -7,6 +7,9 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_streamk_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using F8 = ck::f8_t;
using F16 = ck::half_t;
@@ -82,3 +85,20 @@ TYPED_TEST_SUITE(TestGemmUniversal_Streamk_FP16_MK_KN, KernelTypes_MK_KN);
TYPED_TEST_SUITE(TestGemmUniversal_Streamk_FP16_MK_NK, KernelTypes_MK_NK);
#include "test_gemm_universal_streamk_ut_cases_fp16.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -7,6 +7,9 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "test_gemm_universal_streamk_util.hpp"
ck::index_t param_mask = 0xffff;
ck::index_t instance_index = -1;
using F8 = ck::f8_t;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
@@ -72,3 +75,19 @@ TYPED_TEST_SUITE(TestGemmUniversal_Streamk_FP8_MK_KN, KernelTypes_MK_KN);
TYPED_TEST_SUITE(TestGemmUniversal_Streamk_FP8_MK_NK, KernelTypes_MK_NK);
#include "test_gemm_universal_streamk_ut_cases_fp8.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -11,6 +11,9 @@
#include "profiler/profile_grouped_conv_bwd_data_impl.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestGroupedConvndBwdDataWmma : public ::testing::Test
{
@@ -27,20 +30,27 @@ class TestGroupedConvndBwdDataWmma : public ::testing::Test
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
pass = pass && ck::profiler::profile_grouped_conv_bwd_data_impl<NDimSpatial,
OutLayout,
WeiLayout,
InLayout,
DataType,
DataType,
DataType>(
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_grouped_conv_bwd_data_impl<NDimSpatial,
OutLayout,
WeiLayout,
InLayout,
DataType,
DataType,
DataType>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
1, // splitK
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -106,3 +116,20 @@ TYPED_TEST(TestGroupedConvndBwdDataWmma3d, Test3D)
{3, 1, 1, 1, 1, {3, 3, 3}, {32, 32, 32}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -11,6 +11,9 @@
#include "profiler/profile_grouped_conv_bwd_data_impl.hpp"
static ck::index_t param_mask = 0xffffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestGroupedConvndBwdDataXdl : public ::testing::Test
{
@@ -30,21 +33,27 @@ class TestGroupedConvndBwdDataXdl : public ::testing::Test
bool pass = true;
for(auto split_k : split_ks)
{
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
pass = pass && ck::profiler::profile_grouped_conv_bwd_data_impl<NDimSpatial,
OutLayout,
WeiLayout,
InLayout,
DataType,
DataType,
DataType>(
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_grouped_conv_bwd_data_impl<NDimSpatial,
OutLayout,
WeiLayout,
InLayout,
DataType,
DataType,
DataType>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param,
split_k);
split_k,
instance_index);
}
}
EXPECT_TRUE(pass);
@@ -149,3 +158,19 @@ TYPED_TEST(TestGroupedConvndBwdDataXdl3d, Test3D)
{3, 1, 1, 1, 1, {3, 3, 3}, {4, 16, 16}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -23,6 +23,8 @@
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_bwd_weight.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestGroupedConvndBwdWeight : public ::testing::Test
{
@@ -83,7 +85,8 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
}
bool PerformConvWeightBilinear(ck::utils::conv::ConvParam& conv_param,
const ck::index_t split_k)
const ck::index_t split_k,
ck::index_t instance_index_ = -1)
{
bool passed = true;
@@ -163,6 +166,7 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
// get device op instances
const auto op_ptrs = ck::tensor_operation::device::instance::DeviceOperationInstanceFactory<
DeviceOp>::GetInstances();
int num_kernel = 0;
for(std::size_t i = 0; i < op_ptrs.size(); ++i)
{
@@ -197,6 +201,12 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
if(op_ptr->IsSupportedArgument(argument_ptr.get()))
{
++num_kernel;
if((instance_index_ != -1) && (instance_index_ + 1 != num_kernel))
{
// when instance_index is specified, skip every instance except the selected one
continue;
}
float avg_time = invoker_ptr->Run(argument_ptr.get(), StreamConfig{nullptr});
wei_device_buf.FromDevice(wei_device.mData.data());
passed &= ck::utils::check_err(wei_device, wei_host, "Error: incorrect results!");
@@ -218,6 +228,11 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
std::cerr << op_name << " does not support this problem" << std::endl;
}
}
if(instance_index_ != -1)
{
std::cout << "grouped_conv_bwd_weight_instance (" << instance_index_ << "/" << num_kernel
<< "): " << (passed ? "Passed" : "Failed") << std::endl;
}
return passed;
}
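
Because num_kernel is incremented before the comparison, the condition instance_index_ + 1 != num_kernel skips every supported kernel except the one whose zero-based supported-ordinal equals instance_index_. A standalone check of that arithmetic (the support pattern is made up):

#include <iostream>

int main()
{
    const int instance_index_ = 2;
    int num_kernel = 0;
    for(int k = 0; k < 5; k++)
    {
        const bool supported = (k != 1); // pretend kernel 1 is unsupported
        if(!supported)
            continue;
        ++num_kernel;
        if(instance_index_ != -1 && instance_index_ + 1 != num_kernel)
            continue; // not the selected ordinal
        std::cout << "kernel " << k << " runs (ordinal " << num_kernel - 1
                  << ")" << std::endl; // prints: kernel 3 runs (ordinal 2)
    }
    return 0;
}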
@@ -228,9 +243,14 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
for(auto split_k : split_ks)
{
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
pass = pass && PerformConvWeightBilinear(param, split_k);
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && PerformConvWeightBilinear(param, split_k, instance_index);
}
}
EXPECT_TRUE(pass);
@@ -268,3 +288,20 @@ TYPED_TEST(TestGroupedConvndBwdWeight3d, Test3D)
{3, 1, 1, 4, 4, {3, 3, 3}, {14, 28, 28}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->Run();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -15,6 +15,9 @@
#include "profiler/profile_grouped_conv_bwd_weight_impl.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
using namespace ck::tensor_layout::convolution;
template <typename Tuple>
@@ -92,8 +95,13 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
for(auto split_k : split_ks)
{
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
if(!skip_case(split_k))
{
pass = pass && ck::profiler::profile_grouped_conv_bwd_weight_impl<NDimSpatial{},
@@ -108,7 +116,8 @@ class TestGroupedConvndBwdWeight : public ::testing::Test
false, // do_log
false, // time_kernel
param,
std::to_string(split_k));
std::to_string(split_k),
instance_index);
}
}
}
@@ -224,3 +233,20 @@ TYPED_TEST(TestGroupedConvndBwdWeight3d, Test3D)
{3, 16, 16, 1, 1, {3, 3, 3}, {28, 28, 28}, {2, 2, 2}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->Run();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -9,6 +9,9 @@
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
static ck::index_t param_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
{
@@ -26,23 +29,30 @@ class TestGroupedConvndFwd : public ::testing::Test
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
InLayout,
WeiLayout,
OutLayout,
DataType,
DataType,
DataType,
DataType,
DataType,
IndexType>(
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
InLayout,
WeiLayout,
OutLayout,
DataType,
DataType,
DataType,
DataType,
DataType,
IndexType>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
ck::tensor_operation::element_wise::PassThrough{},
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -148,3 +158,20 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{3, 96, 1, 1, 1, {3, 3, 3}, {4, 30, 160}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -11,7 +11,9 @@
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using BiasNormalizeInInferClamp = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp;
static ck::index_t param_mask = 0xffffff;
static ck::index_t instance_index = -1;
using BiasNormalizeInInferClamp = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp;
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
@@ -30,8 +32,13 @@ class TestGroupedConvndFwd : public ::testing::Test
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_grouped_conv_fwd_bias_clamp_impl<NDimSpatial,
InLayout,
WeiLayout,
@@ -47,7 +54,8 @@ class TestGroupedConvndFwd : public ::testing::Test
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -95,3 +103,19 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,8 +10,9 @@
#include "profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using AddClamp = ck::tensor_operation::element_wise::AddClamp;
static ck::index_t param_mask = 0xffffff;
static ck::index_t instance_index = -1;
using AddClamp = ck::tensor_operation::element_wise::AddClamp;
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
@@ -30,8 +31,13 @@ class TestGroupedConvndFwd : public ::testing::Test
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_grouped_conv_fwd_bias_clamp_impl<NDimSpatial,
InLayout,
WeiLayout,
@@ -47,7 +53,8 @@ class TestGroupedConvndFwd : public ::testing::Test
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -95,3 +102,19 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,8 +10,9 @@
#include "profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using AddClamp = ck::tensor_operation::element_wise::AddClamp;
static ck::index_t param_mask = 0xffffff;
static ck::index_t instance_index = -1;
using AddClamp = ck::tensor_operation::element_wise::AddClamp;
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
@@ -30,8 +31,13 @@ class TestGroupedConvndFwd : public ::testing::Test
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_grouped_conv_fwd_bias_clamp_impl<NDimSpatial,
InLayout,
WeiLayout,
@@ -47,7 +53,8 @@ class TestGroupedConvndFwd : public ::testing::Test
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -133,3 +140,19 @@ TYPED_TEST(TestGroupedConvndFwdBiasClamp3d, Test3D)
{1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,8 +10,9 @@
#include "profiler/profile_grouped_conv_fwd_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using Clamp = ck::tensor_operation::element_wise::Clamp;
static ck::index_t param_mask = 0xffffff;
static ck::index_t instance_index = -1;
using Clamp = ck::tensor_operation::element_wise::Clamp;
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
@@ -31,25 +32,31 @@ class TestGroupedConvndFwd : public ::testing::Test
EXPECT_FALSE(conv_params.empty());
bool pass = true;
Clamp out_element_op{0.f, 256.f};
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
InLayout,
WeiLayout,
OutLayout,
DataType,
DataType,
DataType,
DataType,
DataType,
IndexType,
Clamp>(
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass && ck::profiler::profile_grouped_conv_fwd_impl<NDimSpatial,
InLayout,
WeiLayout,
OutLayout,
DataType,
DataType,
DataType,
DataType,
DataType,
IndexType,
Clamp>(
true, // do_verification
1, // init_method: integer value
false, // do_log
false, // time_kernel
param,
out_element_op);
out_element_op,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -97,3 +104,19 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,8 +10,9 @@
#include "profiler/profile_grouped_conv_fwd_bias_bnorm_clamp_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using BiasNormalizeInInferClamp = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp;
static ck::index_t param_mask = 0xffffff;
static ck::index_t instance_index = -1;
using BiasNormalizeInInferClamp = ck::tensor_operation::element_wise::BiasNormalizeInInferClamp;
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
@@ -30,9 +31,14 @@ class TestGroupedConvndFwd : public ::testing::Test
{
EXPECT_FALSE(conv_params.empty());
bool pass = true;
for(auto& param : conv_params)
for(size_t i = 0; i < conv_params.size(); i++)
{
pass = pass &&
if((param_mask & (1 << i)) == 0)
{
continue;
}
auto& param = conv_params[i];
pass = pass &&
ck::profiler::profile_grouped_conv_fwd_bias_clamp_impl<NDimSpatial,
InLayout,
WeiLayout,
@@ -48,7 +54,8 @@ class TestGroupedConvndFwd : public ::testing::Test
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -96,3 +103,19 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -10,8 +10,9 @@
#include "profiler/profile_grouped_conv_fwd_bias_clamp_impl.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
using AddClamp = ck::tensor_operation::element_wise::AddClamp;
static ck::index_t param_mask = 0xffffff;
static ck::index_t instance_index = -1;
using AddClamp = ck::tensor_operation::element_wise::AddClamp;
template <typename Tuple>
class TestGroupedConvndFwd : public ::testing::Test
@@ -47,7 +48,8 @@ class TestGroupedConvndFwd : public ::testing::Test
1, // init_method: integer value
false, // do_log
false, // time_kernel
param);
param,
instance_index);
}
EXPECT_TRUE(pass);
}
@@ -95,3 +97,19 @@ TYPED_TEST(TestGroupedConvndFwd3d, Test3D)
{3, 2, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
this->template Run<3>();
}
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}
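
Note: this main() is copy-pasted into each test binary in this PR. A hypothetical shared helper (the name parse_partial_test_args and the long& parameters are mine, not in the commit; the tests themselves use ck::index_t) could centralize the parsing after InitGoogleTest has consumed gtest's own flags:

#include <cstdlib>
#include <iostream>

// Hypothetical helper: parse "<mask> <instance_index>" left over after
// testing::InitGoogleTest(&argc, argv) has removed the gtest flags.
inline void parse_partial_test_args(int argc, char** argv, long& mask, long& instance_index)
{
    if(argc == 3)
    {
        mask           = std::strtol(argv[1], nullptr, 0);
        instance_index = std::atoi(argv[2]);
    }
    else if(argc != 1)
    {
        std::cout << "Usage of " << argv[0] << std::endl;
        std::cout << "Arg1,2: mask instance_index(-1 means all)" << std::endl;
    }
}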

View File

@@ -10,6 +10,9 @@
#include "gtest/gtest.h"
#include "test_grouped_gemm_util.hpp"
ck::index_t param_mask = 0xffffff;
ck::index_t instance_index = -1;
using F16 = ck::half_t;
using BF16 = ck::bhalf_t;
using F8 = ck::f8_t;
@@ -42,3 +45,19 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestGroupedGemm, KernelTypes);
#include "test_grouped_gemm_ut_cases.inc"
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
param_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: param_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -23,6 +23,9 @@
#include "ck/utility/number.hpp"
#include "profiler/profile_grouped_gemm_impl.hpp"
extern ck::index_t param_mask;
extern ck::index_t instance_index;
namespace ck {
namespace test {
@@ -109,8 +112,16 @@ class TestGroupedGemm : public testing::Test
{
SetStrides<ELayout>(stride_cs, Ms, Ns);
}
std::vector<int> k_batches;
for(size_t i = 0; i < k_batches_.size(); i++)
{
if(param_mask & (1 << i))
{
k_batches.push_back(k_batches_[i]);
}
}
RunSingle(Ms, Ns, Ks, stride_as, stride_bs, stride_cs, k_batches_);
RunSingle(Ms, Ns, Ks, stride_as, stride_bs, stride_cs, k_batches);
}
void RunSingle(const std::vector<int>& Ms,
@@ -139,7 +150,8 @@ class TestGroupedGemm : public testing::Test
StrideCs,
kbatches,
n_warmup_,
n_iter_);
n_iter_,
instance_index);
EXPECT_TRUE(pass);
}
};
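
Note: unlike the conv tests, the mask here prunes the k_batch sweep and builds a filtered copy of the vector rather than skipping in place. The same idea as a generic helper (hypothetical, not part of the commit):

#include <cstddef>
#include <cstdint>
#include <vector>

// Keep element i of v iff bit i of mask is set.
template <typename T>
std::vector<T> filter_by_mask(const std::vector<T>& v, std::int64_t mask)
{
    std::vector<T> out;
    for(std::size_t i = 0; i < v.size(); ++i)
        if(mask & (std::int64_t{1} << i))
            out.push_back(v[i]);
    return out;
}

// e.g. filter_by_mask(std::vector<int>{1, 2, 4, 8}, 0x5) yields {1, 4}.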

View File

@@ -56,10 +56,27 @@ __host__ void cpu_magic_number_division(uint32_t magic_multiplier,
}
}
int main(int, char*[])
int main(int argc, char* argv[])
{
uint64_t num_divisor = 4096;
uint64_t num_dividend = 1L << 16;
uint64_t num_divisor = 4096;
uint64_t num_dividend = 1L << 16;
uint32_t divisor_start = 0;
uint32_t divisor_end = num_divisor;
if(argc == 1)
{
// use default range
}
else if(argc == 3)
{
divisor_start = std::stoi(argv[1]);
divisor_end = std::stoi(argv[2]);
}
else
{
std::cerr << "arg1 to 2: divisor_start divisor_end" << std::endl;
return 1;
}
std::vector<int32_t> divisors_host(num_divisor);
std::vector<int32_t> dividends_host(num_dividend);
@@ -90,6 +107,10 @@ int main(int, char*[])
for(std::size_t i = 0; i < num_divisor; ++i)
{
if(i < divisor_start || i > divisor_end)
{
continue;
}
// run naive division on GPU
gpu_naive_division<<<1024, 256>>>(
divisors_host[i],
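
Note: for context, this test checks magic-number division, where dividing by a runtime-constant d is replaced by a multiply-high, an add, and a shift. One common construction (a host-side sketch of the usual scheme; not necessarily CK's exact formula) with a reduced spot-check of the divisor range:

#include <cassert>
#include <cstdint>

struct Magic { uint32_t multiplier; uint32_t shift; };

// s = ceil(log2(d)); multiplier = 2^32 * (2^s - d) / d + 1; then
// q = (umulhi(n, multiplier) + n) >> s, valid for n up to INT32_MAX
// (hi < n there, so hi + n cannot wrap 32 bits).
Magic make_magic(uint32_t d)
{
    uint32_t s = 0;
    while((1u << s) < d)
        ++s;
    uint64_t mul = ((uint64_t{1} << 32) * ((uint64_t{1} << s) - d)) / d + 1;
    return {static_cast<uint32_t>(mul), s};
}

uint32_t magic_div(uint32_t n, Magic m)
{
    uint32_t hi = static_cast<uint32_t>((uint64_t{n} * m.multiplier) >> 32); // umulhi
    return (hi + n) >> m.shift;
}

int main()
{
    for(uint32_t d = 1; d <= 64; ++d) // small range; the real test sweeps 4096 divisors
    {
        Magic m = make_magic(d);
        for(uint32_t n = 0; n < (1u << 16); ++n)
            assert(magic_div(n, m) == n / d);
    }
    return 0;
}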

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t length_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestgroupnormBwdData : public ::testing::Test
{
@@ -29,15 +32,20 @@ class TestgroupnormBwdData : public ::testing::Test
{1, 32, 32, 32, 20},
{1, 16, 16, 32, 40}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
if((length_mask & (1 << i)) == 0)
{
continue;
}
auto length = lengths[i];
bool success = ck::profiler::profile_groupnorm_bwd_data_impl<DYDataType,
XDataType,
GammaDataType,
MeanInvStdDataType,
ComputeDataType,
DXDataType>(
true, 2, false, false, length);
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -49,3 +57,19 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestgroupnormBwdData, KernelTypes);
TYPED_TEST(TestgroupnormBwdData, Test_FP32) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
length_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: length_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}
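
Note: instance_index is forwarded into the profiler implementations, which loop over the instance library. The intended gating, schematically (my sketch; the real profile_*_impl signatures and instance types differ):

#include <cstdio>
#include <vector>

struct Instance { const char* name; }; // stand-in for a CK device-op instance

void run_selected(const std::vector<Instance>& instances, int instance_index)
{
    for(int i = 0; i < static_cast<int>(instances.size()); ++i)
    {
        // -1 keeps the old behavior (run every instance); otherwise run one.
        if(instance_index >= 0 && i != instance_index)
            continue;
        std::printf("running instance %d: %s\n", i, instances[i].name);
    }
}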

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t length_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestLayernorm2dBwdData : public ::testing::Test
{
@@ -25,16 +28,21 @@ class TestLayernorm2dBwdData : public ::testing::Test
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
bool success =
ck::profiler::profile_layernorm_bwd_data_impl<DYDataType,
XDataType,
GammaDataType,
MeanInvStdDataType,
ComputeDataType,
DXDataType,
2>(true, 2, false, false, length);
if((length_mask & (1 << i)) == 0)
{
continue;
}
auto length = lengths[i];
bool success = ck::profiler::profile_layernorm_bwd_data_impl<DYDataType,
XDataType,
GammaDataType,
MeanInvStdDataType,
ComputeDataType,
DXDataType,
2>(
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -46,3 +54,20 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestLayernorm2dBwdData, KernelTypes);
TYPED_TEST(TestLayernorm2dBwdData, Test_FP32) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
length_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: length_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t length_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestLayernorm2dBwdGammaBeta : public ::testing::Test
{
@@ -25,8 +28,13 @@ class TestLayernorm2dBwdGammaBeta : public ::testing::Test
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
if((length_mask & (1 << i)) == 0)
{
continue;
}
auto length = lengths[i];
bool success = ck::profiler::profile_layernorm_bwd_gamma_beta_impl<DYDataType,
XDataType,
MeanInvStdDataType,
@@ -34,7 +42,7 @@ class TestLayernorm2dBwdGammaBeta : public ::testing::Test
DGammaDataType,
DBetaDataType,
2>(
true, 2, false, false, length);
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -46,3 +54,20 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestLayernorm2dBwdGammaBeta, KernelTypes);
TYPED_TEST(TestLayernorm2dBwdGammaBeta, Test_FP32) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
length_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: length_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}
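
Note: one caveat about this recurring pattern: (1 << i) is an int shift, well defined only for i < 31. With the 16-bit default mask (0xffff) and the short case lists here that is fine, but a longer list would want a 64-bit-safe test, e.g. (hypothetical variant, not in the commit):

#include <cstddef>
#include <cstdint>

inline bool case_enabled(std::uint64_t mask, std::size_t i)
{
    return i < 64 && (mask & (std::uint64_t{1} << i)) != 0;
}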

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t length_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestGroupnorm : public ::testing::Test
{
@@ -31,16 +34,21 @@ class TestGroupnorm : public ::testing::Test
{2, 32, 32, 32, 40},
{1, 16, 16, 32, 40}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
bool success =
ck::profiler::profile_groupnorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true>(true, 2, false, false, length);
if((length_mask & (1 << i)) == 0)
{
continue;
}
auto length = lengths[i];
bool success = ck::profiler::profile_groupnorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true>(
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -52,3 +60,20 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestGroupnorm, KernelTypes);
TYPED_TEST(TestGroupnorm, Test_FP16) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
length_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: length_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t length_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestGroupnorm : public ::testing::Test
{
@@ -29,16 +32,21 @@ class TestGroupnorm : public ::testing::Test
{1, 32, 32, 32, 20},
{1, 16, 16, 32, 40}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
bool success =
ck::profiler::profile_groupnorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true>(true, 2, false, false, length);
if((length_mask & (1 << i)) == 0)
{
continue;
}
auto length = lengths[i];
bool success = ck::profiler::profile_groupnorm_impl<XDataType,
GammaDataType,
BetaDataType,
ComputeDataType,
YDataType,
SaveMeanInvStdDataType,
true>(
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -50,3 +58,20 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestGroupnorm, KernelTypes);
TYPED_TEST(TestGroupnorm, Test_FP32) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
length_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: length_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t length_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestLayernorm2d : public ::testing::Test
{
@@ -25,8 +28,13 @@ class TestLayernorm2d : public ::testing::Test
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
if((length_mask & (1 << i)) == 0)
{
continue;
}
auto length = lengths[i];
bool success = ck::profiler::profile_layernorm_impl<XDataType,
GammaDataType,
BetaDataType,
@@ -34,7 +42,8 @@ class TestLayernorm2d : public ::testing::Test
YDataType,
SaveMeanInvStdDataType,
true,
2>(true, 2, false, false, length);
2>(
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -46,3 +55,19 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);
TYPED_TEST(TestLayernorm2d, Test_FP16) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
length_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: length_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

View File

@@ -8,6 +8,9 @@ using F16 = ck::half_t;
using F32 = float;
using ck::index_t;
static ck::index_t length_mask = 0xffff;
static ck::index_t instance_index = -1;
template <typename Tuple>
class TestLayernorm2d : public ::testing::Test
{
@@ -25,8 +28,13 @@ class TestLayernorm2d : public ::testing::Test
std::vector<std::vector<ck::index_t>> lengths = {
{4, 256}, {8, 511}, {9, 1032}, {4, 2048}, {1, 8192}, {4000, 2000}};
for(auto length : lengths)
for(size_t i = 0; i < lengths.size(); i++)
{
if((length_mask & (1 << i)) == 0)
{
continue;
}
auto length = lengths[i];
bool success = ck::profiler::profile_layernorm_impl<XDataType,
GammaDataType,
BetaDataType,
@@ -34,7 +42,8 @@ class TestLayernorm2d : public ::testing::Test
YDataType,
SaveMeanInvStdDataType,
true,
2>(true, 2, false, false, length);
2>(
true, 2, false, false, length, instance_index);
EXPECT_TRUE(success);
}
}
@@ -46,3 +55,19 @@ using KernelTypes = ::testing::Types<
TYPED_TEST_SUITE(TestLayernorm2d, KernelTypes);
TYPED_TEST(TestLayernorm2d, Test_FP32) { this->Run(); }
int main(int argc, char** argv)
{
testing::InitGoogleTest(&argc, argv);
if(argc == 1) {}
else if(argc == 3)
{
length_mask = strtol(argv[1], nullptr, 0);
instance_index = atoi(argv[2]);
}
else
{
std::cout << "Usage of " << argv[0] << std::endl;
std::cout << "Arg1,2: length_mask instance_index(-1 means all)" << std::endl;
}
return RUN_ALL_TESTS();
}

Some files were not shown because too many files have changed in this diff.