Clean up conv example, instances, profiler and test (#324)

* convnd_fwd fp16 example

* update example

* update example

* update instance

* updating reference conv

* update reference conv

* update conv fwd profiler

* update conv 1d and 3d instance

* update include path

* clean

* update profiler for conv bwd data and weight

* update conv bwd weight

* clean

* update conv example

* update profiler for conv bwd weight

* update ckprofiler for conv bwd data

* fix reference conv bwd data bug; update conv bwd data test

* update examples

* fix initialization issue

* update test for conv fwd

* clean

* clean

* remove test case too sensitive to error threshold

* fix test

* clean

* fix build

* adding conv multiple d

* adding conv multiple D

* add matrix padder

* add gemm padding to convnd

* adding group conv

* update gemm multi-d

* refactor

* refactor

* refactor

* clean

* clean

* refactor

* refactor

* reorg

* add ds

* add bias

* clean

* add G

* adding group

* adding group

* adding group

* update Tensor

* clean

* update example

* update DeviceGemmMultipleD_Xdl_CShuffle

* update conv bwd-data and bwd-weight

* update contraction example

* update gemm and batch gemm with e permute

* fix example build

* instance for grouped conv1d

* update example

* adding group conv instance

* update gemm bilinear instance

* update gemm+add+add+fastgelu instance

* update profiler

* update profiler

* update test

* update test and client example

* clean

* add grouped conv into profiler

* update profiler

* clean

* add test grouped conv, update all conv test to gtest

* update test

[ROCm/composable_kernel commit: 500fa99512]
Author: Chao Liu
Date: 2022-07-29 18:19:25 -05:00
Committed by: GitHub
Parent: 1450273dc5
Commit: 236f946292
373 changed files with 17544 additions and 17013 deletions


@@ -41,11 +41,11 @@ add_subdirectory(gemm_reduce)
add_subdirectory(batched_gemm)
add_subdirectory(batched_gemm_reduce)
add_subdirectory(grouped_gemm)
add_subdirectory(convnd_fwd)
add_subdirectory(reduce)
add_subdirectory(conv2d_bwd_weight)
add_subdirectory(convnd_fwd)
add_subdirectory(convnd_bwd_weight)
add_subdirectory(convnd_bwd_data)
add_subdirectory(grouped_convnd_fwd)
add_subdirectory(block_to_ctile_map)
add_subdirectory(softmax)
add_subdirectory(layernorm)


@@ -1,4 +1,4 @@
add_test_executable(test_batched_gemm_fp16 batched_gemm_fp16.cpp)
target_link_libraries(test_batched_gemm_fp16 PRIVATE host_tensor)
target_link_libraries(test_batched_gemm_fp16 PRIVATE utility)
target_link_libraries(test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance)


@@ -1,3 +1,3 @@
add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE host_tensor)
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE utility)
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance)


@@ -1,3 +0,0 @@
add_test_executable(test_conv2d_bwd_data conv2d_bwd_data.cpp)
target_link_libraries(test_conv2d_bwd_data PRIVATE host_tensor)
target_link_libraries(test_conv2d_bwd_data PRIVATE device_conv2d_bwd_data_instance)


@@ -1,330 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "config.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "host_conv.hpp"
#include "tensor_layout.hpp"
#include "device_tensor.hpp"
#include "device_conv_bwd_data.hpp"
#include "element_wise_operation.hpp"
#include "reference_conv_bwd_data.hpp"
using F16 = ck::half_t;
using F32 = float;
using BF16 = ck::bhalf_t;
using INT8 = int8_t;
namespace ck {
namespace tensor_operation {
namespace device {
namespace instance {
using DeviceConvBwdDataNoOpPtr =
DeviceConvBwdDataPtr<ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough>;
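// Alias for a conv bwd-data instance whose input, weight and output
// element-wise operators are all PassThrough, i.e. no fused post-ops.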
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances(
std::vector<DeviceConvBwdDataNoOpPtr>&);
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances(
std::vector<DeviceConvBwdDataNoOpPtr>&);
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances(
std::vector<DeviceConvBwdDataNoOpPtr>&);
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances(
std::vector<DeviceConvBwdDataNoOpPtr>&);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
template <typename T>
static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
{
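// Note: despite its name, max_diff is a fixed absolute tolerance (1e-6), not a
// running maximum; the loop fails on the first element whose |ref - result|
// exceeds it.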
float max_diff = 1e-6;
for(std::size_t i = 0; i < ref.mData.size(); ++i)
{
float diff = std::abs(double(ref.mData[i]) - double(result.mData[i]));
if(max_diff < diff)
{
return false;
}
}
return true;
}
int main(int argc, char* argv[])
{
int data_type = 0;
int init_method = 0;
// Conv shape
ck::index_t N = 128;
ck::index_t K = 256;
ck::index_t C = 192;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 71;
ck::index_t Wi = 71;
ck::index_t conv_stride_h = 2;
ck::index_t conv_stride_w = 2;
ck::index_t conv_dilation_h = 1;
ck::index_t conv_dilation_w = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
if(argc == 1)
{
data_type = 1;
init_method = 1;
}
else if(argc == 3)
{
data_type = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
}
else if(argc == 18)
{
data_type = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
N = std::stoi(argv[3]);
K = std::stoi(argv[4]);
C = std::stoi(argv[5]);
Y = std::stoi(argv[6]);
X = std::stoi(argv[7]);
Hi = std::stoi(argv[8]);
Wi = std::stoi(argv[9]);
conv_stride_h = std::stoi(argv[10]);
conv_stride_w = std::stoi(argv[11]);
conv_dilation_h = std::stoi(argv[12]);
conv_dilation_w = std::stoi(argv[13]);
in_left_pad_h = std::stoi(argv[14]);
in_left_pad_w = std::stoi(argv[15]);
in_right_pad_h = std::stoi(argv[16]);
in_right_pad_w = std::stoi(argv[17]);
}
else
{
printf("arg1: data type (0=fp32, 1=fp16, 2= bfp16, 3= int8_t )\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
printf("arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
}
auto Run = [&](auto input_type, auto wei_type, auto out_type, auto acc_type) {
using InDataType = decltype(input_type);
using WeiDataType = decltype(wei_type);
using OutDataType = decltype(out_type);
using AccDataType = decltype(acc_type);
using ReferenceConvBwdInstance =
ck::tensor_operation::host::ReferenceConvBwdData<InDataType,
WeiDataType,
OutDataType,
AccDataType,
InElementOp,
WeiElementOp,
OutElementOp>;
const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
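// For reference: YEff = (Y - 1) * dilation + 1 is the effective (dilated)
// filter size, so with the defaults above (Hi = 71, pads 1 + 1, Y = 3,
// dilation 1, stride 2): YEff = 3 and Ho = (71 + 1 + 1 - 3) / 2 + 1 = 36;
// the same arithmetic gives Wo = 36.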
const std::vector<ck::index_t> input_spatial_lengths{{Hi, Wi}};
const std::vector<ck::index_t> filter_spatial_lengths{{Y, X}};
const std::vector<ck::index_t> output_spatial_lengths{{Ho, Wo}};
const std::vector<ck::index_t> conv_filter_strides{{conv_stride_h, conv_stride_w}};
const std::vector<ck::index_t> conv_filter_dilations{{conv_dilation_h, conv_dilation_w}};
const std::vector<ck::index_t> input_left_pads{{in_left_pad_h, in_left_pad_w}};
const std::vector<ck::index_t> input_right_pads{{in_right_pad_h, in_right_pad_w}};
auto f_host_tensor_descriptor =
[](std::size_t N_, std::size_t C_, std::size_t H, std::size_t W) {
return HostTensorDescriptor(std::vector<std::size_t>({N_, C_, H, W}),
std::vector<std::size_t>({C_ * H * W, 1, W * C_, C_}));
};
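// The descriptor keeps logical NCHW ordering {N, C, H, W}, while the strides
// {C*H*W, 1, W*C, C} describe an NHWC physical layout (C varies fastest).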
Tensor<OutDataType> out_n_k_ho_wo(f_host_tensor_descriptor(N, K, Ho, Wo));
Tensor<WeiDataType> wei_k_c_y_x(f_host_tensor_descriptor(K, C, Y, X));
Tensor<InDataType> in_n_c_hi_wi_host_result(f_host_tensor_descriptor(N, C, Hi, Wi));
Tensor<InDataType> in_n_c_hi_wi_device_result(f_host_tensor_descriptor(N, C, Hi, Wi));
std::cout << "in_n_c_hi_wi: " << in_n_c_hi_wi_host_result.mDesc << std::endl;
std::cout << "wei_k_c_y_x: " << wei_k_c_y_x.mDesc << std::endl;
std::cout << "out_n_k_ho_wo: " << out_n_k_ho_wo.mDesc << std::endl;
switch(init_method)
{
case 0: break;
case 1:
out_n_k_ho_wo.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
wei_k_c_y_x.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
break;
default:
out_n_k_ho_wo.GenerateTensorValue(GeneratorTensor_1<OutDataType>{1});
wei_k_c_y_x.GenerateTensorValue(GeneratorTensor_1<WeiDataType>{1});
}
DeviceMem in_device_buf(sizeof(InDataType) *
in_n_c_hi_wi_device_result.mDesc.GetElementSpace());
DeviceMem wei_device_buf(sizeof(WeiDataType) * wei_k_c_y_x.mDesc.GetElementSpace());
DeviceMem out_device_buf(sizeof(OutDataType) * out_n_k_ho_wo.mDesc.GetElementSpace());
out_device_buf.ToDevice(out_n_k_ho_wo.mData.data());
wei_device_buf.ToDevice(wei_k_c_y_x.mData.data());
// reset input to zero
in_n_c_hi_wi_device_result.GenerateTensorValue(GeneratorTensor_1<InDataType>{0});
in_device_buf.ToDevice(in_n_c_hi_wi_device_result.mData.data());
// get host result
{
auto ref_conv = ReferenceConvBwdInstance{};
auto ref_invoker = ref_conv.MakeInvoker();
auto ref_argument = ref_conv.MakeArgument(in_n_c_hi_wi_host_result,
wei_k_c_y_x,
out_n_k_ho_wo,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads,
InElementOp{},
WeiElementOp{},
OutElementOp{});
ref_invoker.Run(ref_argument);
}
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using DeviceConvBwdDataNoOpPtr = ck::tensor_operation::device::
DeviceConvBwdDataPtr<PassThrough, PassThrough, PassThrough>;
// add device Conv instances
std::vector<DeviceConvBwdDataNoOpPtr> conv_ptrs;
if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, float> &&
ck::is_same_v<ck::remove_cv_t<WeiDataType>, float> &&
ck::is_same_v<ck::remove_cv_t<OutDataType>, float>)
{
ck::tensor_operation::device::instance::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
}
else if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, ck::half_t> &&
ck::is_same_v<ck::remove_cv_t<WeiDataType>, ck::half_t> &&
ck::is_same_v<ck::remove_cv_t<OutDataType>, ck::half_t>)
{
ck::tensor_operation::device::instance::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
}
else if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, ck::bhalf_t> &&
ck::is_same_v<ck::remove_cv_t<WeiDataType>, ck::bhalf_t> &&
ck::is_same_v<ck::remove_cv_t<OutDataType>, ck::bhalf_t>)
{
ck::tensor_operation::device::instance::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
}
else if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, int8_t> &&
ck::is_same_v<ck::remove_cv_t<WeiDataType>, int8_t> &&
ck::is_same_v<ck::remove_cv_t<OutDataType>, int8_t>)
{
ck::tensor_operation::device::instance::
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
}
if(conv_ptrs.empty())
{
throw std::runtime_error("wrong! no device Conv instance found");
}
// profile device Conv instances
bool success = true;
for(auto& conv_ptr : conv_ptrs)
{
auto argument_ptr = conv_ptr->MakeArgumentPointer(
static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
N,
K,
C,
input_spatial_lengths,
filter_spatial_lengths,
output_spatial_lengths,
conv_filter_strides,
conv_filter_dilations,
input_left_pads,
input_right_pads,
InElementOp{},
WeiElementOp{},
OutElementOp{});
if(conv_ptr->IsSupportedArgument(argument_ptr.get()))
{
auto invoker_ptr = conv_ptr->MakeInvokerPointer();
invoker_ptr->Run(argument_ptr.get(), 1);
in_device_buf.FromDevice(in_n_c_hi_wi_device_result.mData.data());
if(!check_out(in_n_c_hi_wi_host_result, in_n_c_hi_wi_device_result))
{
std::cout << "Fail Info: " << conv_ptr->GetTypeString() << std::endl;
success = false;
}
else
{
std::cout << "Pass Info: " << conv_ptr->GetTypeString() << std::endl;
}
}
else
{
std::cout << "Not support Info: " << conv_ptr->GetTypeString() << std::endl;
}
}
if(success)
{
std::cout << "test conv2d bwd : Pass" << std::endl;
return 0;
}
else
{
std::cout << "test conv2d bwd: Fail " << std::endl;
return -1;
}
};
if(data_type == 0)
{
return Run(F32(), F32(), F32(), F32());
}
else if(data_type == 1)
{
return Run(F16(), F16(), F16(), F32());
}
else if(data_type == 2)
{
return Run(BF16(), BF16(), BF16(), F32());
}
else if(data_type == 3)
{
return Run(INT8(), INT8(), INT8(), int());
}
else
{
return 1;
}
}


@@ -1,2 +0,0 @@
#add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
#target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_util)


@@ -1,217 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <vector>
#include "test/convnd_fwd/conv_util.hpp"
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
int test_self()
{
bool pass = true;
std::vector<ck::utils::conv::ConvParams> params;
params.push_back({2, 128, 256, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
params.push_back({2, 128, 256, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
params.push_back({2, 128, 256, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : params)
{
// f32
pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// fp16
pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
}
return pass;
}
int main(int argc, char* argv[])
{
int data_type = 1;
int init_method = 1;
// Conv shape
ck::index_t N = 128;
ck::index_t K = 256;
ck::index_t C = 192;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 71;
ck::index_t Wi = 71;
ck::index_t conv_stride_h = 2;
ck::index_t conv_stride_w = 2;
ck::index_t conv_dilation_h = 1;
ck::index_t conv_dilation_w = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
ck::index_t split_k = 1;
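// split_k > 1 splits the reduction dimension of the backward-weight GEMM
// across workgroups and accumulates the partial results; 1 disables the split.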
bool pass = true;
if(argc == 1)
{
pass = test_self();
}
else
{
if(argc == 3)
{
data_type = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
}
else if(argc == 19)
{
data_type = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
N = std::stoi(argv[3]);
K = std::stoi(argv[4]);
C = std::stoi(argv[5]);
Y = std::stoi(argv[6]);
X = std::stoi(argv[7]);
Hi = std::stoi(argv[8]);
Wi = std::stoi(argv[9]);
conv_stride_h = std::stoi(argv[10]);
conv_stride_w = std::stoi(argv[11]);
conv_dilation_h = std::stoi(argv[12]);
conv_dilation_w = std::stoi(argv[13]);
in_left_pad_h = std::stoi(argv[14]);
in_left_pad_w = std::stoi(argv[15]);
in_right_pad_h = std::stoi(argv[16]);
in_right_pad_w = std::stoi(argv[17]);
split_k = std::stoi(argv[18]);
}
else
{
printf("arg1: data type (0=fp32, 1=fp16, 2= bfp16, 3= int8_t )\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
printf("arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(1);
}
ck::utils::conv::ConvParams param{2,
N,
K,
C,
{Y, X},
{Hi, Wi},
{conv_stride_h, conv_stride_w},
{conv_dilation_h, conv_dilation_w},
{in_left_pad_h, in_left_pad_w},
{in_right_pad_h, in_right_pad_w}};
if(data_type == 0)
{
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
init_method,
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
split_k);
}
else if(data_type == 1)
{
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
init_method,
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
split_k);
}
else
{
std::cout << "Not support data type" << std::endl;
return 1;
}
}
if(pass)
{
std::cout << "test conv2d bwd weight : Pass" << std::endl;
return 0;
}
else
{
std::cout << "test conv2d bwd weight: Fail " << std::endl;
return -1;
}
}


@@ -1,2 +1,2 @@
add_gtest_executable(test_conv_util conv_util.cpp)
target_link_libraries(test_conv_util PRIVATE host_tensor conv_util)
target_link_libraries(test_conv_util PRIVATE utility)


@@ -10,198 +10,147 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
namespace {
class TestConvUtil : public ::testing::Test
{
public:
void SetNDParams(std::size_t ndims)
void SetNDParams(std::size_t ndims, std::size_t s, std::size_t d, std::size_t p)
{
conv_params.num_dim_spatial_ = ndims;
conv_params.filter_spatial_lengths_ = std::vector<ck::index_t>(ndims, 3);
conv_params.input_spatial_lengths_ = std::vector<ck::index_t>(ndims, 71);
conv_params.conv_filter_strides_ = std::vector<ck::index_t>(ndims, 2);
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>(ndims, 1);
conv_params.input_left_pads_ = std::vector<ck::index_t>(ndims, 1);
conv_params.input_right_pads_ = std::vector<ck::index_t>(ndims, 1);
conv_params = ck::utils::conv::ConvParam(ndims,
2,
128,
192,
256,
std::vector<ck::index_t>(ndims, 3),
std::vector<ck::index_t>(ndims, 71),
std::vector<ck::index_t>(ndims, s),
std::vector<ck::index_t>(ndims, d),
std::vector<ck::index_t>(ndims, p),
std::vector<ck::index_t>(ndims, p));
}
protected:
// ------- default 2D -------
// input NCHW {128,192,71,71},
// weights KCYX {256,192,3,3},
// stride {2,2},
// dilations {1,1},
// padding {{1,1}, {1,1}}
ck::utils::conv::ConvParams conv_params;
// input GNCHW {2, 128, 192, 71, 71},
// weights GKCYX {2, 256, 192, 3, 3},
// stride {s, s},
// dilations {d, d},
// padding {{p, p}, {p, p}
ck::utils::conv::ConvParam conv_params;
};
} // namespace
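// Expected lengths below follow the standard convolution output formula:
// out = (in + pad_left + pad_right - (dilation * (k - 1) + 1)) / stride + 1,
// e.g. in = 71, k = 3, stride = 2, dilation = 1, pads 1 + 1:
// (71 + 2 - 3) / 2 + 1 = 36.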
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
{
ck::utils::conv::ConvParams conv_params;
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36},
"Error: ConvParams 2D default constructor."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2};
conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37},
"Error: ConvParams 2D padding left/right {2,2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3};
conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(
ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23, 23},
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
}
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
{
SetNDParams(1);
// stride 2, dilation 1, pad 1
SetNDParams(1, 2, 1, 1);
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 1, dilation 1, pad 1
SetNDParams(1, 1, 1, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2};
conv_params.input_left_pads_ = std::vector<ck::index_t>{2};
conv_params.input_right_pads_ = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 2, dilation 1, pad 2
SetNDParams(1, 2, 1, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37},
"Error: ConvParams 1D padding left/right {2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 2, dilation 2, pad 2
SetNDParams(1, 2, 2, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3};
conv_params.input_left_pads_ = std::vector<ck::index_t>{1};
conv_params.input_right_pads_ = std::vector<ck::index_t>{1};
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 3, dilation 2, pad 1
SetNDParams(1, 3, 2, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(
ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23},
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."));
}
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
{
// stride 2, dilation 1, pad 1
SetNDParams(2, 2, 1, 1);
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36},
"Error: ConvParams 2D default constructor."));
// stride 1, dilation 1, pad 1
SetNDParams(2, 1, 1, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
// stride 2, dilation 1, pad 2
SetNDParams(2, 2, 1, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37},
"Error: ConvParams 2D padding left/right {2,2}."));
// stride 2, dilation 2, pad 2
SetNDParams(2, 2, 2, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
// stride 3, dilation 2, pad 1
SetNDParams(2, 3, 2, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(
ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{23, 23},
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
}
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
{
SetNDParams(3);
// stride 2, dilation 1, pad 1
SetNDParams(3, 2, 1, 1);
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 1, dilation 1, pad 1
SetNDParams(3, 1, 1, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{71, 71, 71},
"Error: ConvParams 3D stride {1, 1, 1}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2, 2};
conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2};
conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 2, dilation 1, pad 2
SetNDParams(3, 2, 1, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{37, 37, 37},
"Error: ConvParams 3D padding left/right {2, 2, 2}."));
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 2, dilation 2, pad 2
SetNDParams(3, 2, 2, 2);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
std::vector<ck::index_t>{36, 36, 36},
"Error: ConvParams 3D dilation {2, 2, 2}."));
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3};
conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
out_spatial_len = conv_params.GetOutputSpatialLengths();
// stride 3, dilation 2, pad 1
SetNDParams(3, 3, 2, 1);
out_spatial_len = conv_params.GetOutputSpatialLengths();
EXPECT_TRUE(ck::utils::check_err(
out_spatial_len,
std::vector<ck::index_t>{23, 23, 23},
"Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."));
}
TEST(ConvUtil, GetHostTensorDescriptor)
{
namespace tl = ck::tensor_layout::convolution;
std::vector<std::size_t> dims{2, 3, 4, 5};
HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{});
EXPECT_TRUE(ck::utils::check_err(
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{});
EXPECT_TRUE(ck::utils::check_err(
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!"));
dims = std::vector<std::size_t>{2, 3, 4};
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{});
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(
h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!"));
dims = std::vector<std::size_t>{2, 3, 4, 5, 6};
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{});
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
1, // C
3 * 5 * 6, // D
3 * 6, // H
3}, // W
"Error: wrong NDHWC dimensions strides!"));
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
EXPECT_TRUE(
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!"));
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
{3 * 4 * 5 * 6, // N
4 * 5 * 6, // C
5 * 6, // D
6, // H
1}, // W
"Error: wrong NCDHW dimensions strides!"));
}


@@ -1,2 +1,2 @@
add_test_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_util)
add_gtest_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
target_link_libraries(test_convnd_bwd_data PRIVATE utility device_conv1d_bwd_data_instance device_conv2d_bwd_data_instance device_conv3d_bwd_data_instance)


@@ -1,331 +1,241 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/include/profile_convnd_bwd_data_impl.hpp"
#include "profiler/include/profile_conv_bwd_data_impl.hpp"
int main()
class TestConvndBwdData : public ::testing::Test
{
bool pass = true;
// check 1d
std::vector<ck::utils::conv::ConvParams> params;
params.push_back({1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
params.push_back({1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
params.push_back({1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
protected:
std::vector<ck::utils::conv::ConvParam> conv_params;
};
for(auto& param : params)
// 1d
TEST_F(TestConvndBwdData, Conv1dBwdData)
{
conv_params.clear();
conv_params.push_back({1, 1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
conv_params.push_back({1, 1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
conv_params.push_back({1, 1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
for(auto& param : conv_params)
{
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
float,
float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
bool pass;
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
ck::half_t,
ck::half_t,
ck::half_t,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
// fp32
pass = ck::profiler::profile_conv_bwd_data_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(pass);
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
int8_t,
int8_t,
int8_t,
int,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
}
// fp16
pass = ck::profiler::profile_conv_bwd_data_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
// check 2d
params.clear();
params.push_back({2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
params.push_back({2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
params.push_back({2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
EXPECT_TRUE(pass);
for(auto& param : params)
{
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
float,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
// bf16
pass = ck::profiler::profile_conv_bwd_data_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
EXPECT_TRUE(pass);
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
// int8
pass = ck::profiler::profile_conv_bwd_data_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
int8_t,
int8_t,
int8_t,
int,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
}
// check 3d
params.clear();
params.push_back(
{3, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
params.push_back(
{3, 128, 128, 256, {3, 3, 3}, {14, 14, 14}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
params.push_back(
{3, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
for(auto& param : params)
{
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
float,
float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
ck::half_t,
ck::half_t,
ck::half_t,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
int8_t,
int8_t,
int8_t,
int,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_);
}
if(pass)
{
std::cout << "test convnd bwd : Pass" << std::endl;
return 0;
}
else
{
std::cout << "test convnd bwd: Fail " << std::endl;
return -1;
EXPECT_TRUE(pass);
}
}
// 2d
TEST_F(TestConvndBwdData, Conv2dBwdData)
{
conv_params.clear();
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 1, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_conv_bwd_data_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_bwd_data_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_bwd_data_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_conv_bwd_data_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 3d
TEST_F(TestConvndBwdData, Conv3dBwdData)
{
conv_params.clear();
conv_params.push_back(
{3, 1, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
conv_params.push_back(
{3, 1, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
conv_params.push_back(
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_conv_bwd_data_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_bwd_data_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_bwd_data_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_conv_bwd_data_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}


@@ -1,2 +1,2 @@
add_test_executable(test_convnd_bwd_weight convnd_bwd_weight.cpp)
target_link_libraries(test_convnd_bwd_weight PRIVATE host_tensor device_convnd_bwd_weight_instance conv_util)
add_gtest_executable(test_convnd_bwd_weight convnd_bwd_weight.cpp)
target_link_libraries(test_convnd_bwd_weight PRIVATE utility device_conv1d_bwd_weight_instance device_conv2d_bwd_weight_instance device_conv3d_bwd_weight_instance)


@@ -1,283 +1,205 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "test/convnd_fwd/conv_util.hpp"
#include "profiler/include/profile_convnd_bwd_weight_impl.hpp"
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
int test_self()
class TestConvndBwdWeight : public ::testing::Test
{
bool pass = true;
std::vector<ck::utils::conv::ConvParams> params;
protected:
std::vector<ck::utils::conv::ConvParam> conv_params;
};
params.push_back({1, 128, 256, 256, {1}, {7}, {2}, {1}, {0}, {0}});
params.push_back({1, 128, 256, 256, {3}, {14}, {1}, {1}, {1}, {1}});
params.push_back({1, 128, 256, 256, {1}, {3}, {1}, {1}, {0}, {0}});
for(auto& param : params)
{
// f32
pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// fp16
pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// bf16
pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
}
// check 2d
params.clear();
params.push_back({2, 128, 256, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
params.push_back({2, 128, 256, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
params.push_back({2, 128, 256, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : params)
{
// f32
pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
float,
float,
float,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// fp16
pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// bf16
pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
}
// check 3d
params.clear();
params.push_back(
{3, 128, 256, 256, {1, 1, 1}, {4, 4, 4}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
params.push_back(
{3, 128, 256, 256, {3, 3, 3}, {4, 4, 8}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
params.push_back(
{3, 128, 256, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
for(auto& param : params)
{
// f32
pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
float,
float,
float,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// fp16
pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
ck::half_t,
ck::half_t,
ck::half_t,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
// bf16
pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK>(
true, // do_verification
1, // init_method
false, // do_log
true, // time_kernel
param.N_,
param.K_,
param.C_,
param.input_spatial_lengths_,
param.filter_spatial_lengths_,
param.GetOutputSpatialLengths(),
param.conv_filter_strides_,
param.conv_filter_dilations_,
param.input_left_pads_,
param.input_right_pads_,
2);
}
return pass;
}
int main()
// 1d
TEST_F(TestConvndBwdWeight, Conv1dBwdWeight)
{
// int data_type = 1;
// int init_method = 1;
conv_params.clear();
conv_params.push_back({1, 1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
conv_params.push_back({1, 1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
conv_params.push_back({1, 1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
bool pass = true;
pass = test_self();
if(pass)
for(auto& param : conv_params)
{
std::cout << "test conv2d bwd weight : Pass" << std::endl;
return 0;
}
else
{
std::cout << "test conv2d bwd weight: Fail " << std::endl;
return -1;
bool pass;
// fp32
pass = ck::profiler::profile_conv_bwd_weight_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_bwd_weight_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_bwd_weight_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
}
}
// 2d
TEST_F(TestConvndBwdWeight, Conv2dBwdWeight)
{
conv_params.clear();
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 1, 32, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
}
}
// 3d
TEST_F(TestConvndBwdWeight, Conv3dBwdWeight)
{
conv_params.clear();
conv_params.push_back(
{3, 1, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
conv_params.push_back(
{3, 1, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
conv_params.push_back(
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_conv_bwd_weight_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_bwd_weight_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_bwd_weight_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param,
2);
EXPECT_TRUE(pass);
}
}


@@ -1,13 +1,2 @@
add_custom_target(test_convnd_fwd)
add_gtest_executable(test_conv1d_fwd conv1d_fwd.cpp)
target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_instance conv_util)
add_dependencies(test_convnd_fwd test_conv1d_fwd)
add_gtest_executable(test_conv2d_fwd conv2d_fwd.cpp)
target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance device_convnd_2d_fwd_instance conv_util)
add_dependencies(test_convnd_fwd test_conv2d_fwd)
add_gtest_executable(test_conv3d_fwd conv3d_fwd.cpp)
target_link_libraries(test_conv3d_fwd PRIVATE host_tensor device_conv3d_fwd_instance conv_util)
add_dependencies(test_convnd_fwd test_conv3d_fwd)
add_gtest_executable(test_convnd_fwd convnd_fwd.cpp)
target_link_libraries(test_convnd_fwd PRIVATE utility device_conv2d_fwd_instance)


@@ -1,192 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "test/convnd_fwd/conv_util.hpp"
namespace {
class Conv1dFwdNWCInstances : public ::testing::Test
{
public:
template <typename T>
bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
const ck::utils::conv::ConvParams& params)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NWC,
ctl::KXC,
ctl::NWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(atol_);
run_engine.SetRtol(rtol_);
return run_engine.Test(conv_ptrs);
}
template <typename T>
bool test_default()
{
return test_conv1d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(), params_default_);
}
template <typename T>
bool test_filter1x1_stride1_pad0()
{
return test_conv1d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(),
params_filter1x1_stride1_pad0_);
}
template <typename T>
bool test_filter1x1_pad0()
{
return test_conv1d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(),
params_filter1x1_pad0_);
}
static inline ck::utils::conv::ConvParams params_default_{
1, 4, 256, 64, {3}, {71}, {2}, {2}, {2}, {2}};
static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
1, 4, 256, 64, {1}, {28}, {1}, {1}, {0}, {0}};
static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
1, 4, 256, 64, {1}, {28}, {2}, {1}, {0}, {0}};
private:
double atol_{1e-5};
double rtol_{1e-4};
};
} // anonymous namespace
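// Integer-valued initialization keeps floating-point accumulation exact, so
// the IntegerValues test uses tight tolerances (atol 1e-5, rtol 1e-4), while
// the half-precision FloatingPointValues test relaxes them to atol 0.1 /
// rtol 1e-2.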
TEST(Conv1DFwdNWC, IntegerValues)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
using T = float;
ck::utils::conv::ConvParams params{1, 4, 256, 64, {3}, {36}, {1}, {2}, {2}, {2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<1, T, T, T, T>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NWC,
ctl::KXC,
ctl::NWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
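// The integer-valued fp32 test above can use tight tolerances because small integer inputs are
// represented and accumulated exactly; the half-precision test below uses real-valued inputs, so
// accumulation error is larger and the tolerances are correspondingly looser.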
TEST(Conv1DFwdNWC, FloatingPointValues)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
using T = ck::half_t;
ck::utils::conv::ConvParams params{1, 4, 256, 64, {3}, {36}, {1}, {2}, {2}, {2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<1, T, T, T, float>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NWC,
ctl::KXC,
ctl::NWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistribution<T>,
FillUniformDistribution<T>>
conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(0.1);
run_engine.SetRtol(1e-2);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST_F(Conv1dFwdNWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv1dFwdNWCInstances, BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv1dFwdNWCInstances, BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}
TEST_F(Conv1dFwdNWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv1dFwdNWCInstances, F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv1dFwdNWCInstances, F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}
TEST_F(Conv1dFwdNWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv1dFwdNWCInstances, F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv1dFwdNWCInstances, F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}
TEST_F(Conv1dFwdNWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv1dFwdNWCInstances, I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv1dFwdNWCInstances, I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}

View File

@@ -1,266 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "test/convnd_fwd/conv_util.hpp"
namespace {
class Conv2dFwdNHWCInstances : public ::testing::Test
{
public:
template <typename T>
bool test_conv2d_nhwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
const ck::utils::conv::ConvParams& params)
{
using namespace std::placeholders;
using namespace ck::utils;
conv::ConvFwdOpInstance<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(atol_);
run_engine.SetRtol(rtol_);
return run_engine.Test(conv_ptrs);
}
template <typename T>
bool test_default(bool use_convnd = false)
{
if(use_convnd)
{
return test_conv2d_nhwc_instances<T>(
test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2), params_default_);
}
else
{
return test_conv2d_nhwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
params_default_);
}
}
template <typename T>
bool test_filter1x1_stride1_pad0(bool use_convnd = false)
{
if(use_convnd)
{
return test_conv2d_nhwc_instances<T>(
test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2),
params_filter1x1_stride1_pad0_);
}
else
{
return test_conv2d_nhwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
params_filter1x1_stride1_pad0_);
}
}
template <typename T>
bool test_filter1x1_pad0(bool use_convnd = false)
{
if(use_convnd)
{
return test_conv2d_nhwc_instances<T>(
test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2), params_filter1x1_pad0_);
}
else
{
return test_conv2d_nhwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
params_filter1x1_pad0_);
}
}
template <typename T>
bool test_oddC()
{
return test_conv2d_nhwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(), params_oddC_);
}
static inline ck::utils::conv::ConvParams params_default_{
2, 4, 256, 64, {3, 3}, {36, 36}, {2, 2}, {2, 2}, {2, 2}, {2, 2}};
static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
2, 4, 256, 64, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
2, 4, 256, 64, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
static inline ck::utils::conv::ConvParams params_oddC_{
2, 4, 256, 3, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
private:
double atol_{1e-5};
double rtol_{1e-4};
};
} // anonymous namespace
TEST(Conv2DFwdNHWC, IntegerValues)
{
using namespace std::placeholders;
using namespace ck::utils;
using T = float;
ck::utils::conv::ConvParams params{
2, 4, 256, 64, {3, 3}, {36, 36}, {1, 1}, {2, 2}, {2, 2}, {2, 2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<2, T, T, T, T>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-4);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST(Conv2DFwdNHWC, FloatingPointValues)
{
using namespace std::placeholders;
using namespace ck::utils;
using T = ck::half_t;
ck::utils::conv::ConvParams params{
2, 4, 256, 64, {3, 3}, {36, 36}, {2, 2}, {2, 2}, {2, 2}, {2, 2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<2, T, T, T, float>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistribution<T>,
FillUniformDistribution<T>>
conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(2e-4);
run_engine.SetRtol(1e-3);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST_F(Conv2dFwdNHWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv2dFwdNHWCInstances, BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv2dFwdNHWCInstances, BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv2dFwdNHWCInstances, F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_oddC) { EXPECT_TRUE(this->test_oddC<ck::half_t>()); }
TEST_F(Conv2dFwdNHWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv2dFwdNHWCInstances, F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv2dFwdNHWCInstances, F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}
TEST_F(Conv2dFwdNHWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv2dFwdNHWCInstances, I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv2dFwdNHWCInstances, I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_default)
{
EXPECT_TRUE(this->test_default<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_default)
{
EXPECT_TRUE(this->test_default<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F32_default) { EXPECT_TRUE(this->test_default<float>(true)); }
TEST_F(Conv2dFwdNHWCInstances, ND_F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_I8_default) { EXPECT_TRUE(this->test_default<int8_t>(true)); }
TEST_F(Conv2dFwdNHWCInstances, ND_I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>(true));
}

View File

@@ -1,317 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream>
#include <stdexcept>
#include <tuple>
#include <vector>
#include <gtest/gtest.h>
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "test/convnd_fwd/conv_util.hpp"
namespace {
class Conv3dFwdNDHWCInstances : public ::testing::Test
{
public:
template <typename T>
bool test_conv3d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
const ck::utils::conv::ConvParams& params)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NDHWC,
ctl::KZYXC,
ctl::NDHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(atol_);
run_engine.SetRtol(rtol_);
return run_engine.Test(conv_ptrs);
}
template <typename T>
bool test_default()
{
return test_conv3d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(), params_default_);
}
template <typename T>
bool test_filter1x1_stride1_pad0()
{
return test_conv3d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(),
params_filter1x1_stride1_pad0_);
}
template <typename T>
bool test_filter1x1_pad0()
{
return test_conv3d_nwc_instances<T>(
ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(),
params_filter1x1_pad0_);
}
static inline ck::utils::conv::ConvParams params_default_{
3, 4, 256, 64, {3, 3, 3}, {28, 28, 28}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};
static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
3, 4, 256, 64, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
3, 4, 256, 64, {1, 1, 1}, {28, 28, 28}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
private:
double atol_{1e-5};
double rtol_{1e-4};
};
} // anonymous namespace
TEST(Conv3DFwdNDHWC, IntegerValues)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
using T = float;
ck::utils::conv::ConvParams params{
3, 4, 256, 64, {3, 3, 3}, {18, 18, 18}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NDHWC,
ctl::KZYXC,
ctl::NDHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistributionIntegerValue<T>,
FillUniformDistributionIntegerValue<T>>
conv_instance(params,
true,
FillUniformDistributionIntegerValue<T>{},
FillUniformDistributionIntegerValue<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-5);
run_engine.SetRtol(1e-3);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST(Conv3DFwdNDHWC, FloatingPointValues)
{
using namespace std::placeholders;
using namespace ck::utils;
namespace ctl = ck::tensor_layout::convolution;
using T = ck::half_t;
ck::utils::conv::ConvParams params{
3, 4, 256, 64, {3, 3, 3}, {18, 18, 18}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3, T, T, T, float>(conv_ptrs);
conv::ConvFwdOpInstance<T,
T,
T,
ctl::NDHWC,
ctl::KZYXC,
ctl::NDHWK,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
ck::tensor_operation::element_wise::PassThrough,
FillUniformDistribution<T>,
FillUniformDistribution<T>>
conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});
auto reference_conv_fwd_fun =
std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
run_engine.SetAtol(1e-3);
run_engine.SetRtol(1e-3);
EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
TEST(Conv3DFwdNDHWC, InputOver2GB)
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils;
using T = float;
// >2GB Input
conv::ConvParams params;
params.num_dim_spatial_ = 3;
params.N_ = 2;
params.K_ = 16;
params.C_ = 32;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{32, 1000, 1000};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
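// Rough size check (assuming fp32): N * C * D * H * W = 2 * 32 * 32 * 1000 * 1000
// ~= 2.05e9 elements * 4 B ~= 8.2 GB of input data, past the 2 GB limit implied by the test
// name, so IsSupportedArgument is expected to reject the argument.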
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_,
PassThrough{},
PassThrough{},
PassThrough{});
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
TEST(Conv3DFwdNDHWC, FiltersOver2GB)
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils;
using T = float;
// >2GB Filters
conv::ConvParams params;
params.num_dim_spatial_ = 3;
params.N_ = 2;
params.K_ = 16;
params.C_ = 32;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{4, 1000, 1000};
params.input_spatial_lengths_ = std::vector<ck::index_t>{16, 16, 16};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
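// Rough size check (assuming fp32): K * C * Z * Y * X = 16 * 32 * 4 * 1000 * 1000
// ~= 2.05e9 elements * 4 B ~= 8.2 GB of filter data, likewise expected to be rejected.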
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_,
PassThrough{},
PassThrough{},
PassThrough{});
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
TEST(Conv3DFwdNDHWC, OutputOver2GB)
{
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using namespace ck::utils;
using T = float;
// >2GB Output
conv::ConvParams params;
params.num_dim_spatial_ = 3;
params.N_ = 2;
params.K_ = 16;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{1, 1, 1};
params.input_spatial_lengths_ = std::vector<ck::index_t>{1000, 1000, 30};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2};
params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2};
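// Rough size check (assuming fp32): with 1x1x1 filters, stride 1 and padding 2, the output is
// 1004 x 1004 x 34, so N * K * Do * Ho * Wo ~= 2 * 16 * 1004 * 1004 * 34 ~= 1.1e9 elements
// * 4 B ~= 4.4 GB of output data, likewise expected to be rejected.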
std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
nullptr,
nullptr,
params.N_,
params.K_,
params.C_,
params.input_spatial_lengths_,
params.filter_spatial_lengths_,
params.GetOutputSpatialLengths(),
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_,
PassThrough{},
PassThrough{},
PassThrough{});
EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
TEST_F(Conv3dFwdNDHWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, BF16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, BF16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, F16_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, F16_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv3dFwdNDHWCInstances, F32_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv3dFwdNDHWCInstances, F32_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}
TEST_F(Conv3dFwdNDHWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, I8_filter1x1_stride1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, I8_filter1x1_pad0)
{
EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}

View File

@@ -1,174 +0,0 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <tuple>
#include "ck/ck.hpp"
#include "ck/utility/sequence.hpp"
#include "ck/utility/data_type.hpp"
#include "ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
namespace ck {
namespace tensor_operation {
namespace device {
using DeviceConvFwdNoOpPtr = DeviceConvFwdPtr<element_wise::PassThrough,
element_wise::PassThrough,
element_wise::PassThrough>;
namespace instance {
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
} // namespace instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
namespace test {
namespace conv {
template <ck::index_t... Is>
using S = ck::Sequence<Is...>;
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
using DeviceConvFwdNoOpPtr =
ck::tensor_operation::device::DeviceConvFwdPtr<InElementOp, WeiElementOp, OutElementOp>;
static constexpr auto ConvFwdDefault =
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
template <ck::index_t SpatialDims,
typename InDataType,
typename WeiDataType,
typename OutDataType,
typename AccDataType>
using DeviceConvNDFwdInstance = ck::tensor_operation::device::
DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
// clang-format off
InDataType, //
WeiDataType, //
OutDataType, //
AccDataType, // Accumulator data type.
InElementOp, // Input Elementwise Operation
WeiElementOp, // Weights Elementwise Operation
OutElementOp, // Output Elementwise Operation
ConvFwdDefault, // ConvForwardSpecialization
SpatialDims, // SpatialDims
256, // BlockSize
128, // MPerBlock
256, // NPerBlock
4, // K0PerBlock
8, // K1
32, // MPerXdl
32, // NPerXdl
2, // MXdlPerWave
4, // NXdlPerWave
S<4, 64, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
2, // ABlockTransferSrcVectorDim
8, // ABlockTransferSrcScalarPerVector
8, // ABlockTransferDstScalarPerVector_K1
true, // ABlockLdsAddExtraM
S<4, 64, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
2, // BBlockTransferSrcVectorDim
8, // BBlockTransferSrcScalarPerVector
8, // BBlockTransferDstScalarPerVector_K1
true, // BBlockLdsAddExtraN
7, // CThreadTransferSrcDstVectorDim
1>; // CThreadTransferDstScalarPerVector
// clang-format on
template <ck::index_t NDim,
typename InDataType,
typename WeiDataType,
typename OutDataType,
typename AccDataType>
void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances)
{
using ConvInstanceT =
DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType, AccDataType>;
instances.emplace_back(std::make_unique<ConvInstanceT>());
}
// TODO (aosewski)
// Temporary solution to get all DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
// instances. Once the 2D path is switched over to DeviceConvNDFwdXdl, remove the
// ConvolutionNDFwdInstances structures.
template <typename InDataType, typename WeiDataType, typename OutDataType>
struct ConvolutionNDFwdInstances;
template <>
struct ConvolutionNDFwdInstances<float, float, float>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
}
return conv_ptrs;
}
};
template <>
struct ConvolutionNDFwdInstances<ck::half_t, ck::half_t, ck::half_t>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
}
return conv_ptrs;
}
};
template <>
struct ConvolutionNDFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
}
return conv_ptrs;
}
};
template <>
struct ConvolutionNDFwdInstances<int8_t, int8_t, int8_t>
{
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
{
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
if(num_dim_spatial == 2)
{
ck::tensor_operation::device::instance::
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
}
return conv_ptrs;
}
};
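// Usage sketch (assumed, mirroring how the 2D tests above consume this factory):
//   std::vector<DeviceConvFwdNoOpPtr> ptrs =
//       test::conv::ConvolutionNDFwdInstances<float, float, float>::Get(2);
// Get() returns an empty vector for any rank other than 2, so callers should check before use.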
} // namespace conv
} // namespace test

View File

@@ -0,0 +1,241 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/include/profile_conv_fwd_impl.hpp"
class TestConvndFwd : public ::testing::Test
{
protected:
std::vector<ck::utils::conv::ConvParam> conv_params;
};
// 1d
TEST_F(TestConvndFwd, Conv1dFwd)
{
conv_params.clear();
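// ConvParam initializer order (inferred from ck::utils::conv::ConvParam):
// {num_dim_spatial, G, N, K, C, filter_lengths, input_lengths, strides, dilations,
//  left_pads, right_pads}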
conv_params.push_back({1, 1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
conv_params.push_back({1, 1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
conv_params.push_back({1, 1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_conv_fwd_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_fwd_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_fwd_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_conv_fwd_impl<1,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 2d
TEST_F(TestConvndFwd, Conv2dFwd)
{
conv_params.clear();
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 1, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_conv_fwd_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_fwd_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_fwd_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_conv_fwd_impl<2,
ck::tensor_layout::convolution::NHWC,
ck::tensor_layout::convolution::KYXC,
ck::tensor_layout::convolution::NHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 3d
TEST_F(TestConvndFwd, Conv3dFwd)
{
conv_params.clear();
conv_params.push_back(
{3, 1, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
conv_params.push_back(
{3, 1, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
conv_params.push_back(
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_conv_fwd_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_conv_fwd_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_conv_fwd_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_conv_fwd_impl<3,
ck::tensor_layout::convolution::NDHWC,
ck::tensor_layout::convolution::KZYXC,
ck::tensor_layout::convolution::NDHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}

View File

@@ -1,15 +1,15 @@
add_test_executable(test_gemm_fp32 gemm_fp32.cpp)
target_link_libraries(test_gemm_fp32 PRIVATE host_tensor)
target_link_libraries(test_gemm_fp32 PRIVATE utility)
target_link_libraries(test_gemm_fp32 PRIVATE device_gemm_instance)
add_test_executable(test_gemm_fp16 gemm_fp16.cpp)
target_link_libraries(test_gemm_fp16 PRIVATE host_tensor)
target_link_libraries(test_gemm_fp16 PRIVATE utility)
target_link_libraries(test_gemm_fp16 PRIVATE device_gemm_instance)
add_test_executable(test_gemm_bf16 gemm_bf16.cpp)
target_link_libraries(test_gemm_bf16 PRIVATE host_tensor)
target_link_libraries(test_gemm_bf16 PRIVATE utility)
target_link_libraries(test_gemm_bf16 PRIVATE device_gemm_instance)
add_test_executable(test_gemm_int8 gemm_int8.cpp)
target_link_libraries(test_gemm_int8 PRIVATE host_tensor)
target_link_libraries(test_gemm_int8 PRIVATE utility)
target_link_libraries(test_gemm_int8 PRIVATE device_gemm_instance)

View File

@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"

View File

@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"

View File

@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"

View File

@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"

View File

@@ -17,9 +17,9 @@
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"

View File

@@ -6,9 +6,9 @@
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace ck {
@@ -71,9 +71,9 @@ bool RunDeviceGEMM(DeviceGemmPtr_& gemmPtr,
BElementwiseOperation b_element_op,
CElementwiseOperation c_element_op)
{
DeviceMem a_m_k_device_buf(sizeof(ADataType) * A.mDesc.GetElementSpace());
DeviceMem b_k_n_device_buf(sizeof(BDataType) * B.mDesc.GetElementSpace());
DeviceMem c_m_n_device_buf(sizeof(CDataType) * C.mDesc.GetElementSpace());
DeviceMem a_m_k_device_buf(sizeof(ADataType) * A.mDesc.GetElementSpaceSize());
DeviceMem b_k_n_device_buf(sizeof(BDataType) * B.mDesc.GetElementSpaceSize());
DeviceMem c_m_n_device_buf(sizeof(CDataType) * C.mDesc.GetElementSpaceSize());
auto invoker_ptr = gemmPtr->MakeInvokerPointer();
auto argument_ptr =

View File

@@ -1,3 +1,3 @@
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE host_tensor)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)

View File

@@ -1,3 +1,3 @@
add_test_executable(test_gemm_split_k gemm_split_k.cpp)
target_link_libraries(test_gemm_split_k PRIVATE host_tensor)
target_link_libraries(test_gemm_split_k PRIVATE utility)
target_link_libraries(test_gemm_split_k PRIVATE device_gemm_splitk_instance)

View File

@@ -14,12 +14,12 @@
#include "ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/host_tensor/host_gemm.hpp"
#include "ck/library/utility/host_gemm.hpp"
enum struct GemmMatrixLayout
{
@@ -127,9 +127,9 @@ int test_gemm(const gemmArgs& args)
ck::tensor_operation::element_wise::PassThrough{},
ck::tensor_operation::element_wise::PassThrough{});
DeviceMem a_device_buf(sizeof(float) * a_m_k.mDesc.GetElementSpace());
DeviceMem b_device_buf(sizeof(float) * b_k_n.mDesc.GetElementSpace());
DeviceMem c_device_buf(sizeof(float) * c_m_n_device_result.mDesc.GetElementSpace());
DeviceMem a_device_buf(sizeof(float) * a_m_k.mDesc.GetElementSpaceSize());
DeviceMem b_device_buf(sizeof(float) * b_k_n.mDesc.GetElementSpaceSize());
DeviceMem c_device_buf(sizeof(float) * c_m_n_device_result.mDesc.GetElementSpaceSize());
a_device_buf.ToDevice(a_m_k.mData.data());
b_device_buf.ToDevice(b_k_n.mData.data());

View File

@@ -0,0 +1,3 @@
add_gtest_executable(test_grouped_convnd_fwd grouped_convnd_fwd.cpp)
target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)

View File

@@ -0,0 +1,270 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib>
#include <iostream>
#include <initializer_list>
#include <vector>
#include <gtest/gtest.h>
#include "profiler/include/profile_grouped_conv_fwd_impl.hpp"
class TestGroupedConvNdFwd : public ::testing::Test
{
protected:
std::vector<ck::utils::conv::ConvParam> conv_params;
};
// 1d GNWC/GKXC/GNWK
TEST_F(TestGroupedConvNdFwd, GroupedConv1dFwdGNWC)
{
conv_params.clear();
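// Same ConvParam initializer order as in test/convnd_fwd/convnd_fwd.cpp; the second argument
// (G = 2) exercises two convolution groups.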
conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
conv_params.push_back({1, 2, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 2d GNHWC/GKYXC/GNHWK
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdGNHWC)
{
conv_params.clear();
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::GNHWC,
ck::tensor_layout::convolution::GKYXC,
ck::tensor_layout::convolution::GNHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 3d GNDHWC/GKZYXC/GNDHWK
TEST_F(TestGroupedConvNdFwd, GroupedConv3dFwdGNDHWC)
{
conv_params.clear();
conv_params.push_back(
{3, 2, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
conv_params.push_back(
{3, 2, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
conv_params.push_back(
{3, 2, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp32
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
float,
float,
float>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// bf16
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
ck::bhalf_t,
ck::bhalf_t,
ck::bhalf_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
// int8
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
ck::tensor_layout::convolution::GNDHWC,
ck::tensor_layout::convolution::GKZYXC,
ck::tensor_layout::convolution::GNDHWK,
int8_t,
int8_t,
int8_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}
// 2d NHWGC/KYXGC/NHWGK
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
{
conv_params.clear();
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
for(auto& param : conv_params)
{
bool pass;
// fp16
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
ck::tensor_layout::convolution::NHWGC,
ck::tensor_layout::convolution::KYXGC,
ck::tensor_layout::convolution::NHWGK,
ck::half_t,
ck::half_t,
ck::half_t>(true, // do_verification
1, // init_method
false, // do_log
false, // time_kernel
param);
EXPECT_TRUE(pass);
}
}

View File

@@ -1,3 +1,3 @@
add_test_executable(test_grouped_gemm_fp16 grouped_gemm_fp16.cpp)
target_link_libraries(test_grouped_gemm_fp16 PRIVATE host_tensor)
target_link_libraries(test_grouped_gemm_fp16 PRIVATE utility)
target_link_libraries(test_grouped_gemm_fp16 PRIVATE device_grouped_gemm_instance)

View File

@@ -2,7 +2,9 @@ add_custom_target(test_layernorm)
add_gtest_executable(test_layernorm_fp32 test_layernorm_fp32.cpp)
add_gtest_executable(test_layernorm_fp16 test_layernorm_fp16.cpp)
target_link_libraries(test_layernorm_fp32 PRIVATE host_tensor)
target_link_libraries(test_layernorm_fp16 PRIVATE host_tensor)
target_link_libraries(test_layernorm_fp32 PRIVATE utility)
target_link_libraries(test_layernorm_fp16 PRIVATE utility)
add_dependencies(test_layernorm test_layernorm_fp32)
add_dependencies(test_layernorm test_layernorm_fp16)

View File

@@ -12,8 +12,8 @@
#include "ck/tensor_operation/gpu/device/device_layernorm.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp"
namespace ck {
@@ -102,10 +102,10 @@ class TestLayernorm : public ::testing::Test
gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
beta.GenerateTensorValue(GeneratorTensor_3<BetaDataType>{0.0, 1.0});
DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpace());
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpace());
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpace());
DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpace());
DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpaceSize());
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpaceSize());
x_dev.ToDevice(x.mData.data());
gamma_dev.ToDevice(gamma.mData.data());

View File

@@ -1,2 +1,2 @@
add_test_executable(test_magic_number_division magic_number_division.cpp)
target_link_libraries(test_magic_number_division PRIVATE host_tensor)
target_link_libraries(test_magic_number_division PRIVATE utility)

View File

@@ -9,9 +9,9 @@
#include "ck/ck.hpp"
#include "ck/utility/magic_division.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
__global__ void gpu_magic_number_division(uint32_t magic_multiplier,
uint32_t magic_shift,

View File

@@ -1,7 +1,7 @@
add_test_executable(test_reduce_no_index reduce_no_index.cpp)
add_test_executable(test_reduce_with_index reduce_with_index.cpp)
target_link_libraries(test_reduce_no_index PRIVATE host_tensor)
target_link_libraries(test_reduce_no_index PRIVATE utility)
target_link_libraries(test_reduce_no_index PRIVATE device_reduce_instance)
target_link_libraries(test_reduce_with_index PRIVATE host_tensor)
target_link_libraries(test_reduce_with_index PRIVATE utility)
target_link_libraries(test_reduce_with_index PRIVATE device_reduce_instance)

View File

@@ -3,7 +3,7 @@
#include <getopt.h>
#include "ck/library/host_tensor/host_common_util.hpp"
#include "ck/library/utility/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
using namespace ck;

View File

@@ -3,7 +3,7 @@
#include <getopt.h>
#include "ck/library/host_tensor/host_common_util.hpp"
#include "ck/library/utility/host_common_util.hpp"
#include "profiler/include/profile_reduce_impl.hpp"
using namespace ck;

View File

@@ -1,2 +1,2 @@
add_gtest_executable(test_reference_conv_fwd reference_conv_fwd.cpp)
target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor conv_util)
target_link_libraries(test_reference_conv_fwd PRIVATE utility)

View File

@@ -13,74 +13,64 @@
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/utility/conv_util.hpp"
#include "ck/library/utility/fill.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/convolution_parameter.hpp"
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
namespace {
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
template <ck::index_t NDim,
template <ck::index_t NDimSpatial,
typename InDataType = float,
typename WeiDataType = float,
typename OutDataType = float,
typename InLayout = ck::tensor_layout::convolution::NHWC,
typename WeiLayout = ck::tensor_layout::convolution::KYXC,
typename OutLayout = ck::tensor_layout::convolution::NHWK,
typename InLayout = ck::tensor_layout::convolution::GNHWC,
typename WeiLayout = ck::tensor_layout::convolution::GKYXC,
typename OutLayout = ck::tensor_layout::convolution::GNHWK,
typename FillInputOp = ck::utils::FillMonotonicSeq<InDataType>,
typename FillWeightsOp = ck::utils::FillConstant<WeiDataType>>
Tensor<OutDataType>
run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
run_reference_convolution_forward(const ck::utils::conv::ConvParam& conv_param,
const FillInputOp& fill_input_op = FillInputOp{},
const FillWeightsOp& fill_weights_op = FillWeightsOp{0.5f})
{
std::vector<std::size_t> input_dims{static_cast<std::size_t>(params.N_),
static_cast<std::size_t>(params.C_)};
input_dims.insert(std::end(input_dims),
std::begin(params.input_spatial_lengths_),
std::end(params.input_spatial_lengths_));
const auto in_g_n_c_wis_desc =
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(conv_param);
std::vector<std::size_t> filter_dims{static_cast<std::size_t>(params.K_),
static_cast<std::size_t>(params.C_)};
filter_dims.insert(std::end(filter_dims),
std::begin(params.filter_spatial_lengths_),
std::end(params.filter_spatial_lengths_));
const auto wei_g_k_c_xs_desc =
ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(conv_param);
const std::vector<ck::index_t>& output_spatial_lengths = params.GetOutputSpatialLengths();
std::vector<std::size_t> output_dims{static_cast<std::size_t>(params.N_),
static_cast<std::size_t>(params.K_)};
output_dims.insert(std::end(output_dims),
std::begin(output_spatial_lengths),
std::end(output_spatial_lengths));
const auto out_g_n_k_wos_desc =
ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(conv_param);
Tensor<InDataType> input(ck::utils::conv::get_host_tensor_descriptor(input_dims, InLayout{}));
Tensor<WeiDataType> weights(
ck::utils::conv::get_host_tensor_descriptor(filter_dims, WeiLayout{}));
Tensor<OutDataType> host_output(
ck::utils::conv::get_host_tensor_descriptor(output_dims, OutLayout{}));
Tensor<InDataType> input(in_g_n_c_wis_desc);
Tensor<WeiDataType> weights(wei_g_k_c_xs_desc);
Tensor<OutDataType> host_output(out_g_n_k_wos_desc);
fill_input_op(input.begin(), input.end());
fill_weights_op(weights.begin(), weights.end());
std::fill(host_output.begin(), host_output.end(), OutDataType(0.f));
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<InDataType,
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
InDataType,
WeiDataType,
OutDataType,
InElementOp,
WeiElementOp,
OutElementOp,
NDim>();
OutElementOp>();
auto ref_invoker = ref_conv.MakeInvoker();
auto ref_argument = ref_conv.MakeArgument(input,
weights,
host_output,
params.conv_filter_strides_,
params.conv_filter_dilations_,
params.input_left_pads_,
params.input_right_pads_,
conv_param.conv_filter_strides_,
conv_param.conv_filter_dilations_,
conv_param.input_left_pads_,
conv_param.input_right_pads_,
InElementOp{},
WeiElementOp{},
OutElementOp{});
@@ -91,21 +81,29 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
} // anonymous namespace
TEST(ReferenceConvolutionFWD, Conv2DNHWC)
// Reference convolution assumes the dimensions of tensor descriptors are in GNCDHW/GKCZYX/GNKDHW
// order, regardless of the physical tensor layouts in memory.
// Some tests below assume the dimensions of tensor descriptors can be in a different order, and
// are therefore disabled.
// TODO: add more tests that comply with the reference convolution's assumption about dimension
// order, and add tests for more physical layouts.
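// Example (assumed from the note above): for the 2D GNHWC cases below, the reference expects
// descriptor dimensions ordered as (G, N, C, Hi, Wi) even though the data are physically laid
// out as G, N, Hi, Wi, C in memory.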
#if 0
TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
{
ck::utils::conv::ConvParams params;
params.N_ = 1;
params.K_ = 1;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{0, 0};
params.input_right_pads_ = std::vector<ck::index_t>{0, 0};
ck::utils::conv::ConvParam conv_param(2,
1,
1,
1,
2,
std::vector<ck::index_t>{3, 3},
std::vector<ck::index_t>{6, 6},
std::vector<ck::index_t>{1, 1},
std::vector<ck::index_t>{1, 1},
std::vector<ck::index_t>{0, 0},
std::vector<ck::index_t>{0, 0});
auto out_tensor = run_reference_convolution_forward<2>(params);
std::vector<std::size_t> ref_dims{1, 1, 4, 4};
auto out_tensor = run_reference_convolution_forward<2>(conv_param);
std::vector<std::size_t> ref_dims{1, 1, 4, 4, 1};
std::vector<float> ref_data{130.5,
148.5,
166.5,
@@ -127,21 +125,22 @@ TEST(ReferenceConvolutionFWD, Conv2DNHWC)
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
{
ck::utils::conv::ConvParams params;
params.N_ = 1;
params.K_ = 2;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12};
params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
ck::utils::conv::ConvParam conv_param(2,
1,
1,
2,
2,
std::vector<ck::index_t>{3, 3},
std::vector<ck::index_t>{12, 12},
std::vector<ck::index_t>{2, 2},
std::vector<ck::index_t>{2, 2},
std::vector<ck::index_t>{1, 1},
std::vector<ck::index_t>{1, 1});
auto out_tensor = run_reference_convolution_forward<2>(params);
std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 2, 5, 5};
auto out_tensor = run_reference_convolution_forward<2>(conv_param);
std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 5, 5, 2};
std::vector<float> ref_data{
210., 210., 327., 327., 351., 351., 375., 375., 399., 399.,
459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5,
@@ -153,88 +152,88 @@ TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST(ReferenceConvolutionFWD, Conv1DNWC)
TEST(ReferenceConvolutionFWD, Conv1DGNWC)
{
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 1;
params.N_ = 1;
params.K_ = 1;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{6};
params.conv_filter_strides_ = std::vector<ck::index_t>{1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
params.input_left_pads_ = std::vector<ck::index_t>{0};
params.input_right_pads_ = std::vector<ck::index_t>{0};
ck::utils::conv::ConvParam conv_param(1,
1,
1,
1,
2,
std::vector<ck::index_t>{3},
std::vector<ck::index_t>{6},
std::vector<ck::index_t>{1},
std::vector<ck::index_t>{1},
std::vector<ck::index_t>{0},
std::vector<ck::index_t>{0});
auto out_tensor =
run_reference_convolution_forward<1,
float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
std::vector<std::size_t> ref_dims{1, 1, 4};
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK>(conv_param);
std::vector<std::size_t> ref_dims{1, 1, 4, 1};
std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST(ReferenceConvolutionFWD, Conv1DNWCStridesDilationsPadding)
TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
{
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 1;
params.N_ = 1;
params.K_ = 2;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{12};
params.conv_filter_strides_ = std::vector<ck::index_t>{2};
params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
params.input_left_pads_ = std::vector<ck::index_t>{1};
params.input_right_pads_ = std::vector<ck::index_t>{1};
ck::utils::conv::ConvParam conv_param(1,
1,
1,
2,
2,
std::vector<ck::index_t>{3},
std::vector<ck::index_t>{12},
std::vector<ck::index_t>{2},
std::vector<ck::index_t>{2},
std::vector<ck::index_t>{1},
std::vector<ck::index_t>{1});
auto out_tensor =
run_reference_convolution_forward<1,
float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(params);
std::vector<std::size_t> ref_dims{1, 2, 5};
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK>(conv_param);
std::vector<std::size_t> ref_dims{1, 1, 5, 2};
std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
EXPECT_TRUE(ck::utils::check_err(
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
}
TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
{
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 1;
params.N_ = 2;
params.K_ = 16;
params.C_ = 4;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{16};
params.conv_filter_strides_ = std::vector<ck::index_t>{1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
params.input_left_pads_ = std::vector<ck::index_t>{1};
params.input_right_pads_ = std::vector<ck::index_t>{1};
ck::utils::conv::ConvParam conv_param(1,
1,
2,
16,
4,
std::vector<ck::index_t>{3},
std::vector<ck::index_t>{16},
std::vector<ck::index_t>{1},
std::vector<ck::index_t>{1},
std::vector<ck::index_t>{1},
std::vector<ck::index_t>{1});
auto out_tensor2 = run_reference_convolution_forward<1,
float,
float,
float,
ck::tensor_layout::convolution::NWC,
ck::tensor_layout::convolution::KXC,
ck::tensor_layout::convolution::NWK>(
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
ck::tensor_layout::convolution::GNWC,
ck::tensor_layout::convolution::GKXC,
ck::tensor_layout::convolution::GNWK>(
conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
std::vector<std::size_t> ref_dims{2, 16, 16};
std::vector<std::size_t> ref_dims{1, 2, 16, 16};
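// "Same" output size: Wo = (16 + 1 + 1 - 1 * (3 - 1) - 1) / 1 + 1 = 16 matches the input width.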
std::vector<float> ref_data{
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
@@ -304,30 +303,31 @@ TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!"));
}
#endif
TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
TEST(ReferenceConvolutionFWD, Conv3DGNCDHW)
{
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 3;
params.N_ = 1;
params.K_ = 1;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6, 6};
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
ck::utils::conv::ConvParam conv_param(3,
1,
1,
1,
2,
std::vector<ck::index_t>{3, 3, 3},
std::vector<ck::index_t>{6, 6, 6},
std::vector<ck::index_t>{1, 1, 1},
std::vector<ck::index_t>{1, 1, 1},
std::vector<ck::index_t>{0, 0, 0},
std::vector<ck::index_t>{0, 0, 0});
auto out_tensor = run_reference_convolution_forward<3,
float,
float,
float,
ck::tensor_layout::convolution::NCDHW,
ck::tensor_layout::convolution::KCZYX,
ck::tensor_layout::convolution::NKDHW>(
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
std::vector<std::size_t> ref_dims{1, 1, 4, 4, 4};
ck::tensor_layout::convolution::GNCDHW,
ck::tensor_layout::convolution::GKCZYX,
ck::tensor_layout::convolution::GNKDHW>(
conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
std::vector<std::size_t> ref_dims{1, 1, 1, 4, 4, 4};
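// Output lengths in {G, N, K, Do, Ho, Wo} order:
// Do = Ho = Wo = (6 - 1 * (3 - 1) - 1) / 1 + 1 = 4.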
std::vector<float> ref_data{
407.7, 410.40002, 413.09998, 415.80002, 423.90002, 426.6, 429.30002, 432.,
440.1, 442.80002, 445.5, 448.2, 456.30002, 459., 461.7, 464.40002,
@@ -344,29 +344,29 @@ TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
ck::utils::check_err(out_tensor.mData, ref_data, "Error [case 1]: incorrect results!"));
}
TEST(ReferenceConvolutionFWD, Conv3DNCDHWStridesDilations)
TEST(ReferenceConvolutionFWD, Conv3DGNCDHWStridesDilations)
{
ck::utils::conv::ConvParams params;
params.num_dim_spatial_ = 3;
params.N_ = 1;
params.K_ = 2;
params.C_ = 2;
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12, 12};
params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3};
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
ck::utils::conv::ConvParam conv_param(3,
1,
1,
2,
2,
std::vector<ck::index_t>{3, 3, 3},
std::vector<ck::index_t>{12, 12, 12},
std::vector<ck::index_t>{3, 3, 3},
std::vector<ck::index_t>{1, 1, 1},
std::vector<ck::index_t>{0, 0, 0},
std::vector<ck::index_t>{0, 0, 0});
auto out_tensor = run_reference_convolution_forward<3,
float,
float,
float,
ck::tensor_layout::convolution::NCDHW,
ck::tensor_layout::convolution::KCZYX,
ck::tensor_layout::convolution::NKDHW>(
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
std::vector<std::size_t> ref_dims{1, 2, 4, 4, 4};
ck::tensor_layout::convolution::GNCDHW,
ck::tensor_layout::convolution::GKCZYX,
ck::tensor_layout::convolution::GNKDHW>(
conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
std::vector<std::size_t> ref_dims{1, 1, 2, 4, 4, 4};
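// With stride 3 and no padding: Do = Ho = Wo = (12 - 1 * (3 - 1) - 1) / 3 + 1 = 9 / 3 + 1 = 4.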
std::vector<float> ref_data{
2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002,
2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6,


@@ -3,9 +3,9 @@ add_custom_target(test_softmax)
add_gtest_executable(test_softmax_fp32 test_softmax_fp32.cpp)
add_gtest_executable(test_softmax_fp16 test_softmax_fp16.cpp)
add_gtest_executable(test_softmax_int8 test_softmax_int8.cpp)
target_link_libraries(test_softmax_fp32 PRIVATE host_tensor)
target_link_libraries(test_softmax_fp16 PRIVATE host_tensor)
target_link_libraries(test_softmax_int8 PRIVATE host_tensor)
target_link_libraries(test_softmax_fp32 PRIVATE utility)
target_link_libraries(test_softmax_fp16 PRIVATE utility)
target_link_libraries(test_softmax_int8 PRIVATE utility)
add_dependencies(test_softmax test_softmax_fp32)
add_dependencies(test_softmax test_softmax_fp16)
add_dependencies(test_softmax test_softmax_int8)


@@ -12,8 +12,8 @@
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
#include "ck/library/utility/check_err.hpp"
#include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
namespace ck {
@@ -80,8 +80,8 @@ class TestSoftmax : public ::testing::Test
Tensor<OutDataType> out_ref(out);
DeviceMem in_dev(sizeof(InDataType) * in.mDesc.GetElementSpace());
DeviceMem out_dev(sizeof(OutDataType) * out.mDesc.GetElementSpace());
DeviceMem in_dev(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
DeviceMem out_dev(sizeof(OutDataType) * out.mDesc.GetElementSpaceSize());
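// Device buffers are sized in bytes: element-space size (in elements) times the element size.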
in_dev.ToDevice(in.mData.data());
out_dev.ToDevice(out.mData.data());