mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 19:09:59 +00:00
Clean up conv example, Instances, profiler and test (#324)
* convnd_fwd fp16 example
* update example
* update example
* update instance
* updating refernce conv
* update reference conv
* update conv fwd profiler
* update conv 1d and 3d instance
* update include path
* clean
* update profiler for conv bwd data and weight
* update conv bwd weight
* clean
* update conv example
* update profiler for conv bwd weight
* update ckprofiler for conv bwd data
* fix reference conv bwd data bug; update conv bwd data test
* update examples
* fix initialization issue
* update test for conv fwd
* clean
* clean
* remove test case too sensitive to error threshhold
* fix test
* clean
* fix build
* adding conv multiple d
* adding conv multiple D
* add matrix padder
* add gemm padding to convnd
* adding group conv
* update gemm multi-d
* refactor
* refactor
* refactor
* clean
* clean
* refactor
* refactor
* reorg
* add ds
* add bias
* clean
* add G
* adding group
* adding group
* adding group
* update Tensor
* clean
* update example
* update DeviceGemmMultipleD_Xdl_CShuffle
* update conv bwd-data and bwd-weight
* upate contraction example
* update gemm and batch gemm with e permute
* fix example build
* instance for grouped conv1d
* update example
* adding group conv instance
* update gemm bilinear instance
* update gemm+add+add+fastgelu instance
* update profiler
* update profiler
* update test
* update test and client example
* clean
* add grouped conv into profiler
* update profiler
* clean
* add test grouped conv, update all conv test to gtest
* update test
[ROCm/composable_kernel commit: 500fa99512]
This commit is contained in:
@@ -41,11 +41,11 @@ add_subdirectory(gemm_reduce)
|
||||
add_subdirectory(batched_gemm)
|
||||
add_subdirectory(batched_gemm_reduce)
|
||||
add_subdirectory(grouped_gemm)
|
||||
add_subdirectory(convnd_fwd)
|
||||
add_subdirectory(reduce)
|
||||
add_subdirectory(conv2d_bwd_weight)
|
||||
add_subdirectory(convnd_fwd)
|
||||
add_subdirectory(convnd_bwd_weight)
|
||||
add_subdirectory(convnd_bwd_data)
|
||||
add_subdirectory(grouped_convnd_fwd)
|
||||
add_subdirectory(block_to_ctile_map)
|
||||
add_subdirectory(softmax)
|
||||
add_subdirectory(layernorm)
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
add_test_executable(test_batched_gemm_fp16 batched_gemm_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_fp16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_batched_gemm_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_fp16 PRIVATE device_batched_gemm_instance)
|
||||
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
add_test_executable(test_batched_gemm_reduce_fp16 batched_gemm_reduce_fp16.cpp)
|
||||
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_batched_gemm_reduce_fp16 PRIVATE device_batched_gemm_reduce_instance)
|
||||
|
||||
@@ -1,3 +0,0 @@
|
||||
add_test_executable(test_conv2d_bwd_data conv2d_bwd_data.cpp)
|
||||
target_link_libraries(test_conv2d_bwd_data PRIVATE host_tensor)
|
||||
target_link_libraries(test_conv2d_bwd_data PRIVATE device_conv2d_bwd_data_instance)
|
||||
@@ -1,330 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include "config.hpp"
|
||||
#include "device.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "host_tensor_generator.hpp"
|
||||
#include "host_conv.hpp"
|
||||
#include "tensor_layout.hpp"
|
||||
#include "device_tensor.hpp"
|
||||
#include "device_conv_bwd_data.hpp"
|
||||
#include "element_wise_operation.hpp"
|
||||
#include "reference_conv_bwd_data.hpp"
|
||||
|
||||
using F16 = ck::half_t;
|
||||
using F32 = float;
|
||||
using BF16 = ck::bhalf_t;
|
||||
using INT8 = int8_t;
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
namespace instance {
|
||||
|
||||
using DeviceConvBwdDataNoOpPtr =
|
||||
DeviceConvBwdDataPtr<ck::tensor_operation::element_wise::PassThrough,
|
||||
ck::tensor_operation::element_wise::PassThrough,
|
||||
ck::tensor_operation::element_wise::PassThrough>;
|
||||
|
||||
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances(
|
||||
std::vector<DeviceConvBwdDataNoOpPtr>&);
|
||||
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances(
|
||||
std::vector<DeviceConvBwdDataNoOpPtr>&);
|
||||
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances(
|
||||
std::vector<DeviceConvBwdDataNoOpPtr>&);
|
||||
void add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances(
|
||||
std::vector<DeviceConvBwdDataNoOpPtr>&);
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
|
||||
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
template <typename T>
|
||||
static bool check_out(const Tensor<T>& ref, const Tensor<T>& result)
|
||||
{
|
||||
float max_diff = 1e-6;
|
||||
|
||||
for(int i = 0; i < ref.mData.size(); ++i)
|
||||
{
|
||||
float diff = std::abs(double(ref.mData[i]) - double(result.mData[i]));
|
||||
if(max_diff < diff)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int data_type = 0;
|
||||
int init_method = 0;
|
||||
|
||||
// Conv shape
|
||||
ck::index_t N = 128;
|
||||
ck::index_t K = 256;
|
||||
ck::index_t C = 192;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 71;
|
||||
ck::index_t Wi = 71;
|
||||
ck::index_t conv_stride_h = 2;
|
||||
ck::index_t conv_stride_w = 2;
|
||||
ck::index_t conv_dilation_h = 1;
|
||||
ck::index_t conv_dilation_w = 1;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
|
||||
if(argc == 1)
|
||||
{
|
||||
data_type = 1;
|
||||
init_method = 1;
|
||||
}
|
||||
else if(argc == 3)
|
||||
{
|
||||
data_type = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
}
|
||||
else if(argc == 18)
|
||||
{
|
||||
data_type = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
|
||||
N = std::stoi(argv[3]);
|
||||
K = std::stoi(argv[4]);
|
||||
C = std::stoi(argv[5]);
|
||||
Y = std::stoi(argv[6]);
|
||||
X = std::stoi(argv[7]);
|
||||
Hi = std::stoi(argv[8]);
|
||||
Wi = std::stoi(argv[9]);
|
||||
conv_stride_h = std::stoi(argv[10]);
|
||||
conv_stride_w = std::stoi(argv[11]);
|
||||
conv_dilation_h = std::stoi(argv[12]);
|
||||
conv_dilation_w = std::stoi(argv[13]);
|
||||
in_left_pad_h = std::stoi(argv[14]);
|
||||
in_left_pad_w = std::stoi(argv[15]);
|
||||
in_right_pad_h = std::stoi(argv[16]);
|
||||
in_right_pad_w = std::stoi(argv[17]);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("arg1: data type (0=fp32, 1=fp16, 2= bfp16, 3= int8_t )\n");
|
||||
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
|
||||
printf("arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
auto Run = [&](auto input_type, auto wei_type, auto out_type, auto acc_type) {
|
||||
using InDataType = decltype(input_type);
|
||||
using WeiDataType = decltype(wei_type);
|
||||
using OutDataType = decltype(out_type);
|
||||
using AccDataType = decltype(acc_type);
|
||||
|
||||
using ReferenceConvBwdInstance =
|
||||
ck::tensor_operation::host::ReferenceConvBwdData<InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
AccDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp>;
|
||||
|
||||
const ck::index_t YEff = (Y - 1) * conv_dilation_h + 1;
|
||||
const ck::index_t XEff = (X - 1) * conv_dilation_w + 1;
|
||||
|
||||
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - YEff) / conv_stride_h + 1;
|
||||
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
|
||||
|
||||
const std::vector<ck::index_t> input_spatial_lengths{{Hi, Wi}};
|
||||
const std::vector<ck::index_t> filter_spatial_lengths{{Y, X}};
|
||||
const std::vector<ck::index_t> output_spatial_lengths{{Ho, Wo}};
|
||||
const std::vector<ck::index_t> conv_filter_strides{{conv_stride_h, conv_stride_w}};
|
||||
const std::vector<ck::index_t> conv_filter_dilations{{conv_dilation_h, conv_dilation_w}};
|
||||
const std::vector<ck::index_t> input_left_pads{{in_left_pad_h, in_left_pad_w}};
|
||||
const std::vector<ck::index_t> input_right_pads{{in_right_pad_h, in_right_pad_w}};
|
||||
|
||||
auto f_host_tensor_descriptor =
|
||||
[](std::size_t N_, std::size_t C_, std::size_t H, std::size_t W) {
|
||||
return HostTensorDescriptor(std::vector<std::size_t>({N_, C_, H, W}),
|
||||
std::vector<std::size_t>({C_ * H * W, 1, W * C_, C_}));
|
||||
};
|
||||
|
||||
Tensor<OutDataType> out_n_k_ho_wo(f_host_tensor_descriptor(N, K, Ho, Wo));
|
||||
Tensor<WeiDataType> wei_k_c_y_x(f_host_tensor_descriptor(K, C, Y, X));
|
||||
Tensor<InDataType> in_n_c_hi_wi_host_result(f_host_tensor_descriptor(N, C, Hi, Wi));
|
||||
Tensor<InDataType> in_n_c_hi_wi_device_result(f_host_tensor_descriptor(N, C, Hi, Wi));
|
||||
|
||||
std::cout << "in_n_c_hi_wi: " << in_n_c_hi_wi_host_result.mDesc << std::endl;
|
||||
std::cout << "wei_k_c_y_x: " << wei_k_c_y_x.mDesc << std::endl;
|
||||
std::cout << "out_n_k_ho_wo: " << out_n_k_ho_wo.mDesc << std::endl;
|
||||
|
||||
switch(init_method)
|
||||
{
|
||||
case 0: break;
|
||||
case 1:
|
||||
out_n_k_ho_wo.GenerateTensorValue(GeneratorTensor_2<OutDataType>{-5, 5});
|
||||
wei_k_c_y_x.GenerateTensorValue(GeneratorTensor_2<WeiDataType>{-5, 5});
|
||||
break;
|
||||
default:
|
||||
out_n_k_ho_wo.GenerateTensorValue(GeneratorTensor_1<OutDataType>{1});
|
||||
wei_k_c_y_x.GenerateTensorValue(GeneratorTensor_1<WeiDataType>{1});
|
||||
}
|
||||
|
||||
DeviceMem in_device_buf(sizeof(InDataType) *
|
||||
in_n_c_hi_wi_device_result.mDesc.GetElementSpace());
|
||||
DeviceMem wei_device_buf(sizeof(WeiDataType) * wei_k_c_y_x.mDesc.GetElementSpace());
|
||||
DeviceMem out_device_buf(sizeof(OutDataType) * out_n_k_ho_wo.mDesc.GetElementSpace());
|
||||
|
||||
out_device_buf.ToDevice(out_n_k_ho_wo.mData.data());
|
||||
wei_device_buf.ToDevice(wei_k_c_y_x.mData.data());
|
||||
// reset input to zero
|
||||
in_n_c_hi_wi_device_result.GenerateTensorValue(GeneratorTensor_1<InDataType>{0});
|
||||
in_device_buf.ToDevice(in_n_c_hi_wi_device_result.mData.data());
|
||||
|
||||
// get host result
|
||||
{
|
||||
auto ref_conv = ReferenceConvBwdInstance{};
|
||||
auto ref_invoker = ref_conv.MakeInvoker();
|
||||
|
||||
auto ref_argument = ref_conv.MakeArgument(in_n_c_hi_wi_host_result,
|
||||
wei_k_c_y_x,
|
||||
out_n_k_ho_wo,
|
||||
conv_filter_strides,
|
||||
conv_filter_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads,
|
||||
InElementOp{},
|
||||
WeiElementOp{},
|
||||
OutElementOp{});
|
||||
ref_invoker.Run(ref_argument);
|
||||
}
|
||||
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
using DeviceConvBwdDataNoOpPtr = ck::tensor_operation::device::
|
||||
DeviceConvBwdDataPtr<PassThrough, PassThrough, PassThrough>;
|
||||
|
||||
// add device Conv instances
|
||||
std::vector<DeviceConvBwdDataNoOpPtr> conv_ptrs;
|
||||
|
||||
if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, float> &&
|
||||
ck::is_same_v<ck::remove_cv_t<WeiDataType>, float> &&
|
||||
ck::is_same_v<ck::remove_cv_t<OutDataType>, float>)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
|
||||
}
|
||||
else if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, ck::half_t> &&
|
||||
ck::is_same_v<ck::remove_cv_t<WeiDataType>, ck::half_t> &&
|
||||
ck::is_same_v<ck::remove_cv_t<OutDataType>, ck::half_t>)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
|
||||
}
|
||||
else if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, ck::bhalf_t> &&
|
||||
ck::is_same_v<ck::remove_cv_t<WeiDataType>, ck::bhalf_t> &&
|
||||
ck::is_same_v<ck::remove_cv_t<OutDataType>, ck::bhalf_t>)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
|
||||
}
|
||||
else if constexpr(ck::is_same_v<ck::remove_cv_t<InDataType>, int8_t> &&
|
||||
ck::is_same_v<ck::remove_cv_t<WeiDataType>, int8_t> &&
|
||||
ck::is_same_v<ck::remove_cv_t<OutDataType>, int8_t>)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_conv2d_bwd_data_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
|
||||
}
|
||||
|
||||
if(conv_ptrs.size() <= 0)
|
||||
{
|
||||
throw std::runtime_error("wrong! no device Conv instance found");
|
||||
}
|
||||
|
||||
// profile device Conv instances
|
||||
bool success = true;
|
||||
for(auto& conv_ptr : conv_ptrs)
|
||||
{
|
||||
auto argument_ptr = conv_ptr->MakeArgumentPointer(
|
||||
static_cast<InDataType*>(in_device_buf.GetDeviceBuffer()),
|
||||
static_cast<WeiDataType*>(wei_device_buf.GetDeviceBuffer()),
|
||||
static_cast<OutDataType*>(out_device_buf.GetDeviceBuffer()),
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
input_spatial_lengths,
|
||||
filter_spatial_lengths,
|
||||
output_spatial_lengths,
|
||||
conv_filter_strides,
|
||||
conv_filter_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads,
|
||||
InElementOp{},
|
||||
WeiElementOp{},
|
||||
OutElementOp{});
|
||||
|
||||
if(conv_ptr->IsSupportedArgument(argument_ptr.get()))
|
||||
{
|
||||
auto invoker_ptr = conv_ptr->MakeInvokerPointer();
|
||||
invoker_ptr->Run(argument_ptr.get(), 1);
|
||||
|
||||
in_device_buf.FromDevice(in_n_c_hi_wi_device_result.mData.data());
|
||||
|
||||
if(!check_out(in_n_c_hi_wi_host_result, in_n_c_hi_wi_device_result))
|
||||
{
|
||||
std::cout << "Fail Info: " << conv_ptr->GetTypeString() << std::endl;
|
||||
success = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Pass Info: " << conv_ptr->GetTypeString() << std::endl;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Not support Info: " << conv_ptr->GetTypeString() << std::endl;
|
||||
}
|
||||
}
|
||||
|
||||
if(success)
|
||||
{
|
||||
std::cout << "test conv2d bwd : Pass" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "test conv2d bwd: Fail " << std::endl;
|
||||
return -1;
|
||||
}
|
||||
};
|
||||
|
||||
if(data_type == 0)
|
||||
{
|
||||
return Run(F32(), F32(), F32(), F32());
|
||||
}
|
||||
else if(data_type == 1)
|
||||
{
|
||||
return Run(F16(), F16(), F16(), F32());
|
||||
}
|
||||
else if(data_type == 2)
|
||||
{
|
||||
return Run(BF16(), BF16(), BF16(), F32());
|
||||
}
|
||||
else if(data_type == 3)
|
||||
{
|
||||
return Run(INT8(), INT8(), INT8(), int());
|
||||
}
|
||||
else
|
||||
{
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
@@ -1,2 +0,0 @@
|
||||
#add_test_executable(test_conv2d_bwd_weight conv2d_bwd_weight.cpp)
|
||||
#target_link_libraries(test_conv2d_bwd_weight PRIVATE host_tensor device_conv2d_bwd_weight_instance conv_util)
|
||||
@@ -1,217 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <initializer_list>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
#include "test/convnd_fwd/conv_util.hpp"
|
||||
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
|
||||
|
||||
int test_self()
|
||||
{
|
||||
bool pass = true;
|
||||
std::vector<ck::utils::conv::ConvParams> params;
|
||||
|
||||
params.push_back({2, 128, 256, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
params.push_back({2, 128, 256, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
params.push_back({2, 128, 256, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
|
||||
for(auto& param : params)
|
||||
{
|
||||
// f32
|
||||
pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
|
||||
// fp16
|
||||
pass &= ck::profiler::profile_conv_bwd_weight_impl<2,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
}
|
||||
return pass;
|
||||
}
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
int data_type = 1;
|
||||
int init_method = 1;
|
||||
|
||||
// Conv shape
|
||||
ck::index_t N = 128;
|
||||
ck::index_t K = 256;
|
||||
ck::index_t C = 192;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 71;
|
||||
ck::index_t Wi = 71;
|
||||
ck::index_t conv_stride_h = 2;
|
||||
ck::index_t conv_stride_w = 2;
|
||||
ck::index_t conv_dilation_h = 1;
|
||||
ck::index_t conv_dilation_w = 1;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
ck::index_t split_k = 1;
|
||||
|
||||
bool pass = true;
|
||||
if(argc == 1)
|
||||
{
|
||||
pass = test_self();
|
||||
}
|
||||
else
|
||||
{
|
||||
if(argc == 3)
|
||||
{
|
||||
data_type = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
}
|
||||
else if(argc == 19)
|
||||
{
|
||||
data_type = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
|
||||
N = std::stoi(argv[3]);
|
||||
K = std::stoi(argv[4]);
|
||||
C = std::stoi(argv[5]);
|
||||
Y = std::stoi(argv[6]);
|
||||
X = std::stoi(argv[7]);
|
||||
Hi = std::stoi(argv[8]);
|
||||
Wi = std::stoi(argv[9]);
|
||||
conv_stride_h = std::stoi(argv[10]);
|
||||
conv_stride_w = std::stoi(argv[11]);
|
||||
conv_dilation_h = std::stoi(argv[12]);
|
||||
conv_dilation_w = std::stoi(argv[13]);
|
||||
in_left_pad_h = std::stoi(argv[14]);
|
||||
in_left_pad_w = std::stoi(argv[15]);
|
||||
in_right_pad_h = std::stoi(argv[16]);
|
||||
in_right_pad_w = std::stoi(argv[17]);
|
||||
split_k = std::stoi(argv[18]);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("arg1: data type (0=fp32, 1=fp16, 2= bfp16, 3= int8_t )\n");
|
||||
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
|
||||
printf("arg3 to 17: N, K, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
ck::utils::conv::ConvParams param{2,
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
{Y, X},
|
||||
{Hi, Wi},
|
||||
{conv_stride_h, conv_stride_w},
|
||||
{conv_dilation_h, conv_dilation_w},
|
||||
{in_left_pad_h, in_left_pad_w},
|
||||
{in_right_pad_h, in_right_pad_w}};
|
||||
if(data_type == 0)
|
||||
{
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
init_method,
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
split_k);
|
||||
}
|
||||
else if(data_type == 1)
|
||||
{
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
init_method,
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
split_k);
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "Not support data type" << std::endl;
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
if(pass)
|
||||
{
|
||||
std::cout << "test conv2d bwd weight : Pass" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "test conv2d bwd weight: Fail " << std::endl;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
@@ -1,2 +1,2 @@
|
||||
add_gtest_executable(test_conv_util conv_util.cpp)
|
||||
target_link_libraries(test_conv_util PRIVATE host_tensor conv_util)
|
||||
target_link_libraries(test_conv_util PRIVATE utility)
|
||||
|
||||
@@ -10,198 +10,147 @@
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/conv_util.hpp"
|
||||
#include "ck/library/utility/convolution_parameter.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
class TestConvUtil : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
void SetNDParams(std::size_t ndims)
|
||||
void SetNDParams(std::size_t ndims, std::size_t s, std::size_t d, std::size_t p)
|
||||
{
|
||||
conv_params.num_dim_spatial_ = ndims;
|
||||
conv_params.filter_spatial_lengths_ = std::vector<ck::index_t>(ndims, 3);
|
||||
conv_params.input_spatial_lengths_ = std::vector<ck::index_t>(ndims, 71);
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>(ndims, 2);
|
||||
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>(ndims, 1);
|
||||
conv_params.input_left_pads_ = std::vector<ck::index_t>(ndims, 1);
|
||||
conv_params.input_right_pads_ = std::vector<ck::index_t>(ndims, 1);
|
||||
conv_params = ck::utils::conv::ConvParam(ndims,
|
||||
2,
|
||||
128,
|
||||
192,
|
||||
256,
|
||||
std::vector<ck::index_t>(ndims, 3),
|
||||
std::vector<ck::index_t>(ndims, 71),
|
||||
std::vector<ck::index_t>(ndims, s),
|
||||
std::vector<ck::index_t>(ndims, d),
|
||||
std::vector<ck::index_t>(ndims, p),
|
||||
std::vector<ck::index_t>(ndims, p));
|
||||
}
|
||||
|
||||
protected:
|
||||
// ------- default 2D -------
|
||||
// input NCHW {128,192,71,71},
|
||||
// weights KCYX {256,192,3,3},
|
||||
// stride {2,2},
|
||||
// dilations {1,1},
|
||||
// padding {{1,1}, {1,1}}
|
||||
ck::utils::conv::ConvParams conv_params;
|
||||
// input GNCHW {2, 128, 192, 71, 71},
|
||||
// weights GKCYX {2, 256, 192, 3, 3},
|
||||
// stride {s, s},
|
||||
// dilations {d, d},
|
||||
// padding {{p, p}, {p, p}
|
||||
ck::utils::conv::ConvParam conv_params;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
|
||||
{
|
||||
ck::utils::conv::ConvParams conv_params;
|
||||
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{36, 36},
|
||||
"Error: ConvParams 2D default constructor."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
|
||||
conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2};
|
||||
conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{37, 37},
|
||||
"Error: ConvParams 2D padding left/right {2,2}."));
|
||||
|
||||
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3};
|
||||
conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
|
||||
conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
|
||||
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(
|
||||
ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{23, 23},
|
||||
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
|
||||
}
|
||||
|
||||
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths1D)
|
||||
{
|
||||
SetNDParams(1);
|
||||
|
||||
// stride 2, dilation 1, pad 1
|
||||
SetNDParams(1, 2, 1, 1);
|
||||
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 1, dilation 1, pad 1
|
||||
SetNDParams(1, 1, 1, 1);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{71}, "Error: ConvParams 1D stride {1}."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2};
|
||||
conv_params.input_left_pads_ = std::vector<ck::index_t>{2};
|
||||
conv_params.input_right_pads_ = std::vector<ck::index_t>{2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 2, dilation 1, pad 2
|
||||
SetNDParams(1, 2, 1, 2);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{37},
|
||||
"Error: ConvParams 1D padding left/right {2}."));
|
||||
|
||||
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 2, dilation 2, pad 2
|
||||
SetNDParams(1, 2, 2, 2);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{36}, "Error: ConvParams 1D dilation {2}."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3};
|
||||
conv_params.input_left_pads_ = std::vector<ck::index_t>{1};
|
||||
conv_params.input_right_pads_ = std::vector<ck::index_t>{1};
|
||||
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 3, dilation 2, pad 1
|
||||
SetNDParams(1, 3, 2, 1);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(
|
||||
ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{23},
|
||||
"Error: ConvParams 1D strides{3}, padding {1}, dilations {2}."));
|
||||
}
|
||||
|
||||
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths2D)
|
||||
{
|
||||
// stride 2, dilation 1, pad 1
|
||||
SetNDParams(2, 2, 1, 1);
|
||||
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{36, 36},
|
||||
"Error: ConvParams 2D default constructor."));
|
||||
|
||||
// stride 1, dilation 1, pad 1
|
||||
SetNDParams(2, 1, 1, 1);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{71, 71}, "Error: ConvParams 2D stride {1,1}."));
|
||||
|
||||
// stride 2, dilation 1, pad 2
|
||||
SetNDParams(2, 2, 1, 2);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{37, 37},
|
||||
"Error: ConvParams 2D padding left/right {2,2}."));
|
||||
|
||||
// stride 2, dilation 2, pad 2
|
||||
SetNDParams(2, 2, 2, 2);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{36, 36}, "Error: ConvParams 2D dilation {2,2}."));
|
||||
|
||||
// stride 3, dilation 2, pad 1
|
||||
SetNDParams(2, 3, 2, 1);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(
|
||||
ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{23, 23},
|
||||
"Error: ConvParams 2D strides{3,3}, padding {1,1}, dilations {2,2}."));
|
||||
}
|
||||
|
||||
TEST_F(TestConvUtil, ConvParamsGetOutputSpatialLengths3D)
|
||||
{
|
||||
SetNDParams(3);
|
||||
|
||||
// stride 2, dilation 1, pad 1
|
||||
SetNDParams(3, 2, 1, 1);
|
||||
std::vector<ck::index_t> out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len, std::vector<ck::index_t>{36, 36, 36}, "Error: ConvParams 3D."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 1, dilation 1, pad 1
|
||||
SetNDParams(3, 1, 1, 1);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{71, 71, 71},
|
||||
"Error: ConvParams 3D stride {1, 1, 1}."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2, 2};
|
||||
conv_params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2};
|
||||
conv_params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 2, dilation 1, pad 2
|
||||
SetNDParams(3, 2, 1, 2);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{37, 37, 37},
|
||||
"Error: ConvParams 3D padding left/right {2, 2, 2}."));
|
||||
|
||||
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 2, dilation 2, pad 2
|
||||
SetNDParams(3, 2, 2, 2);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(out_spatial_len,
|
||||
std::vector<ck::index_t>{36, 36, 36},
|
||||
"Error: ConvParams 3D dilation {2, 2, 2}."));
|
||||
|
||||
conv_params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3};
|
||||
conv_params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
|
||||
conv_params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};
|
||||
conv_params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2, 2};
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
// stride 3, dilation 2, pad 1
|
||||
SetNDParams(3, 3, 2, 1);
|
||||
out_spatial_len = conv_params.GetOutputSpatialLengths();
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_spatial_len,
|
||||
std::vector<ck::index_t>{23, 23, 23},
|
||||
"Error: ConvParams 3D strides{3, 3, 3}, padding {1, 1, 1}, dilations {2, 2, 2}."));
|
||||
}
|
||||
|
||||
TEST(ConvUtil, GetHostTensorDescriptor)
|
||||
{
|
||||
namespace tl = ck::tensor_layout::convolution;
|
||||
std::vector<std::size_t> dims{2, 3, 4, 5};
|
||||
HostTensorDescriptor h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NHWC{});
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NHWC dimensions lengths!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
h.GetStrides(), {3 * 4 * 5, 1, 3 * 5, 3}, "Error: wrong NHWC dimensions strides!"));
|
||||
|
||||
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCHW{});
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
h.GetLengths(), {2, 3, 4, 5}, "Error: wrong NCHW dimensions lengths!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
h.GetStrides(), {3 * 4 * 5, 4 * 5, 5, 1}, "Error: wrong NCHW dimensions strides!"));
|
||||
|
||||
dims = std::vector<std::size_t>{2, 3, 4};
|
||||
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NWC{});
|
||||
EXPECT_TRUE(
|
||||
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NWC dimensions lengths!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
h.GetStrides(), {3 * 4, 1, 3}, "Error: wrong NWC dimensions strides!"));
|
||||
|
||||
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCW{});
|
||||
EXPECT_TRUE(
|
||||
ck::utils::check_err(h.GetLengths(), {2, 3, 4}, "Error: wrong NCW dimensions lengths!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
h.GetStrides(), {3 * 4, 4, 1}, "Error: wrong NCW dimensions strides!"));
|
||||
|
||||
dims = std::vector<std::size_t>{2, 3, 4, 5, 6};
|
||||
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NDHWC{});
|
||||
EXPECT_TRUE(
|
||||
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NDHWC dimensions lengths!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
|
||||
{3 * 4 * 5 * 6, // N
|
||||
1, // C
|
||||
3 * 5 * 6, // D
|
||||
3 * 6, // H
|
||||
3}, // W
|
||||
"Error: wrong NDHWC dimensions strides!"));
|
||||
|
||||
h = ck::utils::conv::get_host_tensor_descriptor(dims, tl::NCDHW{});
|
||||
EXPECT_TRUE(
|
||||
ck::utils::check_err(h.GetLengths(), dims, "Error: wrong NCDHW dimensions lengths!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(h.GetStrides(),
|
||||
{3 * 4 * 5 * 6, // N
|
||||
4 * 5 * 6, // C
|
||||
5 * 6, // D
|
||||
6, // H
|
||||
1}, // W
|
||||
"Error: wrong NCDHW dimensions strides!"));
|
||||
}
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_test_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
|
||||
target_link_libraries(test_convnd_bwd_data PRIVATE host_tensor device_convnd_bwd_data_instance conv_util)
|
||||
add_gtest_executable(test_convnd_bwd_data convnd_bwd_data.cpp)
|
||||
target_link_libraries(test_convnd_bwd_data PRIVATE utility device_conv1d_bwd_data_instance device_conv2d_bwd_data_instance device_conv3d_bwd_data_instance)
|
||||
|
||||
@@ -1,331 +1,241 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <initializer_list>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <initializer_list>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_convnd_bwd_data_impl.hpp"
|
||||
#include "profiler/include/profile_conv_bwd_data_impl.hpp"
|
||||
|
||||
int main()
|
||||
class TestConvndBwdData : public ::testing::Test
|
||||
{
|
||||
bool pass = true;
|
||||
// check 1d
|
||||
std::vector<ck::utils::conv::ConvParams> params;
|
||||
params.push_back({1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
|
||||
params.push_back({1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
|
||||
params.push_back({1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
|
||||
protected:
|
||||
std::vector<ck::utils::conv::ConvParam> conv_params;
|
||||
};
|
||||
|
||||
for(auto& param : params)
|
||||
// 1d
|
||||
TEST_F(TestConvndBwdData, Conv1dBwdData)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
bool pass;
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<1,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
}
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
// check 2d
|
||||
params.clear();
|
||||
params.push_back({2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
params.push_back({2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
params.push_back({2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
for(auto& param : params)
|
||||
{
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
// int8
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<2,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
}
|
||||
|
||||
// check 3d
|
||||
params.clear();
|
||||
params.push_back(
|
||||
{3, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
params.push_back(
|
||||
{3, 128, 128, 256, {3, 3, 3}, {14, 14, 14}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
|
||||
params.push_back(
|
||||
{3, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
|
||||
for(auto& param : params)
|
||||
{
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
|
||||
pass &= ck::profiler::profile_convnd_bwd_data_impl<3,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_);
|
||||
}
|
||||
|
||||
if(pass)
|
||||
{
|
||||
std::cout << "test convnd bwd : Pass" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "test convnd bwd: Fail " << std::endl;
|
||||
return -1;
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 2d
|
||||
TEST_F(TestConvndBwdData, Conv2dBwdData)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 3d
|
||||
TEST_F(TestConvndBwdData, Conv3dBwdData)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_conv_bwd_data_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_test_executable(test_convnd_bwd_weight convnd_bwd_weight.cpp)
|
||||
target_link_libraries(test_convnd_bwd_weight PRIVATE host_tensor device_convnd_bwd_weight_instance conv_util)
|
||||
add_gtest_executable(test_convnd_bwd_weight convnd_bwd_weight.cpp)
|
||||
target_link_libraries(test_convnd_bwd_weight PRIVATE utility device_conv1d_bwd_weight_instance device_conv2d_bwd_weight_instance device_conv3d_bwd_weight_instance)
|
||||
|
||||
@@ -1,283 +1,205 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
#include <numeric>
|
||||
#include <initializer_list>
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <initializer_list>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "test/convnd_fwd/conv_util.hpp"
|
||||
#include "profiler/include/profile_convnd_bwd_weight_impl.hpp"
|
||||
#include "profiler/include/profile_conv_bwd_weight_impl.hpp"
|
||||
|
||||
int test_self()
|
||||
class TestConvndBwdWeight : public ::testing::Test
|
||||
{
|
||||
bool pass = true;
|
||||
std::vector<ck::utils::conv::ConvParams> params;
|
||||
protected:
|
||||
std::vector<ck::utils::conv::ConvParam> conv_params;
|
||||
};
|
||||
|
||||
params.push_back({1, 128, 256, 256, {1}, {7}, {2}, {1}, {0}, {0}});
|
||||
params.push_back({1, 128, 256, 256, {3}, {14}, {1}, {1}, {1}, {1}});
|
||||
params.push_back({1, 128, 256, 256, {1}, {3}, {1}, {1}, {0}, {0}});
|
||||
|
||||
for(auto& param : params)
|
||||
{
|
||||
// f32
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
|
||||
// fp16
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
|
||||
// bf16
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<1,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
}
|
||||
|
||||
// check 2d
|
||||
params.clear();
|
||||
params.push_back({2, 128, 256, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
params.push_back({2, 128, 256, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
params.push_back({2, 128, 256, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
|
||||
for(auto& param : params)
|
||||
{
|
||||
// f32
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
|
||||
// fp16
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
|
||||
// bf16
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<2,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
}
|
||||
|
||||
// check 2d
|
||||
params.clear();
|
||||
params.push_back(
|
||||
{3, 128, 256, 256, {1, 1, 1}, {4, 4, 4}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
params.push_back(
|
||||
{3, 128, 256, 256, {3, 3, 3}, {4, 4, 8}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
|
||||
params.push_back(
|
||||
{3, 128, 256, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
|
||||
for(auto& param : params)
|
||||
{
|
||||
// f32
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
|
||||
// fp16
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
|
||||
// bf16
|
||||
pass &= ck::profiler::profile_convnd_bwd_weight_impl<3,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK>(
|
||||
true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
true, // time_kernel
|
||||
param.N_,
|
||||
param.K_,
|
||||
param.C_,
|
||||
param.input_spatial_lengths_,
|
||||
param.filter_spatial_lengths_,
|
||||
param.GetOutputSpatialLengths(),
|
||||
param.conv_filter_strides_,
|
||||
param.conv_filter_dilations_,
|
||||
param.input_left_pads_,
|
||||
param.input_right_pads_,
|
||||
2);
|
||||
}
|
||||
|
||||
return pass;
|
||||
}
|
||||
int main()
|
||||
// 1d
|
||||
TEST_F(TestConvndBwdWeight, Conv1dBwdWeight)
|
||||
{
|
||||
// int data_type = 1;
|
||||
// int init_method = 1;
|
||||
conv_params.clear();
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
|
||||
|
||||
bool pass = true;
|
||||
|
||||
pass = test_self();
|
||||
|
||||
if(pass)
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
std::cout << "test conv2d bwd weight : Pass" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::cout << "test conv2d bwd weight: Fail " << std::endl;
|
||||
return -1;
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 2d
|
||||
TEST_F(TestConvndBwdWeight, Conv2dBwdWeight)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
conv_params.push_back({2, 1, 32, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 3d
|
||||
TEST_F(TestConvndBwdWeight, Conv3dBwdWeight)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
conv_params.push_back(
|
||||
{3, 1, 32, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_bwd_weight_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param,
|
||||
2);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,13 +1,2 @@
|
||||
add_custom_target(test_convnd_fwd)
|
||||
|
||||
add_gtest_executable(test_conv1d_fwd conv1d_fwd.cpp)
|
||||
target_link_libraries(test_conv1d_fwd PRIVATE host_tensor device_conv1d_fwd_instance conv_util)
|
||||
add_dependencies(test_convnd_fwd test_conv1d_fwd)
|
||||
|
||||
add_gtest_executable(test_conv2d_fwd conv2d_fwd.cpp)
|
||||
target_link_libraries(test_conv2d_fwd PRIVATE host_tensor device_conv2d_fwd_instance device_convnd_2d_fwd_instance conv_util)
|
||||
add_dependencies(test_convnd_fwd test_conv2d_fwd)
|
||||
|
||||
add_gtest_executable(test_conv3d_fwd conv3d_fwd.cpp)
|
||||
target_link_libraries(test_conv3d_fwd PRIVATE host_tensor device_conv3d_fwd_instance conv_util)
|
||||
add_dependencies(test_convnd_fwd test_conv3d_fwd)
|
||||
add_gtest_executable(test_convnd_fwd convnd_fwd.cpp)
|
||||
target_link_libraries(test_convnd_fwd PRIVATE utility device_conv2d_fwd_instance)
|
||||
|
||||
@@ -1,192 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "ck/utility/data_type.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
#include "ck/library/utility/conv_util.hpp"
|
||||
#include "test/convnd_fwd/conv_util.hpp"
|
||||
|
||||
namespace {

// Fixture for 1D forward convolution in NWC/KXC/NWK layouts: runs a set of
// device instances against the host reference convolution and compares
// outputs within atol_/rtol_.
class Conv1dFwdNWCInstances : public ::testing::Test
{
    public:
    // Fills in/wei with FillUniformDistributionIntegerValue, runs every
    // device instance in `conv_ptrs` on `params`, and checks each result
    // against the bound host reference. Returns true if all instances pass.
    template <typename T>
    bool test_conv1d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
                                   const ck::utils::conv::ConvParams& params)
    {
        using namespace std::placeholders;
        using namespace ck::utils;
        namespace ctl = ck::tensor_layout::convolution;

        conv::ConvFwdOpInstance<T,
                                T,
                                T,
                                ctl::NWC,
                                ctl::KXC,
                                ctl::NWK,
                                ck::tensor_operation::element_wise::PassThrough,
                                ck::tensor_operation::element_wise::PassThrough,
                                ck::tensor_operation::element_wise::PassThrough,
                                FillUniformDistributionIntegerValue<T>,
                                FillUniformDistributionIntegerValue<T>>
            conv_instance(params,
                          true,
                          FillUniformDistributionIntegerValue<T>{},
                          FillUniformDistributionIntegerValue<T>{});
        // Host reference bound to the same conv parameters (in/wei/out are
        // supplied by the run engine via the placeholders).
        auto reference_conv_fwd_fun =
            std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
        OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
        run_engine.SetAtol(atol_);
        run_engine.SetRtol(rtol_);
        return run_engine.Test(conv_ptrs);
    }

    // 3-wide filter with stride/dilation/padding of 2 (general case).
    template <typename T>
    bool test_default()
    {
        return test_conv1d_nwc_instances<T>(
            ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(), params_default_);
    }

    // 1x1 filter, unit stride, zero padding.
    template <typename T>
    bool test_filter1x1_stride1_pad0()
    {
        return test_conv1d_nwc_instances<T>(
            ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(),
            params_filter1x1_stride1_pad0_);
    }

    // 1x1 filter, stride 2, zero padding.
    template <typename T>
    bool test_filter1x1_pad0()
    {
        return test_conv1d_nwc_instances<T>(
            ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<1>(),
            params_filter1x1_pad0_);
    }

    // Parameter sets; field order presumably {ndim, N, K, C, filter, input,
    // strides, dilations, left pads, right pads} — confirm against ConvParams.
    static inline ck::utils::conv::ConvParams params_default_{
        1, 4, 256, 64, {3}, {71}, {2}, {2}, {2}, {2}};
    static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
        1, 4, 256, 64, {1}, {28}, {1}, {1}, {0}, {0}};
    static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
        1, 4, 256, 64, {1}, {28}, {2}, {1}, {0}, {0}};

    private:
    // Verification tolerances applied to every instance run.
    double atol_{1e-5};
    double rtol_{1e-4};
};

} // anonymous namespace
|
||||
|
||||
// fp32 run with integer-valued tensor fills, checked against the host
// reference with tight tolerances.
TEST(Conv1DFwdNWC, IntegerValues)
{
    using namespace std::placeholders;
    using namespace ck::utils;
    namespace ctl = ck::tensor_layout::convolution;
    using T = float;

    ck::utils::conv::ConvParams params{1, 4, 256, 64, {3}, {36}, {1}, {2}, {2}, {2}};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<1, T, T, T, T>(conv_ptrs);
    conv::ConvFwdOpInstance<T,
                            T,
                            T,
                            ctl::NWC,
                            ctl::KXC,
                            ctl::NWK,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            FillUniformDistributionIntegerValue<T>,
                            FillUniformDistributionIntegerValue<T>>
        conv_instance(params,
                      true,
                      FillUniformDistributionIntegerValue<T>{},
                      FillUniformDistributionIntegerValue<T>{});

    auto reference_conv_fwd_fun =
        std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
    OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
    run_engine.SetAtol(1e-5);
    run_engine.SetRtol(1e-4);
    EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
|
||||
|
||||
// fp16 run with uniformly distributed floating-point fills; looser
// tolerances than the integer-value test. The trailing `float` template
// argument to get_test_convolution_fwd_instance is presumably the
// accumulator type — confirm against conv_util.hpp.
TEST(Conv1DFwdNWC, FloatingPointValues)
{
    using namespace std::placeholders;
    using namespace ck::utils;
    namespace ctl = ck::tensor_layout::convolution;
    using T = ck::half_t;

    ck::utils::conv::ConvParams params{1, 4, 256, 64, {3}, {36}, {1}, {2}, {2}, {2}};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<1, T, T, T, float>(conv_ptrs);
    conv::ConvFwdOpInstance<T,
                            T,
                            T,
                            ctl::NWC,
                            ctl::KXC,
                            ctl::NWK,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            FillUniformDistribution<T>,
                            FillUniformDistribution<T>>
        conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});

    auto reference_conv_fwd_fun =
        std::bind(conv::run_reference_convolution_forward<1, T, T, T>, params, _1, _2, _3);
    OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
    run_engine.SetAtol(0.1);
    run_engine.SetRtol(1e-2);
    EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
|
||||
|
||||
// Per-datatype instantiations of the Conv1dFwdNWCInstances fixture.

// bf16
TEST_F(Conv1dFwdNWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv1dFwdNWCInstances, BF16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv1dFwdNWCInstances, BF16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}

// fp16
TEST_F(Conv1dFwdNWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv1dFwdNWCInstances, F16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv1dFwdNWCInstances, F16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}

// fp32
TEST_F(Conv1dFwdNWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv1dFwdNWCInstances, F32_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv1dFwdNWCInstances, F32_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}

// int8
TEST_F(Conv1dFwdNWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv1dFwdNWCInstances, I8_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv1dFwdNWCInstances, I8_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}
|
||||
@@ -1,266 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "ck/utility/data_type.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
#include "ck/library/utility/conv_util.hpp"
|
||||
#include "test/convnd_fwd/conv_util.hpp"
|
||||
|
||||
namespace {

// Fixture for 2D forward convolution in NHWC/KYXC/NHWK layouts.
// Each test_* helper can run either the dedicated conv2d instances or,
// when use_convnd is true, the rank-generic convnd instances.
class Conv2dFwdNHWCInstances : public ::testing::Test
{
    public:
    // Fills in/wei with FillUniformDistributionIntegerValue, runs every
    // device instance in `conv_ptrs` on `params`, and compares against the
    // host reference within atol_/rtol_. Returns true if all pass.
    template <typename T>
    bool test_conv2d_nhwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
                                    const ck::utils::conv::ConvParams& params)
    {
        using namespace std::placeholders;
        using namespace ck::utils;

        conv::ConvFwdOpInstance<T,
                                T,
                                T,
                                ck::tensor_layout::convolution::NHWC,
                                ck::tensor_layout::convolution::KYXC,
                                ck::tensor_layout::convolution::NHWK,
                                ck::tensor_operation::element_wise::PassThrough,
                                ck::tensor_operation::element_wise::PassThrough,
                                ck::tensor_operation::element_wise::PassThrough,
                                FillUniformDistributionIntegerValue<T>,
                                FillUniformDistributionIntegerValue<T>>
            conv_instance(params,
                          true,
                          FillUniformDistributionIntegerValue<T>{},
                          FillUniformDistributionIntegerValue<T>{});
        // Host reference bound to the same conv parameters.
        auto reference_conv_fwd_fun =
            std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
        OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
        run_engine.SetAtol(atol_);
        run_engine.SetRtol(rtol_);
        return run_engine.Test(conv_ptrs);
    }

    // General 3x3 case; use_convnd selects the rank-generic instance list.
    template <typename T>
    bool test_default(bool use_convnd = false)
    {
        if(use_convnd)
        {
            return test_conv2d_nhwc_instances<T>(
                test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2), params_default_);
        }
        else
        {
            return test_conv2d_nhwc_instances<T>(
                ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
                params_default_);
        }
    }

    // 1x1 filter, unit stride, zero padding.
    template <typename T>
    bool test_filter1x1_stride1_pad0(bool use_convnd = false)
    {
        if(use_convnd)
        {
            return test_conv2d_nhwc_instances<T>(
                test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2),
                params_filter1x1_stride1_pad0_);
        }
        else
        {
            return test_conv2d_nhwc_instances<T>(
                ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
                params_filter1x1_stride1_pad0_);
        }
    }

    // 1x1 filter, stride 2, zero padding.
    template <typename T>
    bool test_filter1x1_pad0(bool use_convnd = false)
    {
        if(use_convnd)
        {
            return test_conv2d_nhwc_instances<T>(
                test::conv::ConvolutionNDFwdInstances<T, T, T>::Get(2), params_filter1x1_pad0_);
        }
        else
        {
            return test_conv2d_nhwc_instances<T>(
                ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(),
                params_filter1x1_pad0_);
        }
    }

    // Odd channel count (C = 3), e.g. to exercise non-vectorizable C reads.
    template <typename T>
    bool test_oddC()
    {
        return test_conv2d_nhwc_instances<T>(
            ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<2>(), params_oddC_);
    }

    // Parameter sets; field order presumably {ndim, N, K, C, filter, input,
    // strides, dilations, left pads, right pads} — confirm against ConvParams.
    static inline ck::utils::conv::ConvParams params_default_{
        2, 4, 256, 64, {3, 3}, {36, 36}, {2, 2}, {2, 2}, {2, 2}, {2, 2}};
    static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
        2, 4, 256, 64, {1, 1}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};
    static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
        2, 4, 256, 64, {1, 1}, {28, 28}, {2, 2}, {1, 1}, {0, 0}, {0, 0}};
    static inline ck::utils::conv::ConvParams params_oddC_{
        2, 4, 256, 3, {3, 3}, {28, 28}, {1, 1}, {1, 1}, {0, 0}, {0, 0}};

    private:
    // Verification tolerances applied to every instance run.
    double atol_{1e-5};
    double rtol_{1e-4};
};

} // anonymous namespace
|
||||
|
||||
// fp32 run with integer-valued tensor fills, checked against the host
// reference with tight tolerances.
TEST(Conv2DFwdNHWC, IntegerValues)
{
    using namespace std::placeholders;
    using namespace ck::utils;
    using T = float;

    ck::utils::conv::ConvParams params{
        2, 4, 256, 64, {3, 3}, {36, 36}, {1, 1}, {2, 2}, {2, 2}, {2, 2}};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<2, T, T, T, T>(conv_ptrs);
    conv::ConvFwdOpInstance<T,
                            T,
                            T,
                            ck::tensor_layout::convolution::NHWC,
                            ck::tensor_layout::convolution::KYXC,
                            ck::tensor_layout::convolution::NHWK,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            FillUniformDistributionIntegerValue<T>,
                            FillUniformDistributionIntegerValue<T>>
        conv_instance(params,
                      true,
                      FillUniformDistributionIntegerValue<T>{},
                      FillUniformDistributionIntegerValue<T>{});

    auto reference_conv_fwd_fun =
        std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
    OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
    run_engine.SetAtol(1e-5);
    run_engine.SetRtol(1e-4);
    EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
|
||||
|
||||
// fp16 run with uniformly distributed floating-point fills; looser
// tolerances than the integer-value test. The trailing `float` template
// argument to get_test_convolution_fwd_instance is presumably the
// accumulator type — confirm against conv_util.hpp.
TEST(Conv2DFwdNHWC, FloatingPointValues)
{
    using namespace std::placeholders;
    using namespace ck::utils;
    using T = ck::half_t;

    ck::utils::conv::ConvParams params{
        2, 4, 256, 64, {3, 3}, {36, 36}, {2, 2}, {2, 2}, {2, 2}, {2, 2}};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<2, T, T, T, float>(conv_ptrs);
    conv::ConvFwdOpInstance<T,
                            T,
                            T,
                            ck::tensor_layout::convolution::NHWC,
                            ck::tensor_layout::convolution::KYXC,
                            ck::tensor_layout::convolution::NHWK,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            FillUniformDistribution<T>,
                            FillUniformDistribution<T>>
        conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});

    auto reference_conv_fwd_fun =
        std::bind(conv::run_reference_convolution_forward<2, T, T, T>, params, _1, _2, _3);
    OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
    run_engine.SetAtol(2e-4);
    run_engine.SetRtol(1e-3);
    EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
|
||||
|
||||
// Per-datatype instantiations of the Conv2dFwdNHWCInstances fixture,
// first against the dedicated conv2d instances, then (ND_ prefix) against
// the rank-generic convnd instances (use_convnd = true).

// bf16
TEST_F(Conv2dFwdNHWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv2dFwdNHWCInstances, BF16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv2dFwdNHWCInstances, BF16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}
// fp16
TEST_F(Conv2dFwdNHWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv2dFwdNHWCInstances, F16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}
TEST_F(Conv2dFwdNHWCInstances, F16_oddC) { EXPECT_TRUE(this->test_oddC<ck::half_t>()); }
// fp32
TEST_F(Conv2dFwdNHWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv2dFwdNHWCInstances, F32_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv2dFwdNHWCInstances, F32_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}
// int8
TEST_F(Conv2dFwdNHWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv2dFwdNHWCInstances, I8_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv2dFwdNHWCInstances, I8_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}

// Rank-generic convnd instances.
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_default)
{
    EXPECT_TRUE(this->test_default<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_BF16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_default)
{
    EXPECT_TRUE(this->test_default<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F32_default) { EXPECT_TRUE(this->test_default<float>(true)); }
TEST_F(Conv2dFwdNHWCInstances, ND_F32_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_F32_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<float>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_I8_default) { EXPECT_TRUE(this->test_default<int8_t>(true)); }
TEST_F(Conv2dFwdNHWCInstances, ND_I8_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>(true));
}
TEST_F(Conv2dFwdNHWCInstances, ND_I8_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>(true));
}
|
||||
@@ -1,317 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <iostream>
|
||||
#include <stdexcept>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "ck/utility/data_type.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
|
||||
#include "ck/library/utility/conv_util.hpp"
|
||||
|
||||
#include "test/convnd_fwd/conv_util.hpp"
|
||||
|
||||
namespace {

// Fixture for 3D forward convolution in NDHWC/KZYXC/NDHWK layouts: runs a
// set of device instances against the host reference convolution and
// compares outputs within atol_/rtol_.
class Conv3dFwdNDHWCInstances : public ::testing::Test
{
    public:
    // Fills in/wei with FillUniformDistributionIntegerValue, runs every
    // device instance in `conv_ptrs` on `params`, and checks each result
    // against the bound host reference. Returns true if all instances pass.
    template <typename T>
    bool test_conv3d_nwc_instances(const std::vector<test::conv::DeviceConvFwdNoOpPtr>& conv_ptrs,
                                   const ck::utils::conv::ConvParams& params)
    {
        using namespace std::placeholders;
        using namespace ck::utils;
        namespace ctl = ck::tensor_layout::convolution;

        conv::ConvFwdOpInstance<T,
                                T,
                                T,
                                ctl::NDHWC,
                                ctl::KZYXC,
                                ctl::NDHWK,
                                ck::tensor_operation::element_wise::PassThrough,
                                ck::tensor_operation::element_wise::PassThrough,
                                ck::tensor_operation::element_wise::PassThrough,
                                FillUniformDistributionIntegerValue<T>,
                                FillUniformDistributionIntegerValue<T>>
            conv_instance(params,
                          true,
                          FillUniformDistributionIntegerValue<T>{},
                          FillUniformDistributionIntegerValue<T>{});
        // Host reference bound to the same conv parameters.
        auto reference_conv_fwd_fun =
            std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
        OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
        run_engine.SetAtol(atol_);
        run_engine.SetRtol(rtol_);
        return run_engine.Test(conv_ptrs);
    }

    // 3x3x3 filter with stride/dilation/padding of 2 (general case).
    template <typename T>
    bool test_default()
    {
        return test_conv3d_nwc_instances<T>(
            ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(), params_default_);
    }

    // 1x1x1 filter, unit stride, zero padding.
    template <typename T>
    bool test_filter1x1_stride1_pad0()
    {
        return test_conv3d_nwc_instances<T>(
            ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(),
            params_filter1x1_stride1_pad0_);
    }

    // 1x1x1 filter, stride 2, zero padding.
    template <typename T>
    bool test_filter1x1_pad0()
    {
        return test_conv3d_nwc_instances<T>(
            ck::utils::conv::ConvolutionFwdInstances<T, T, T>::template Get<3>(),
            params_filter1x1_pad0_);
    }

    // Parameter sets; field order presumably {ndim, N, K, C, filter, input,
    // strides, dilations, left pads, right pads} — confirm against ConvParams.
    static inline ck::utils::conv::ConvParams params_default_{
        3, 4, 256, 64, {3, 3, 3}, {28, 28, 28}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};
    static inline ck::utils::conv::ConvParams params_filter1x1_stride1_pad0_{
        3, 4, 256, 64, {1, 1, 1}, {28, 28, 28}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};
    static inline ck::utils::conv::ConvParams params_filter1x1_pad0_{
        3, 4, 256, 64, {1, 1, 1}, {28, 28, 28}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}};

    private:
    // Verification tolerances applied to every instance run.
    double atol_{1e-5};
    double rtol_{1e-4};
};

} // anonymous namespace
|
||||
|
||||
// fp32 run with integer-valued tensor fills, checked against the host
// reference.
TEST(Conv3DFwdNDHWC, IntegerValues)
{
    using namespace std::placeholders;
    using namespace ck::utils;
    namespace ctl = ck::tensor_layout::convolution;
    using T = float;

    ck::utils::conv::ConvParams params{
        3, 4, 256, 64, {3, 3, 3}, {18, 18, 18}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
    conv::ConvFwdOpInstance<T,
                            T,
                            T,
                            ctl::NDHWC,
                            ctl::KZYXC,
                            ctl::NDHWK,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            FillUniformDistributionIntegerValue<T>,
                            FillUniformDistributionIntegerValue<T>>
        conv_instance(params,
                      true,
                      FillUniformDistributionIntegerValue<T>{},
                      FillUniformDistributionIntegerValue<T>{});

    auto reference_conv_fwd_fun =
        std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
    OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
    run_engine.SetAtol(1e-5);
    run_engine.SetRtol(1e-3);
    EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
|
||||
|
||||
// fp16 run with uniformly distributed floating-point fills; looser
// tolerances than the integer-value test. The trailing `float` template
// argument to get_test_convolution_fwd_instance is presumably the
// accumulator type — confirm against conv_util.hpp.
TEST(Conv3DFwdNDHWC, FloatingPointValues)
{
    using namespace std::placeholders;
    using namespace ck::utils;
    namespace ctl = ck::tensor_layout::convolution;
    using T = ck::half_t;

    ck::utils::conv::ConvParams params{
        3, 4, 256, 64, {3, 3, 3}, {18, 18, 18}, {1, 1, 1}, {2, 2, 2}, {2, 2, 2}, {2, 2, 2}};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<3, T, T, T, float>(conv_ptrs);
    conv::ConvFwdOpInstance<T,
                            T,
                            T,
                            ctl::NDHWC,
                            ctl::KZYXC,
                            ctl::NDHWK,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            ck::tensor_operation::element_wise::PassThrough,
                            FillUniformDistribution<T>,
                            FillUniformDistribution<T>>
        conv_instance(params, true, FillUniformDistribution<T>{}, FillUniformDistribution<T>{});

    auto reference_conv_fwd_fun =
        std::bind(conv::run_reference_convolution_forward<3, T, T, T>, params, _1, _2, _3);
    OpInstanceRunEngine<T, T, T> run_engine(conv_instance, reference_conv_fwd_fun);
    run_engine.SetAtol(1e-3);
    run_engine.SetRtol(1e-3);
    EXPECT_TRUE(run_engine.Test(conv_ptrs));
}
|
||||
|
||||
// A >2GB input tensor must be reported as unsupported by the device
// instance (argument validation only — no buffers are allocated).
TEST(Conv3DFwdNDHWC, InputOver2GB)
{
    using PassThrough = ck::tensor_operation::element_wise::PassThrough;
    using namespace ck::utils;
    using T = float;

    // >2GB Input
    conv::ConvParams params;
    params.num_dim_spatial_ = 3;
    params.N_ = 2;
    params.K_ = 16;
    params.C_ = 32;
    params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
    params.input_spatial_lengths_ = std::vector<ck::index_t>{32, 1000, 1000};
    params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
    params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
    params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
    params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
    // Null data pointers are fine: only IsSupportedArgument is exercised.
    auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
                                                     nullptr,
                                                     nullptr,
                                                     params.N_,
                                                     params.K_,
                                                     params.C_,
                                                     params.input_spatial_lengths_,
                                                     params.filter_spatial_lengths_,
                                                     params.GetOutputSpatialLengths(),
                                                     params.conv_filter_strides_,
                                                     params.conv_filter_dilations_,
                                                     params.input_left_pads_,
                                                     params.input_right_pads_,
                                                     PassThrough{},
                                                     PassThrough{},
                                                     PassThrough{});
    EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
|
||||
|
||||
// A >2GB weight tensor must be reported as unsupported by the device
// instance (argument validation only — no buffers are allocated).
TEST(Conv3DFwdNDHWC, FiltersOver2GB)
{
    using PassThrough = ck::tensor_operation::element_wise::PassThrough;
    using namespace ck::utils;
    using T = float;

    // >2GB Filters
    conv::ConvParams params;
    params.num_dim_spatial_ = 3;
    params.N_ = 2;
    params.K_ = 16;
    params.C_ = 32;
    params.filter_spatial_lengths_ = std::vector<ck::index_t>{4, 1000, 1000};
    params.input_spatial_lengths_ = std::vector<ck::index_t>{16, 16, 16};
    params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
    params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
    params.input_left_pads_ = std::vector<ck::index_t>{1, 1, 1};
    params.input_right_pads_ = std::vector<ck::index_t>{1, 1, 1};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
    // Null data pointers are fine: only IsSupportedArgument is exercised.
    auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
                                                     nullptr,
                                                     nullptr,
                                                     params.N_,
                                                     params.K_,
                                                     params.C_,
                                                     params.input_spatial_lengths_,
                                                     params.filter_spatial_lengths_,
                                                     params.GetOutputSpatialLengths(),
                                                     params.conv_filter_strides_,
                                                     params.conv_filter_dilations_,
                                                     params.input_left_pads_,
                                                     params.input_right_pads_,
                                                     PassThrough{},
                                                     PassThrough{},
                                                     PassThrough{});
    EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
|
||||
|
||||
// A >2GB output tensor must be reported as unsupported by the device
// instance (argument validation only — no buffers are allocated).
TEST(Conv3DFwdNDHWC, OutputOver2GB)
{
    using PassThrough = ck::tensor_operation::element_wise::PassThrough;
    using namespace ck::utils;
    using T = float;

    // >2GB Output
    conv::ConvParams params;
    params.num_dim_spatial_ = 3;
    params.N_ = 2;
    params.K_ = 16;
    params.C_ = 2;
    params.filter_spatial_lengths_ = std::vector<ck::index_t>{1, 1, 1};
    params.input_spatial_lengths_ = std::vector<ck::index_t>{1000, 1000, 30};
    params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
    params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
    params.input_left_pads_ = std::vector<ck::index_t>{2, 2, 2};
    params.input_right_pads_ = std::vector<ck::index_t>{2, 2, 2};

    std::vector<test::conv::DeviceConvFwdNoOpPtr> conv_ptrs;
    test::conv::get_test_convolution_fwd_instance<3, T, T, T, T>(conv_ptrs);
    // Null data pointers are fine: only IsSupportedArgument is exercised.
    auto arg = conv_ptrs.back()->MakeArgumentPointer(nullptr,
                                                     nullptr,
                                                     nullptr,
                                                     params.N_,
                                                     params.K_,
                                                     params.C_,
                                                     params.input_spatial_lengths_,
                                                     params.filter_spatial_lengths_,
                                                     params.GetOutputSpatialLengths(),
                                                     params.conv_filter_strides_,
                                                     params.conv_filter_dilations_,
                                                     params.input_left_pads_,
                                                     params.input_right_pads_,
                                                     PassThrough{},
                                                     PassThrough{},
                                                     PassThrough{});
    EXPECT_FALSE(conv_ptrs.back()->IsSupportedArgument(arg.get()));
}
|
||||
|
||||
// Per-datatype instantiations of the Conv3dFwdNDHWCInstances fixture.

// bf16
TEST_F(Conv3dFwdNDHWCInstances, BF16_default) { EXPECT_TRUE(this->test_default<ck::bhalf_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, BF16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::bhalf_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, BF16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::bhalf_t>());
}

// fp16
TEST_F(Conv3dFwdNDHWCInstances, F16_default) { EXPECT_TRUE(this->test_default<ck::half_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, F16_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<ck::half_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, F16_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<ck::half_t>());
}

// fp32
TEST_F(Conv3dFwdNDHWCInstances, F32_default) { EXPECT_TRUE(this->test_default<float>()); }
TEST_F(Conv3dFwdNDHWCInstances, F32_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<float>());
}
TEST_F(Conv3dFwdNDHWCInstances, F32_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<float>());
}

// int8
TEST_F(Conv3dFwdNDHWCInstances, I8_default) { EXPECT_TRUE(this->test_default<int8_t>()); }
TEST_F(Conv3dFwdNDHWCInstances, I8_filter1x1_stride1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_stride1_pad0<int8_t>());
}
TEST_F(Conv3dFwdNDHWCInstances, I8_filter1x1_pad0)
{
    EXPECT_TRUE(this->test_filter1x1_pad0<int8_t>());
}
|
||||
@@ -1,174 +0,0 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <tuple>
|
||||
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/utility/sequence.hpp"
|
||||
#include "ck/utility/data_type.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
|
||||
#include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
namespace device {
|
||||
|
||||
using DeviceConvFwdNoOpPtr = DeviceConvFwdPtr<element_wise::PassThrough,
|
||||
element_wise::PassThrough,
|
||||
element_wise::PassThrough>;
|
||||
namespace instance {
|
||||
|
||||
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
|
||||
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(std::vector<DeviceConvFwdNoOpPtr>&);
|
||||
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(std::vector<DeviceConvFwdNoOpPtr>&);
|
||||
void add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(std::vector<DeviceConvFwdNoOpPtr>&);
|
||||
|
||||
} // namespace instance
|
||||
} // namespace device
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
|
||||
namespace test {
|
||||
namespace conv {
|
||||
|
||||
template <ck::index_t... Is>
|
||||
using S = ck::Sequence<Is...>;
|
||||
|
||||
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
using DeviceConvFwdNoOpPtr =
|
||||
ck::tensor_operation::device::DeviceConvFwdPtr<InElementOp, WeiElementOp, OutElementOp>;
|
||||
|
||||
static constexpr auto ConvFwdDefault =
|
||||
ck::tensor_operation::device::ConvolutionForwardSpecialization::Default;
|
||||
|
||||
template <ck::index_t SpatialDims,
|
||||
typename InDataType,
|
||||
typename WeiDataType,
|
||||
typename OutDataType,
|
||||
typename AccDataType>
|
||||
using DeviceConvNDFwdInstance = ck::tensor_operation::device::
|
||||
DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K<
|
||||
// clang-format off
|
||||
InDataType, //
|
||||
WeiDataType, //
|
||||
OutDataType, //
|
||||
AccDataType, // Accumulator data type.
|
||||
InElementOp, // Input Elementwise Operation
|
||||
WeiElementOp, // Weights Elementwise Operation
|
||||
OutElementOp, // Output Elementwise Operation
|
||||
ConvFwdDefault, // ConvForwardSpecialization
|
||||
SpatialDims, // SptialDims
|
||||
256, // BlockSize
|
||||
128, // MPerBlock
|
||||
256, // NPerBlock
|
||||
4, // K0PerBlock
|
||||
8, // K1
|
||||
32, // MPerXdl
|
||||
32, // NPerXdl
|
||||
2, // MXdlPerWave
|
||||
4, // NXdlPerWave
|
||||
S<4, 64, 1>, // ABlockTransferThreadClusterLengths_K0_M_K1
|
||||
S<1, 0, 2>, // ABlockTransferThreadClusterArrangeOrder
|
||||
S<1, 0, 2>, // ABlockTransferSrcAccessOrder
|
||||
2, // ABlockTransferSrcVectorDim
|
||||
8, // ABlockTransferSrcScalarPerVector
|
||||
8, // ABlockTransferDstScalarPerVector_K1
|
||||
true, // ABlockLdsAddExtraM
|
||||
S<4, 64, 1>, // BBlockTransferThreadClusterLengths_K0_N_K1
|
||||
S<1, 0, 2>, // BBlockTransferThreadClusterArrangeOrder
|
||||
S<1, 0, 2>, // BBlockTransferSrcAccessOrder
|
||||
2, // BBlockTransferSrcVectorDim
|
||||
8, // BBlockTransferSrcScalarPerVector
|
||||
8, // BBlockTransferDstScalarPerVector_K1
|
||||
true, // BBlockLdsAddExtraN
|
||||
7, // CThreadTransferSrcDstVectorDim
|
||||
1>; // CThreadTransferDstScalarPerVector
|
||||
// clang-format on
|
||||
|
||||
template <ck::index_t NDim,
|
||||
typename InDataType,
|
||||
typename WeiDataType,
|
||||
typename OutDataType,
|
||||
typename AccDataType>
|
||||
void get_test_convolution_fwd_instance(std::vector<DeviceConvFwdNoOpPtr>& instances)
|
||||
{
|
||||
using ConvInstanceT =
|
||||
DeviceConvNDFwdInstance<NDim, InDataType, WeiDataType, OutDataType, AccDataType>;
|
||||
instances.emplace_back(std::make_unique<ConvInstanceT>());
|
||||
}
|
||||
|
||||
// TODO (aosewski)
|
||||
// Temporary solution to get all DeviceConvNDFwdXdl_Input_N_Hi_Wi_C_Weight_K_Y_X_C_Output_N_Ho_Wo_K
|
||||
// instances. When switched over to DeviceConvNDFwdXdl for 2D remove ConvolutionNDFwdInstances
|
||||
// structures.
|
||||
template <typename InDataType, typename WeiDataType, typename OutDataType>
|
||||
struct ConvolutionNDFwdInstances;
|
||||
|
||||
template <>
|
||||
struct ConvolutionNDFwdInstances<float, float, float>
|
||||
{
|
||||
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
|
||||
{
|
||||
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
|
||||
if(num_dim_spatial == 2)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f32_instances(conv_ptrs);
|
||||
}
|
||||
return conv_ptrs;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ConvolutionNDFwdInstances<ck::half_t, ck::half_t, ck::half_t>
|
||||
{
|
||||
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
|
||||
{
|
||||
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
|
||||
if(num_dim_spatial == 2)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_f16_instances(conv_ptrs);
|
||||
}
|
||||
return conv_ptrs;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ConvolutionNDFwdInstances<ck::bhalf_t, ck::bhalf_t, ck::bhalf_t>
|
||||
{
|
||||
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
|
||||
{
|
||||
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
|
||||
if(num_dim_spatial == 2)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_bf16_instances(conv_ptrs);
|
||||
}
|
||||
return conv_ptrs;
|
||||
}
|
||||
};
|
||||
|
||||
template <>
|
||||
struct ConvolutionNDFwdInstances<int8_t, int8_t, int8_t>
|
||||
{
|
||||
static std::vector<DeviceConvFwdNoOpPtr> Get(std::size_t num_dim_spatial)
|
||||
{
|
||||
std::vector<DeviceConvFwdNoOpPtr> conv_ptrs;
|
||||
if(num_dim_spatial == 2)
|
||||
{
|
||||
ck::tensor_operation::device::instance::
|
||||
add_device_convnd_2d_fwd_xdl_nhwc_kyxc_nhwk_int8_instances(conv_ptrs);
|
||||
}
|
||||
return conv_ptrs;
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace conv
|
||||
} // namespace test
|
||||
241
test/convnd_fwd/convnd_fwd.cpp
Normal file
241
test/convnd_fwd/convnd_fwd.cpp
Normal file
@@ -0,0 +1,241 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <initializer_list>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_conv_fwd_impl.hpp"
|
||||
|
||||
class TestConvndFwd : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
std::vector<ck::utils::conv::ConvParam> conv_params;
|
||||
};
|
||||
|
||||
// 1d
|
||||
TEST_F(TestConvndFwd, Conv1dFwd)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
|
||||
conv_params.push_back({1, 1, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 2d
|
||||
TEST_F(TestConvndFwd, Conv2dFwd)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
conv_params.push_back({2, 1, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::NHWC,
|
||||
ck::tensor_layout::convolution::KYXC,
|
||||
ck::tensor_layout::convolution::NHWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 3d
|
||||
TEST_F(TestConvndFwd, Conv3dFwd)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
|
||||
conv_params.push_back(
|
||||
{3, 1, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::NDHWC,
|
||||
ck::tensor_layout::convolution::KZYXC,
|
||||
ck::tensor_layout::convolution::NDHWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
@@ -1,15 +1,15 @@
|
||||
add_test_executable(test_gemm_fp32 gemm_fp32.cpp)
|
||||
target_link_libraries(test_gemm_fp32 PRIVATE host_tensor)
|
||||
target_link_libraries(test_gemm_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_gemm_fp32 PRIVATE device_gemm_instance)
|
||||
|
||||
add_test_executable(test_gemm_fp16 gemm_fp16.cpp)
|
||||
target_link_libraries(test_gemm_fp16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_gemm_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_gemm_fp16 PRIVATE device_gemm_instance)
|
||||
|
||||
add_test_executable(test_gemm_bf16 gemm_bf16.cpp)
|
||||
target_link_libraries(test_gemm_bf16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_gemm_bf16 PRIVATE utility)
|
||||
target_link_libraries(test_gemm_bf16 PRIVATE device_gemm_instance)
|
||||
|
||||
add_test_executable(test_gemm_int8 gemm_int8.cpp)
|
||||
target_link_libraries(test_gemm_int8 PRIVATE host_tensor)
|
||||
target_link_libraries(test_gemm_int8 PRIVATE utility)
|
||||
target_link_libraries(test_gemm_int8 PRIVATE device_gemm_instance)
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
|
||||
|
||||
#include "test/gemm/gemm_util.hpp"
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
|
||||
|
||||
#include "test/gemm/gemm_util.hpp"
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
|
||||
|
||||
#include "test/gemm/gemm_util.hpp"
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
|
||||
|
||||
#include "test/gemm/gemm_util.hpp"
|
||||
|
||||
@@ -17,9 +17,9 @@
|
||||
#include "ck/library/tensor_operation_instance/gpu/gemm.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
|
||||
|
||||
#include "test/gemm/gemm_util.hpp"
|
||||
|
||||
@@ -6,9 +6,9 @@
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
|
||||
|
||||
namespace ck {
|
||||
@@ -71,9 +71,9 @@ bool RunDeviceGEMM(DeviceGemmPtr_& gemmPtr,
|
||||
BElementwiseOperation b_element_op,
|
||||
CElementwiseOperation c_element_op)
|
||||
{
|
||||
DeviceMem a_m_k_device_buf(sizeof(ADataType) * A.mDesc.GetElementSpace());
|
||||
DeviceMem b_k_n_device_buf(sizeof(BDataType) * B.mDesc.GetElementSpace());
|
||||
DeviceMem c_m_n_device_buf(sizeof(CDataType) * C.mDesc.GetElementSpace());
|
||||
DeviceMem a_m_k_device_buf(sizeof(ADataType) * A.mDesc.GetElementSpaceSize());
|
||||
DeviceMem b_k_n_device_buf(sizeof(BDataType) * B.mDesc.GetElementSpaceSize());
|
||||
DeviceMem c_m_n_device_buf(sizeof(CDataType) * C.mDesc.GetElementSpaceSize());
|
||||
|
||||
auto invoker_ptr = gemmPtr->MakeInvokerPointer();
|
||||
auto argument_ptr =
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
add_test_executable(test_gemm_split_k gemm_split_k.cpp)
|
||||
target_link_libraries(test_gemm_split_k PRIVATE host_tensor)
|
||||
target_link_libraries(test_gemm_split_k PRIVATE utility)
|
||||
target_link_libraries(test_gemm_split_k PRIVATE device_gemm_splitk_instance)
|
||||
|
||||
@@ -14,12 +14,12 @@
|
||||
#include "ck/library/tensor_operation_instance/gpu/gemm_splitk.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
|
||||
|
||||
#include "ck/library/host_tensor/host_gemm.hpp"
|
||||
#include "ck/library/utility/host_gemm.hpp"
|
||||
|
||||
enum struct GemmMatrixLayout
|
||||
{
|
||||
@@ -127,9 +127,9 @@ int test_gemm(const gemmArgs& args)
|
||||
ck::tensor_operation::element_wise::PassThrough{},
|
||||
ck::tensor_operation::element_wise::PassThrough{});
|
||||
|
||||
DeviceMem a_device_buf(sizeof(float) * a_m_k.mDesc.GetElementSpace());
|
||||
DeviceMem b_device_buf(sizeof(float) * b_k_n.mDesc.GetElementSpace());
|
||||
DeviceMem c_device_buf(sizeof(float) * c_m_n_device_result.mDesc.GetElementSpace());
|
||||
DeviceMem a_device_buf(sizeof(float) * a_m_k.mDesc.GetElementSpaceSize());
|
||||
DeviceMem b_device_buf(sizeof(float) * b_k_n.mDesc.GetElementSpaceSize());
|
||||
DeviceMem c_device_buf(sizeof(float) * c_m_n_device_result.mDesc.GetElementSpaceSize());
|
||||
|
||||
a_device_buf.ToDevice(a_m_k.mData.data());
|
||||
b_device_buf.ToDevice(b_k_n.mData.data());
|
||||
|
||||
3
test/grouped_convnd_fwd/CMakeLists.txt
Normal file
3
test/grouped_convnd_fwd/CMakeLists.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
add_gtest_executable(test_grouped_convnd_fwd grouped_convnd_fwd.cpp)
|
||||
target_link_libraries(test_grouped_convnd_fwd PRIVATE utility device_grouped_conv1d_fwd_instance device_grouped_conv2d_fwd_instance device_grouped_conv3d_fwd_instance)
|
||||
|
||||
270
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
Normal file
270
test/grouped_convnd_fwd/grouped_convnd_fwd.cpp
Normal file
@@ -0,0 +1,270 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#include <cstdlib>
|
||||
#include <iostream>
|
||||
#include <initializer_list>
|
||||
#include <vector>
|
||||
#include <gtest/gtest.h>
|
||||
|
||||
#include "profiler/include/profile_grouped_conv_fwd_impl.hpp"
|
||||
|
||||
class TestGroupedConvNdFwd : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
std::vector<ck::utils::conv::ConvParam> conv_params;
|
||||
};
|
||||
|
||||
// 1d GNWC/GKXC/GNWK
|
||||
TEST_F(TestGroupedConvNdFwd, GroupedConv1dFwdGNWC)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({1, 2, 128, 128, 256, {1}, {14}, {2}, {1}, {0}, {0}});
|
||||
conv_params.push_back({1, 2, 128, 128, 256, {3}, {28}, {1}, {1}, {1}, {1}});
|
||||
conv_params.push_back({1, 2, 128, 128, 256, {1}, {3}, {1}, {1}, {0}, {0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::GNWC,
|
||||
ck::tensor_layout::convolution::GKXC,
|
||||
ck::tensor_layout::convolution::GNWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::GNWC,
|
||||
ck::tensor_layout::convolution::GKXC,
|
||||
ck::tensor_layout::convolution::GNWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::GNWC,
|
||||
ck::tensor_layout::convolution::GKXC,
|
||||
ck::tensor_layout::convolution::GNWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<1,
|
||||
ck::tensor_layout::convolution::GNWC,
|
||||
ck::tensor_layout::convolution::GKXC,
|
||||
ck::tensor_layout::convolution::GNWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 2d GNHWC/GKYXC/GNHWK
|
||||
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdGNHWC)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::GNHWC,
|
||||
ck::tensor_layout::convolution::GKYXC,
|
||||
ck::tensor_layout::convolution::GNHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::GNHWC,
|
||||
ck::tensor_layout::convolution::GKYXC,
|
||||
ck::tensor_layout::convolution::GNHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::GNHWC,
|
||||
ck::tensor_layout::convolution::GKYXC,
|
||||
ck::tensor_layout::convolution::GNHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::GNHWC,
|
||||
ck::tensor_layout::convolution::GKYXC,
|
||||
ck::tensor_layout::convolution::GNHWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 3d GNDHWC/GKZYXC/GNDHWK
|
||||
TEST_F(TestGroupedConvNdFwd, GroupedConv3dFwdGNDHWC)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back(
|
||||
{3, 2, 128, 128, 256, {1, 1, 1}, {7, 7, 7}, {2, 2, 2}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
conv_params.push_back(
|
||||
{3, 2, 128, 128, 256, {3, 3, 3}, {14, 14, 3}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}, {1, 1, 1}});
|
||||
conv_params.push_back(
|
||||
{3, 2, 128, 128, 256, {1, 1, 1}, {3, 3, 3}, {1, 1, 1}, {1, 1, 1}, {0, 0, 0}, {0, 0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp32
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::GNDHWC,
|
||||
ck::tensor_layout::convolution::GKZYXC,
|
||||
ck::tensor_layout::convolution::GNDHWK,
|
||||
float,
|
||||
float,
|
||||
float>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::GNDHWC,
|
||||
ck::tensor_layout::convolution::GKZYXC,
|
||||
ck::tensor_layout::convolution::GNDHWK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// bf16
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::GNDHWC,
|
||||
ck::tensor_layout::convolution::GKZYXC,
|
||||
ck::tensor_layout::convolution::GNDHWK,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t,
|
||||
ck::bhalf_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
|
||||
// int8
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<3,
|
||||
ck::tensor_layout::convolution::GNDHWC,
|
||||
ck::tensor_layout::convolution::GKZYXC,
|
||||
ck::tensor_layout::convolution::GNDHWK,
|
||||
int8_t,
|
||||
int8_t,
|
||||
int8_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
|
||||
// 2d NHWGC/KYXGC/NHWGK
|
||||
TEST_F(TestGroupedConvNdFwd, GroupedConv2dFwdNHWGC)
|
||||
{
|
||||
conv_params.clear();
|
||||
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {7, 7}, {2, 2}, {1, 1}, {0, 0}, {0, 0}});
|
||||
conv_params.push_back({2, 2, 128, 128, 256, {3, 3}, {14, 14}, {1, 1}, {1, 1}, {1, 1}, {1, 1}});
|
||||
conv_params.push_back({2, 2, 128, 128, 256, {1, 1}, {3, 3}, {1, 1}, {1, 1}, {0, 0}, {0, 0}});
|
||||
|
||||
for(auto& param : conv_params)
|
||||
{
|
||||
bool pass;
|
||||
|
||||
// fp16
|
||||
pass = ck::profiler::profile_grouped_conv_fwd_impl<2,
|
||||
ck::tensor_layout::convolution::NHWGC,
|
||||
ck::tensor_layout::convolution::KYXGC,
|
||||
ck::tensor_layout::convolution::NHWGK,
|
||||
ck::half_t,
|
||||
ck::half_t,
|
||||
ck::half_t>(true, // do_verification
|
||||
1, // init_method
|
||||
false, // do_log
|
||||
false, // time_kernel
|
||||
param);
|
||||
|
||||
EXPECT_TRUE(pass);
|
||||
}
|
||||
}
|
||||
@@ -1,3 +1,3 @@
|
||||
add_test_executable(test_grouped_gemm_fp16 grouped_gemm_fp16.cpp)
|
||||
target_link_libraries(test_grouped_gemm_fp16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_grouped_gemm_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_grouped_gemm_fp16 PRIVATE device_grouped_gemm_instance)
|
||||
|
||||
@@ -2,7 +2,9 @@ add_custom_target(test_layernorm)
|
||||
|
||||
add_gtest_executable(test_layernorm_fp32 test_layernorm_fp32.cpp)
|
||||
add_gtest_executable(test_layernorm_fp16 test_layernorm_fp16.cpp)
|
||||
target_link_libraries(test_layernorm_fp32 PRIVATE host_tensor)
|
||||
target_link_libraries(test_layernorm_fp16 PRIVATE host_tensor)
|
||||
|
||||
target_link_libraries(test_layernorm_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_layernorm_fp16 PRIVATE utility)
|
||||
|
||||
add_dependencies(test_layernorm test_layernorm_fp32)
|
||||
add_dependencies(test_layernorm test_layernorm_fp16)
|
||||
add_dependencies(test_layernorm test_layernorm_fp16)
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
#include "ck/tensor_operation/gpu/device/device_layernorm.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_layernorm.hpp"
|
||||
|
||||
namespace ck {
|
||||
@@ -102,10 +102,10 @@ class TestLayernorm : public ::testing::Test
|
||||
gamma.GenerateTensorValue(GeneratorTensor_3<GammaDataType>{0.0, 1.0});
|
||||
beta.GenerateTensorValue(GeneratorTensor_3<BetaDataType>{0.0, 1.0});
|
||||
|
||||
DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpace());
|
||||
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpace());
|
||||
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpace());
|
||||
DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpace());
|
||||
DeviceMem x_dev(sizeof(XDataType) * x.mDesc.GetElementSpaceSize());
|
||||
DeviceMem gamma_dev(sizeof(GammaDataType) * gamma.mDesc.GetElementSpaceSize());
|
||||
DeviceMem beta_dev(sizeof(BetaDataType) * beta.mDesc.GetElementSpaceSize());
|
||||
DeviceMem y_dev(sizeof(YDataType) * y.mDesc.GetElementSpaceSize());
|
||||
|
||||
x_dev.ToDevice(x.mData.data());
|
||||
gamma_dev.ToDevice(gamma.mData.data());
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_test_executable(test_magic_number_division magic_number_division.cpp)
|
||||
target_link_libraries(test_magic_number_division PRIVATE host_tensor)
|
||||
target_link_libraries(test_magic_number_division PRIVATE utility)
|
||||
|
||||
@@ -9,9 +9,9 @@
|
||||
#include "ck/ck.hpp"
|
||||
#include "ck/utility/magic_division.hpp"
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor_generator.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor_generator.hpp"
|
||||
|
||||
__global__ void gpu_magic_number_division(uint32_t magic_multiplier,
|
||||
uint32_t magic_shift,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
add_test_executable(test_reduce_no_index reduce_no_index.cpp)
|
||||
add_test_executable(test_reduce_with_index reduce_with_index.cpp)
|
||||
target_link_libraries(test_reduce_no_index PRIVATE host_tensor)
|
||||
target_link_libraries(test_reduce_no_index PRIVATE utility)
|
||||
target_link_libraries(test_reduce_no_index PRIVATE device_reduce_instance)
|
||||
target_link_libraries(test_reduce_with_index PRIVATE host_tensor)
|
||||
target_link_libraries(test_reduce_with_index PRIVATE utility)
|
||||
target_link_libraries(test_reduce_with_index PRIVATE device_reduce_instance)
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
#include "ck/library/host_tensor/host_common_util.hpp"
|
||||
#include "ck/library/utility/host_common_util.hpp"
|
||||
#include "profiler/include/profile_reduce_impl.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
|
||||
#include <getopt.h>
|
||||
|
||||
#include "ck/library/host_tensor/host_common_util.hpp"
|
||||
#include "ck/library/utility/host_common_util.hpp"
|
||||
#include "profiler/include/profile_reduce_impl.hpp"
|
||||
|
||||
using namespace ck;
|
||||
|
||||
@@ -1,2 +1,2 @@
|
||||
add_gtest_executable(test_reference_conv_fwd reference_conv_fwd.cpp)
|
||||
target_link_libraries(test_reference_conv_fwd PRIVATE host_tensor conv_util)
|
||||
target_link_libraries(test_reference_conv_fwd PRIVATE utility)
|
||||
|
||||
@@ -13,74 +13,64 @@
|
||||
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/utility/conv_util.hpp"
|
||||
#include "ck/library/utility/fill.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/convolution_parameter.hpp"
|
||||
#include "ck/library/utility/convolution_host_tensor_descriptor_helper.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
|
||||
|
||||
namespace {
|
||||
|
||||
using InElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using WeiElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
using OutElementOp = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
template <ck::index_t NDim,
|
||||
template <ck::index_t NDimSpatial,
|
||||
typename InDataType = float,
|
||||
typename WeiDataType = float,
|
||||
typename OutDataType = float,
|
||||
typename InLayout = ck::tensor_layout::convolution::NHWC,
|
||||
typename WeiLayout = ck::tensor_layout::convolution::KYXC,
|
||||
typename OutLayout = ck::tensor_layout::convolution::NHWK,
|
||||
typename InLayout = ck::tensor_layout::convolution::GNHWC,
|
||||
typename WeiLayout = ck::tensor_layout::convolution::GKYXC,
|
||||
typename OutLayout = ck::tensor_layout::convolution::GNHWK,
|
||||
typename FillInputOp = ck::utils::FillMonotonicSeq<InDataType>,
|
||||
typename FillWeightsOp = ck::utils::FillConstant<WeiDataType>>
|
||||
Tensor<OutDataType>
|
||||
run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
|
||||
run_reference_convolution_forward(const ck::utils::conv::ConvParam& conv_param,
|
||||
const FillInputOp& fill_input_op = FillInputOp{},
|
||||
const FillWeightsOp& fill_weights_op = FillWeightsOp{0.5f})
|
||||
{
|
||||
std::vector<std::size_t> input_dims{static_cast<std::size_t>(params.N_),
|
||||
static_cast<std::size_t>(params.C_)};
|
||||
input_dims.insert(std::end(input_dims),
|
||||
std::begin(params.input_spatial_lengths_),
|
||||
std::end(params.input_spatial_lengths_));
|
||||
const auto in_g_n_c_wis_desc =
|
||||
ck::utils::conv::make_input_host_tensor_descriptor_g_n_c_wis_packed<InLayout>(conv_param);
|
||||
|
||||
std::vector<std::size_t> filter_dims{static_cast<std::size_t>(params.K_),
|
||||
static_cast<std::size_t>(params.C_)};
|
||||
filter_dims.insert(std::end(filter_dims),
|
||||
std::begin(params.filter_spatial_lengths_),
|
||||
std::end(params.filter_spatial_lengths_));
|
||||
const auto wei_g_k_c_xs_desc =
|
||||
ck::utils::conv::make_weight_host_tensor_descriptor_g_k_c_xs_packed<WeiLayout>(conv_param);
|
||||
|
||||
const std::vector<ck::index_t>& output_spatial_lengths = params.GetOutputSpatialLengths();
|
||||
std::vector<std::size_t> output_dims{static_cast<std::size_t>(params.N_),
|
||||
static_cast<std::size_t>(params.K_)};
|
||||
output_dims.insert(std::end(output_dims),
|
||||
std::begin(output_spatial_lengths),
|
||||
std::end(output_spatial_lengths));
|
||||
const auto out_g_n_k_wos_desc =
|
||||
ck::utils::conv::make_output_host_tensor_descriptor_g_n_k_wos_packed<OutLayout>(conv_param);
|
||||
|
||||
Tensor<InDataType> input(ck::utils::conv::get_host_tensor_descriptor(input_dims, InLayout{}));
|
||||
Tensor<WeiDataType> weights(
|
||||
ck::utils::conv::get_host_tensor_descriptor(filter_dims, WeiLayout{}));
|
||||
Tensor<OutDataType> host_output(
|
||||
ck::utils::conv::get_host_tensor_descriptor(output_dims, OutLayout{}));
|
||||
Tensor<InDataType> input(in_g_n_c_wis_desc);
|
||||
Tensor<WeiDataType> weights(wei_g_k_c_xs_desc);
|
||||
Tensor<OutDataType> host_output(out_g_n_k_wos_desc);
|
||||
|
||||
fill_input_op(input.begin(), input.end());
|
||||
fill_weights_op(weights.begin(), weights.end());
|
||||
std::fill(host_output.begin(), host_output.end(), OutDataType(0.f));
|
||||
|
||||
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<InDataType,
|
||||
auto ref_conv = ck::tensor_operation::host::ReferenceConvFwd<NDimSpatial,
|
||||
InDataType,
|
||||
WeiDataType,
|
||||
OutDataType,
|
||||
InElementOp,
|
||||
WeiElementOp,
|
||||
OutElementOp,
|
||||
NDim>();
|
||||
OutElementOp>();
|
||||
auto ref_invoker = ref_conv.MakeInvoker();
|
||||
auto ref_argument = ref_conv.MakeArgument(input,
|
||||
weights,
|
||||
host_output,
|
||||
params.conv_filter_strides_,
|
||||
params.conv_filter_dilations_,
|
||||
params.input_left_pads_,
|
||||
params.input_right_pads_,
|
||||
conv_param.conv_filter_strides_,
|
||||
conv_param.conv_filter_dilations_,
|
||||
conv_param.input_left_pads_,
|
||||
conv_param.input_right_pads_,
|
||||
InElementOp{},
|
||||
WeiElementOp{},
|
||||
OutElementOp{});
|
||||
@@ -91,21 +81,29 @@ run_reference_convolution_forward(const ck::utils::conv::ConvParams& params,
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
TEST(ReferenceConvolutionFWD, Conv2DNHWC)
|
||||
// Eeference convolution assume dimensions of tensor descriptors are in GNCDHW/GKCZYX/GNKDHW order,
|
||||
// regardless of physical tensor layouts in memory.
|
||||
// Some tests below assume dimensions of tensor descriptors can be in other order, and therefore
|
||||
// are disabled
|
||||
// TODO: add more tests, which comply with assumption about dimension order of reference convolution
|
||||
// and add tests for more physical layout
|
||||
#if 0
|
||||
TEST(ReferenceConvolutionFWD, Conv2DGNHWC)
|
||||
{
|
||||
ck::utils::conv::ConvParams params;
|
||||
params.N_ = 1;
|
||||
params.K_ = 1;
|
||||
params.C_ = 2;
|
||||
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
|
||||
params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6};
|
||||
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1};
|
||||
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1};
|
||||
params.input_left_pads_ = std::vector<ck::index_t>{0, 0};
|
||||
params.input_right_pads_ = std::vector<ck::index_t>{0, 0};
|
||||
ck::utils::conv::ConvParam conv_param(2,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
std::vector<ck::index_t>{3, 3},
|
||||
std::vector<ck::index_t>{6, 6},
|
||||
std::vector<ck::index_t>{1, 1},
|
||||
std::vector<ck::index_t>{1, 1},
|
||||
std::vector<ck::index_t>{0, 0},
|
||||
std::vector<ck::index_t>{0, 0});
|
||||
|
||||
auto out_tensor = run_reference_convolution_forward<2>(params);
|
||||
std::vector<std::size_t> ref_dims{1, 1, 4, 4};
|
||||
auto out_tensor = run_reference_convolution_forward<2>(conv_param);
|
||||
std::vector<std::size_t> ref_dims{1, 1, 4, 4, 1};
|
||||
std::vector<float> ref_data{130.5,
|
||||
148.5,
|
||||
166.5,
|
||||
@@ -127,21 +125,22 @@ TEST(ReferenceConvolutionFWD, Conv2DNHWC)
|
||||
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
|
||||
}
|
||||
|
||||
TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
|
||||
TEST(ReferenceConvolutionFWD, Conv2DGNHWCStridesDilationsPadding)
|
||||
{
|
||||
ck::utils::conv::ConvParams params;
|
||||
params.N_ = 1;
|
||||
params.K_ = 2;
|
||||
params.C_ = 2;
|
||||
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3};
|
||||
params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12};
|
||||
params.conv_filter_strides_ = std::vector<ck::index_t>{2, 2};
|
||||
params.conv_filter_dilations_ = std::vector<ck::index_t>{2, 2};
|
||||
params.input_left_pads_ = std::vector<ck::index_t>{1, 1};
|
||||
params.input_right_pads_ = std::vector<ck::index_t>{1, 1};
|
||||
ck::utils::conv::ConvParam conv_param(2,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
2,
|
||||
std::vector<ck::index_t>{3, 3},
|
||||
std::vector<ck::index_t>{12, 12},
|
||||
std::vector<ck::index_t>{2, 2},
|
||||
std::vector<ck::index_t>{2, 2},
|
||||
std::vector<ck::index_t>{1, 1},
|
||||
std::vector<ck::index_t>{1, 1});
|
||||
|
||||
auto out_tensor = run_reference_convolution_forward<2>(params);
|
||||
std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 2, 5, 5};
|
||||
auto out_tensor = run_reference_convolution_forward<2>(conv_param);
|
||||
std::vector<std::size_t> ref_dims = std::vector<std::size_t>{1, 5, 5, 2};
|
||||
std::vector<float> ref_data{
|
||||
210., 210., 327., 327., 351., 351., 375., 375., 399., 399.,
|
||||
459., 459., 706.5, 706.5, 742.5, 742.5, 778.5, 778.5, 814.5, 814.5,
|
||||
@@ -153,88 +152,88 @@ TEST(ReferenceConvolutionFWD, Conv2DNHWCStridesDilationsPadding)
|
||||
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
|
||||
}
|
||||
|
||||
TEST(ReferenceConvolutionFWD, Conv1DNWC)
|
||||
TEST(ReferenceConvolutionFWD, Conv1DGNWC)
|
||||
{
|
||||
ck::utils::conv::ConvParams params;
|
||||
params.num_dim_spatial_ = 1;
|
||||
params.N_ = 1;
|
||||
params.K_ = 1;
|
||||
params.C_ = 2;
|
||||
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
|
||||
params.input_spatial_lengths_ = std::vector<ck::index_t>{6};
|
||||
params.conv_filter_strides_ = std::vector<ck::index_t>{1};
|
||||
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
|
||||
params.input_left_pads_ = std::vector<ck::index_t>{0};
|
||||
params.input_right_pads_ = std::vector<ck::index_t>{0};
|
||||
ck::utils::conv::ConvParam conv_param(1,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
std::vector<ck::index_t>{3},
|
||||
std::vector<ck::index_t>{6},
|
||||
std::vector<ck::index_t>{1},
|
||||
std::vector<ck::index_t>{1},
|
||||
std::vector<ck::index_t>{0},
|
||||
std::vector<ck::index_t>{0});
|
||||
|
||||
auto out_tensor =
|
||||
run_reference_convolution_forward<1,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(params);
|
||||
std::vector<std::size_t> ref_dims{1, 1, 4};
|
||||
ck::tensor_layout::convolution::GNWC,
|
||||
ck::tensor_layout::convolution::GKXC,
|
||||
ck::tensor_layout::convolution::GNWK>(conv_param);
|
||||
std::vector<std::size_t> ref_dims{1, 1, 4, 1};
|
||||
std::vector<float> ref_data{7.5, 13.5, 19.5, 25.5};
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
|
||||
}
|
||||
|
||||
TEST(ReferenceConvolutionFWD, Conv1DNWCStridesDilationsPadding)
|
||||
TEST(ReferenceConvolutionFWD, Conv1DGNWCStridesDilationsPadding)
|
||||
{
|
||||
ck::utils::conv::ConvParams params;
|
||||
params.num_dim_spatial_ = 1;
|
||||
params.N_ = 1;
|
||||
params.K_ = 2;
|
||||
params.C_ = 2;
|
||||
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
|
||||
params.input_spatial_lengths_ = std::vector<ck::index_t>{12};
|
||||
params.conv_filter_strides_ = std::vector<ck::index_t>{2};
|
||||
params.conv_filter_dilations_ = std::vector<ck::index_t>{2};
|
||||
params.input_left_pads_ = std::vector<ck::index_t>{1};
|
||||
params.input_right_pads_ = std::vector<ck::index_t>{1};
|
||||
ck::utils::conv::ConvParam conv_param(1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
2,
|
||||
std::vector<ck::index_t>{3},
|
||||
std::vector<ck::index_t>{12},
|
||||
std::vector<ck::index_t>{2},
|
||||
std::vector<ck::index_t>{2},
|
||||
std::vector<ck::index_t>{1},
|
||||
std::vector<ck::index_t>{1});
|
||||
|
||||
auto out_tensor =
|
||||
run_reference_convolution_forward<1,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(params);
|
||||
std::vector<std::size_t> ref_dims{1, 2, 5};
|
||||
ck::tensor_layout::convolution::GNWC,
|
||||
ck::tensor_layout::convolution::GKXC,
|
||||
ck::tensor_layout::convolution::GNWK>(conv_param);
|
||||
std::vector<std::size_t> ref_dims{1, 1, 5, 2};
|
||||
std::vector<float> ref_data{9., 9., 19.5, 19.5, 31.5, 31.5, 43.5, 43.5, 55.5, 55.5};
|
||||
EXPECT_TRUE(ck::utils::check_err(
|
||||
out_tensor.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(out_tensor.mData, ref_data, "Error: incorrect results!"));
|
||||
}
|
||||
|
||||
TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
|
||||
TEST(ReferenceConvolutionFWD, Conv1DGNWCSameOutputSize)
|
||||
{
|
||||
ck::utils::conv::ConvParams params;
|
||||
params.num_dim_spatial_ = 1;
|
||||
params.N_ = 2;
|
||||
params.K_ = 16;
|
||||
params.C_ = 4;
|
||||
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3};
|
||||
params.input_spatial_lengths_ = std::vector<ck::index_t>{16};
|
||||
params.conv_filter_strides_ = std::vector<ck::index_t>{1};
|
||||
params.conv_filter_dilations_ = std::vector<ck::index_t>{1};
|
||||
params.input_left_pads_ = std::vector<ck::index_t>{1};
|
||||
params.input_right_pads_ = std::vector<ck::index_t>{1};
|
||||
ck::utils::conv::ConvParam conv_param(1,
|
||||
1,
|
||||
2,
|
||||
16,
|
||||
4,
|
||||
std::vector<ck::index_t>{3},
|
||||
std::vector<ck::index_t>{16},
|
||||
std::vector<ck::index_t>{1},
|
||||
std::vector<ck::index_t>{1},
|
||||
std::vector<ck::index_t>{1},
|
||||
std::vector<ck::index_t>{1});
|
||||
|
||||
auto out_tensor2 = run_reference_convolution_forward<1,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NWC,
|
||||
ck::tensor_layout::convolution::KXC,
|
||||
ck::tensor_layout::convolution::NWK>(
|
||||
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
|
||||
ck::tensor_layout::convolution::GNWC,
|
||||
ck::tensor_layout::convolution::GKXC,
|
||||
ck::tensor_layout::convolution::GNWK>(
|
||||
conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
|
||||
|
||||
std::vector<std::size_t> ref_dims{2, 16, 16};
|
||||
std::vector<std::size_t> ref_dims{1, 2, 16, 16};
|
||||
std::vector<float> ref_data{
|
||||
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
|
||||
1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4, 1.4,
|
||||
@@ -304,30 +303,31 @@ TEST(ReferenceConvolutionFWD, Conv1DNWCSameOutputSize)
|
||||
out_tensor2.mDesc.GetLengths(), ref_dims, "Error: wrong output tensor dimensions!"));
|
||||
EXPECT_TRUE(ck::utils::check_err(out_tensor2.mData, ref_data, "Error: incorrect results!"));
|
||||
}
|
||||
#endif
|
||||
|
||||
TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
|
||||
TEST(ReferenceConvolutionFWD, Conv3DGNCDHW)
|
||||
{
|
||||
ck::utils::conv::ConvParams params;
|
||||
params.num_dim_spatial_ = 3;
|
||||
params.N_ = 1;
|
||||
params.K_ = 1;
|
||||
params.C_ = 2;
|
||||
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
|
||||
params.input_spatial_lengths_ = std::vector<ck::index_t>{6, 6, 6};
|
||||
params.conv_filter_strides_ = std::vector<ck::index_t>{1, 1, 1};
|
||||
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
|
||||
params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
|
||||
params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
|
||||
ck::utils::conv::ConvParam conv_param(3,
|
||||
1,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
std::vector<ck::index_t>{3, 3, 3},
|
||||
std::vector<ck::index_t>{6, 6, 6},
|
||||
std::vector<ck::index_t>{1, 1, 1},
|
||||
std::vector<ck::index_t>{1, 1, 1},
|
||||
std::vector<ck::index_t>{0, 0, 0},
|
||||
std::vector<ck::index_t>{0, 0, 0});
|
||||
|
||||
auto out_tensor = run_reference_convolution_forward<3,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NCDHW,
|
||||
ck::tensor_layout::convolution::KCZYX,
|
||||
ck::tensor_layout::convolution::NKDHW>(
|
||||
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
|
||||
std::vector<std::size_t> ref_dims{1, 1, 4, 4, 4};
|
||||
ck::tensor_layout::convolution::GNCDHW,
|
||||
ck::tensor_layout::convolution::GKCZYX,
|
||||
ck::tensor_layout::convolution::GNKDHW>(
|
||||
conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
|
||||
std::vector<std::size_t> ref_dims{1, 1, 1, 4, 4, 4};
|
||||
std::vector<float> ref_data{
|
||||
407.7, 410.40002, 413.09998, 415.80002, 423.90002, 426.6, 429.30002, 432.,
|
||||
440.1, 442.80002, 445.5, 448.2, 456.30002, 459., 461.7, 464.40002,
|
||||
@@ -344,29 +344,29 @@ TEST(ReferenceConvolutionFWD, Conv3DNCDHW)
|
||||
ck::utils::check_err(out_tensor.mData, ref_data, "Error [case 1]: incorrect results!"));
|
||||
}
|
||||
|
||||
TEST(ReferenceConvolutionFWD, Conv3DNCDHWStridesDilations)
|
||||
TEST(ReferenceConvolutionFWD, Conv3DGNCDHWStridesDilations)
|
||||
{
|
||||
ck::utils::conv::ConvParams params;
|
||||
params.num_dim_spatial_ = 3;
|
||||
params.N_ = 1;
|
||||
params.K_ = 2;
|
||||
params.C_ = 2;
|
||||
params.filter_spatial_lengths_ = std::vector<ck::index_t>{3, 3, 3};
|
||||
params.input_spatial_lengths_ = std::vector<ck::index_t>{12, 12, 12};
|
||||
params.conv_filter_strides_ = std::vector<ck::index_t>{3, 3, 3};
|
||||
params.conv_filter_dilations_ = std::vector<ck::index_t>{1, 1, 1};
|
||||
params.input_left_pads_ = std::vector<ck::index_t>{0, 0, 0};
|
||||
params.input_right_pads_ = std::vector<ck::index_t>{0, 0, 0};
|
||||
ck::utils::conv::ConvParam conv_param(3,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
2,
|
||||
std::vector<ck::index_t>{3, 3, 3},
|
||||
std::vector<ck::index_t>{12, 12, 12},
|
||||
std::vector<ck::index_t>{3, 3, 3},
|
||||
std::vector<ck::index_t>{1, 1, 1},
|
||||
std::vector<ck::index_t>{0, 0, 0},
|
||||
std::vector<ck::index_t>{0, 0, 0});
|
||||
|
||||
auto out_tensor = run_reference_convolution_forward<3,
|
||||
float,
|
||||
float,
|
||||
float,
|
||||
ck::tensor_layout::convolution::NCDHW,
|
||||
ck::tensor_layout::convolution::KCZYX,
|
||||
ck::tensor_layout::convolution::NKDHW>(
|
||||
params, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
|
||||
std::vector<std::size_t> ref_dims{1, 2, 4, 4, 4};
|
||||
ck::tensor_layout::convolution::GNCDHW,
|
||||
ck::tensor_layout::convolution::GKCZYX,
|
||||
ck::tensor_layout::convolution::GNKDHW>(
|
||||
conv_param, ck::utils::FillMonotonicSeq<float>{0.f, 0.1f});
|
||||
std::vector<std::size_t> ref_dims{1, 1, 2, 4, 4, 4};
|
||||
std::vector<float> ref_data{
|
||||
2756.7002, 2764.7998, 2772.9001, 2781., 2853.9001, 2862., 2870.1, 2878.2002,
|
||||
2951.1, 2959.2002, 2967.2998, 2975.4001, 3048.2998, 3056.4001, 3064.5, 3072.6,
|
||||
|
||||
@@ -3,9 +3,9 @@ add_custom_target(test_softmax)
|
||||
add_gtest_executable(test_softmax_fp32 test_softmax_fp32.cpp)
|
||||
add_gtest_executable(test_softmax_fp16 test_softmax_fp16.cpp)
|
||||
add_gtest_executable(test_softmax_int8 test_softmax_int8.cpp)
|
||||
target_link_libraries(test_softmax_fp32 PRIVATE host_tensor)
|
||||
target_link_libraries(test_softmax_fp16 PRIVATE host_tensor)
|
||||
target_link_libraries(test_softmax_int8 PRIVATE host_tensor)
|
||||
target_link_libraries(test_softmax_fp32 PRIVATE utility)
|
||||
target_link_libraries(test_softmax_fp16 PRIVATE utility)
|
||||
target_link_libraries(test_softmax_int8 PRIVATE utility)
|
||||
add_dependencies(test_softmax test_softmax_fp32)
|
||||
add_dependencies(test_softmax test_softmax_fp16)
|
||||
add_dependencies(test_softmax test_softmax_int8)
|
||||
add_dependencies(test_softmax test_softmax_int8)
|
||||
|
||||
@@ -12,8 +12,8 @@
|
||||
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
|
||||
|
||||
#include "ck/library/utility/check_err.hpp"
|
||||
#include "ck/library/host_tensor/host_tensor.hpp"
|
||||
#include "ck/library/host_tensor/device_memory.hpp"
|
||||
#include "ck/library/utility/host_tensor.hpp"
|
||||
#include "ck/library/utility/device_memory.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
|
||||
|
||||
namespace ck {
|
||||
@@ -80,8 +80,8 @@ class TestSoftmax : public ::testing::Test
|
||||
|
||||
Tensor<OutDataType> out_ref(out);
|
||||
|
||||
DeviceMem in_dev(sizeof(InDataType) * in.mDesc.GetElementSpace());
|
||||
DeviceMem out_dev(sizeof(OutDataType) * out.mDesc.GetElementSpace());
|
||||
DeviceMem in_dev(sizeof(InDataType) * in.mDesc.GetElementSpaceSize());
|
||||
DeviceMem out_dev(sizeof(OutDataType) * out.mDesc.GetElementSpaceSize());
|
||||
in_dev.ToDevice(in.mData.data());
|
||||
out_dev.ToDevice(out.mData.data());
|
||||
|
||||
|
||||
Reference in New Issue
Block a user