mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-16 10:59:55 +00:00
Refactor pool fwd (#815)
* Do not hardcode stride
* devicePool2DFwd Inherit devicePool3DFwd
* Move instance declaration out of common
* Add dilation
* Use the pool3d rank, because pool2d inherits from pool3d
* calculate Do Ho Wo for the dilation
* Fix header name
* Modify ckProfiler
* Remove pool2d instance
* Remove pool2d in profiler
* Remove pool2d and add dilation
* In the client example, this commit revises the following:
1. Add dilation.
2. Use pool3d to implement pool2d
* Refine naming and IsSupportedArgument()
* Add dilation to maxpool bwd example
* clang format
* 1. Remove useless header
2. Fix copyright
3. Refine naming
* Add layout parameter to pool fwd
* clang format
* Fix merge error
* Fix compile error
* Remove layout parameter in derived class
* Refine changelog
* Fix compile error
* Fix compiler error
* Add layout to external api and profiler
[ROCm/composable_kernel commit: f60f0a5e03]
This commit is contained in:
@@ -39,31 +39,35 @@ bool pool_test(bool do_verification,
|
||||
ck::index_t Wi,
|
||||
ck::index_t window_stride_h,
|
||||
ck::index_t window_stride_w,
|
||||
ck::index_t window_dilation_h,
|
||||
ck::index_t window_dilation_w,
|
||||
ck::index_t in_left_pad_h,
|
||||
ck::index_t in_left_pad_w,
|
||||
ck::index_t in_right_pad_h,
|
||||
ck::index_t in_right_pad_w)
|
||||
{
|
||||
using DevicePoolFwdInstance =
|
||||
ck::tensor_operation::device::DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C<
|
||||
InDataType, // InDataType
|
||||
OutDataType, // OutDataType
|
||||
IndexDataType, // IndexDataType
|
||||
ComputeDataType, // ComputeDataType
|
||||
ReduceOpId,
|
||||
OutputIndex,
|
||||
64, // BlockSize
|
||||
64, // ReduceMThreadClusterSize
|
||||
1, // ReduceKThreadClusterSize
|
||||
4, // ReduceMThreadSliceSize
|
||||
1, // ReduceKThreadSliceSize
|
||||
4>; // InSrcOutDstVectorSize
|
||||
ck::tensor_operation::device::DevicePool2dFwd_NHWC_NHWC<InDataType,
|
||||
OutDataType,
|
||||
IndexDataType,
|
||||
ComputeDataType,
|
||||
ReduceOpId,
|
||||
OutputIndex,
|
||||
64, // BlockSize
|
||||
64, // ReduceMThreadClusterSize
|
||||
1, // ReduceKThreadClusterSize
|
||||
4, // ReduceMThreadSliceSize
|
||||
1, // ReduceKThreadSliceSize
|
||||
1>; // InSrcOutDstVectorSize
|
||||
|
||||
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
|
||||
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
|
||||
const ck::index_t Ys = (Y - 1) * window_dilation_h + 1;
|
||||
const ck::index_t Xs = (X - 1) * window_dilation_w + 1;
|
||||
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Ys) / window_stride_h + 1;
|
||||
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - Xs) / window_stride_w + 1;
|
||||
|
||||
const std::vector<ck::index_t> window_spatial_lengths{Y, X};
|
||||
const std::vector<ck::index_t> window_strides{window_stride_h, window_stride_w};
|
||||
const std::vector<ck::index_t> window_dilations{window_dilation_h, window_dilation_w};
|
||||
const std::vector<ck::index_t> input_left_pads{in_left_pad_h, in_left_pad_w};
|
||||
const std::vector<ck::index_t> input_right_pads{in_right_pad_h, in_right_pad_w};
|
||||
|
||||
@@ -123,6 +127,7 @@ bool pool_test(bool do_verification,
|
||||
{C * Ho * Wo, 1, Wo * C, C},
|
||||
{C * Ho * Wo, 1, Wo * C, C},
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads,
|
||||
{2, 3});
|
||||
@@ -144,8 +149,8 @@ bool pool_test(bool do_verification,
|
||||
|
||||
float gb_per_sec = num_btype / 1.E6 / ave_time;
|
||||
|
||||
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s"
|
||||
<< std::endl;
|
||||
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
|
||||
<< " GB / s " << std::endl;
|
||||
|
||||
bool pass = true;
|
||||
|
||||
@@ -169,6 +174,7 @@ bool pool_test(bool do_verification,
|
||||
out_indices_n_c_ho_wo_host,
|
||||
window_spatial_lengths,
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads);
|
||||
|
||||
|
||||
@@ -34,18 +34,20 @@ int main(int argc, char* argv[])
|
||||
bool time_kernel;
|
||||
|
||||
// Pool shape
|
||||
ck::index_t N = 128;
|
||||
ck::index_t C = 192;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 71;
|
||||
ck::index_t Wi = 71;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
ck::index_t N = 128;
|
||||
ck::index_t C = 192;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 71;
|
||||
ck::index_t Wi = 71;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t window_dilation_h = 1;
|
||||
ck::index_t window_dilation_w = 1;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
|
||||
if(argc == 1)
|
||||
{
|
||||
@@ -59,31 +61,33 @@ int main(int argc, char* argv[])
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = static_cast<bool>(std::stoi(argv[3]));
|
||||
}
|
||||
else if(argc == 16)
|
||||
else if(argc == 18)
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = static_cast<bool>(std::stoi(argv[3]));
|
||||
|
||||
N = std::stoi(argv[4]);
|
||||
C = std::stoi(argv[5]);
|
||||
Y = std::stoi(argv[6]);
|
||||
X = std::stoi(argv[7]);
|
||||
Hi = std::stoi(argv[8]);
|
||||
Wi = std::stoi(argv[9]);
|
||||
window_stride_h = std::stoi(argv[10]);
|
||||
window_stride_w = std::stoi(argv[11]);
|
||||
in_left_pad_h = std::stoi(argv[12]);
|
||||
in_left_pad_w = std::stoi(argv[13]);
|
||||
in_right_pad_h = std::stoi(argv[14]);
|
||||
in_right_pad_w = std::stoi(argv[15]);
|
||||
N = std::stoi(argv[4]);
|
||||
C = std::stoi(argv[5]);
|
||||
Y = std::stoi(argv[6]);
|
||||
X = std::stoi(argv[7]);
|
||||
Hi = std::stoi(argv[8]);
|
||||
Wi = std::stoi(argv[9]);
|
||||
window_stride_h = std::stoi(argv[10]);
|
||||
window_stride_w = std::stoi(argv[11]);
|
||||
window_dilation_h = std::stoi(argv[12]);
|
||||
window_dilation_w = std::stoi(argv[13]);
|
||||
in_left_pad_h = std::stoi(argv[14]);
|
||||
in_left_pad_w = std::stoi(argv[15]);
|
||||
in_right_pad_h = std::stoi(argv[16]);
|
||||
in_right_pad_w = std::stoi(argv[17]);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("arg1: verification (0=no, 1=yes)\n");
|
||||
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
|
||||
printf("arg3: time kernel (0=no, 1=yes)\n");
|
||||
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, LeftPy, LeftPx, RightPy, "
|
||||
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(0);
|
||||
}
|
||||
@@ -107,6 +111,8 @@ int main(int argc, char* argv[])
|
||||
Wi,
|
||||
window_stride_h,
|
||||
window_stride_w,
|
||||
window_dilation_h,
|
||||
window_dilation_w,
|
||||
in_left_pad_h,
|
||||
in_left_pad_w,
|
||||
in_right_pad_h,
|
||||
|
||||
@@ -34,18 +34,20 @@ int main(int argc, char* argv[])
|
||||
bool time_kernel;
|
||||
|
||||
// Pool shape
|
||||
ck::index_t N = 128;
|
||||
ck::index_t C = 192;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 71;
|
||||
ck::index_t Wi = 71;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
ck::index_t N = 128;
|
||||
ck::index_t C = 192;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 71;
|
||||
ck::index_t Wi = 71;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t window_dilation_h = 1;
|
||||
ck::index_t window_dilation_w = 1;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
|
||||
if(argc == 1)
|
||||
{
|
||||
@@ -59,31 +61,33 @@ int main(int argc, char* argv[])
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = static_cast<bool>(std::stoi(argv[3]));
|
||||
}
|
||||
else if(argc == 16)
|
||||
else if(argc == 18)
|
||||
{
|
||||
do_verification = std::stoi(argv[1]);
|
||||
init_method = std::stoi(argv[2]);
|
||||
time_kernel = static_cast<bool>(std::stoi(argv[3]));
|
||||
|
||||
N = std::stoi(argv[4]);
|
||||
C = std::stoi(argv[5]);
|
||||
Y = std::stoi(argv[6]);
|
||||
X = std::stoi(argv[7]);
|
||||
Hi = std::stoi(argv[8]);
|
||||
Wi = std::stoi(argv[9]);
|
||||
window_stride_h = std::stoi(argv[10]);
|
||||
window_stride_w = std::stoi(argv[11]);
|
||||
in_left_pad_h = std::stoi(argv[12]);
|
||||
in_left_pad_w = std::stoi(argv[13]);
|
||||
in_right_pad_h = std::stoi(argv[14]);
|
||||
in_right_pad_w = std::stoi(argv[15]);
|
||||
N = std::stoi(argv[4]);
|
||||
C = std::stoi(argv[5]);
|
||||
Y = std::stoi(argv[6]);
|
||||
X = std::stoi(argv[7]);
|
||||
Hi = std::stoi(argv[8]);
|
||||
Wi = std::stoi(argv[9]);
|
||||
window_stride_h = std::stoi(argv[10]);
|
||||
window_stride_w = std::stoi(argv[11]);
|
||||
window_dilation_h = std::stoi(argv[12]);
|
||||
window_dilation_w = std::stoi(argv[13]);
|
||||
in_left_pad_h = std::stoi(argv[14]);
|
||||
in_left_pad_w = std::stoi(argv[15]);
|
||||
in_right_pad_h = std::stoi(argv[16]);
|
||||
in_right_pad_w = std::stoi(argv[17]);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("arg1: verification (0=no, 1=yes)\n");
|
||||
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
|
||||
printf("arg3: time kernel (0=no, 1=yes)\n");
|
||||
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, LeftPy, LeftPx, RightPy, "
|
||||
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
|
||||
"RightPx\n");
|
||||
exit(0);
|
||||
}
|
||||
@@ -107,6 +111,8 @@ int main(int argc, char* argv[])
|
||||
Wi,
|
||||
window_stride_h,
|
||||
window_stride_w,
|
||||
window_dilation_h,
|
||||
window_dilation_w,
|
||||
in_left_pad_h,
|
||||
in_left_pad_w,
|
||||
in_right_pad_h,
|
||||
|
||||
@@ -18,7 +18,45 @@
|
||||
#include "ck/library/utility/literals.hpp"
|
||||
#include "ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp"
|
||||
|
||||
template <typename InDataType,
|
||||
template <typename TensorLayout>
|
||||
std::vector<ck::index_t> f_tensor_strides_ncdhw(ck::index_t N_,
|
||||
ck::index_t C_,
|
||||
ck::index_t D,
|
||||
ck::index_t H,
|
||||
ck::index_t W,
|
||||
TensorLayout layout)
|
||||
{
|
||||
using namespace ck::literals;
|
||||
(void)N_;
|
||||
if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCDHW>::value)
|
||||
return {C_ * D * H * W, D * H * W, H * W, W, 1_uz};
|
||||
else if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NDHWC>::value)
|
||||
return {D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_};
|
||||
};
|
||||
|
||||
template <typename TensorLayout>
|
||||
HostTensorDescriptor f_host_tensor_descriptor(std::size_t N_,
|
||||
std::size_t C_,
|
||||
std::size_t D,
|
||||
std::size_t H,
|
||||
std::size_t W,
|
||||
TensorLayout layout)
|
||||
{
|
||||
using namespace ck::literals;
|
||||
|
||||
if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCDHW>::value)
|
||||
{
|
||||
return HostTensorDescriptor({N_, C_, D, H, W}, {C_ * D * H * W, D * H * W, H * W, W, 1_uz});
|
||||
}
|
||||
else if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NDHWC>::value)
|
||||
{
|
||||
return HostTensorDescriptor({N_, C_, D, H, W},
|
||||
{D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_});
|
||||
}
|
||||
};
|
||||
|
||||
template <typename DevicePoolFwdInstance,
|
||||
typename InDataType,
|
||||
typename OutDataType,
|
||||
typename ComputeDataType,
|
||||
typename IndexDataType,
|
||||
@@ -40,6 +78,9 @@ bool pool3d_test(bool do_verification,
|
||||
ck::index_t window_stride_d,
|
||||
ck::index_t window_stride_h,
|
||||
ck::index_t window_stride_w,
|
||||
ck::index_t window_dilation_d,
|
||||
ck::index_t window_dilation_h,
|
||||
ck::index_t window_dilation_w,
|
||||
ck::index_t in_left_pad_d,
|
||||
ck::index_t in_left_pad_h,
|
||||
ck::index_t in_left_pad_w,
|
||||
@@ -47,53 +88,21 @@ bool pool3d_test(bool do_verification,
|
||||
ck::index_t in_right_pad_h,
|
||||
ck::index_t in_right_pad_w)
|
||||
{
|
||||
using DevicePoolFwdInstance =
|
||||
ck::tensor_operation::device::DevicePool3dFwd_Input_N_Di_Hi_Wi_C_Output_N_Do_Ho_Wo_C<
|
||||
InDataType, // InDataType
|
||||
OutDataType, // OutDataType
|
||||
IndexDataType, // IndexDataType
|
||||
ComputeDataType, // ComputeDataType
|
||||
ReduceOpId,
|
||||
OutputIndex,
|
||||
64, // BlockSize
|
||||
64, // ReduceMThreadClusterSize
|
||||
1, // ReduceKThreadClusterSize
|
||||
4, // ReduceMThreadSliceSize
|
||||
1, // ReduceKThreadSliceSize
|
||||
4>; // InSrcOutDstVectorSize
|
||||
|
||||
const ck::index_t Do = (Di + in_left_pad_d + in_right_pad_d - Z) / window_stride_d + 1;
|
||||
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
|
||||
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
|
||||
const ck::index_t Zs = (Z - 1) * window_dilation_d + 1;
|
||||
const ck::index_t Ys = (Y - 1) * window_dilation_h + 1;
|
||||
const ck::index_t Xs = (X - 1) * window_dilation_w + 1;
|
||||
const ck::index_t Do = (Di + in_left_pad_d + in_right_pad_d - Zs) / window_stride_d + 1;
|
||||
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Ys) / window_stride_h + 1;
|
||||
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - Xs) / window_stride_w + 1;
|
||||
|
||||
const std::vector<ck::index_t> window_spatial_lengths{Z, Y, X};
|
||||
const std::vector<ck::index_t> window_strides{
|
||||
window_stride_d, window_stride_h, window_stride_w};
|
||||
const std::vector<ck::index_t> window_dilations{
|
||||
window_dilation_d, window_dilation_h, window_dilation_w};
|
||||
const std::vector<ck::index_t> input_left_pads{in_left_pad_d, in_left_pad_h, in_left_pad_w};
|
||||
const std::vector<ck::index_t> input_right_pads{in_right_pad_d, in_right_pad_h, in_right_pad_w};
|
||||
|
||||
// tensor layout
|
||||
auto f_host_tensor_descriptor = [](std::size_t N_,
|
||||
std::size_t C_,
|
||||
std::size_t D,
|
||||
std::size_t H,
|
||||
std::size_t W,
|
||||
auto layout) {
|
||||
using namespace ck::literals;
|
||||
|
||||
if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCDHW>::value)
|
||||
{
|
||||
return HostTensorDescriptor({N_, C_, D, H, W},
|
||||
{C_ * D * H * W, D * H * W, H * W, W, 1_uz});
|
||||
}
|
||||
else if constexpr(ck::is_same<decltype(layout),
|
||||
ck::tensor_layout::convolution::NDHWC>::value)
|
||||
{
|
||||
return HostTensorDescriptor({N_, C_, D, H, W},
|
||||
{D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_});
|
||||
}
|
||||
};
|
||||
|
||||
Tensor<InDataType> in_n_c_di_hi_wi(f_host_tensor_descriptor(N, C, Di, Hi, Wi, InLayout{}));
|
||||
Tensor<OutDataType> out_n_c_do_ho_wo_host(
|
||||
f_host_tensor_descriptor(N, C, Do, Ho, Wo, OutLayout{}));
|
||||
@@ -126,10 +135,11 @@ bool pool3d_test(bool do_verification,
|
||||
{N, C, Di, Hi, Wi},
|
||||
{Z, Y, X},
|
||||
{N, C, Do, Ho, Wo},
|
||||
{Di * C * Hi * Wi, 1, C * Hi * Wi, Wi * C, C},
|
||||
{Do * C * Ho * Wo, 1, C * Ho * Wo, Wo * C, C},
|
||||
{Do * C * Ho * Wo, 1, C * Ho * Wo, Wo * C, C},
|
||||
f_tensor_strides_ncdhw(N, C, Di, Hi, Wi, InLayout{}),
|
||||
f_tensor_strides_ncdhw(N, C, Do, Ho, Wo, OutLayout{}),
|
||||
f_tensor_strides_ncdhw(N, C, Do, Ho, Wo, OutLayout{}),
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads,
|
||||
{2, 3, 4});
|
||||
@@ -165,6 +175,7 @@ bool pool3d_test(bool do_verification,
|
||||
out_indices_n_c_do_ho_wo_host,
|
||||
window_spatial_lengths,
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads);
|
||||
|
||||
|
||||
@@ -27,31 +27,49 @@ static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;
|
||||
static constexpr bool OutputIndex = false;
|
||||
static constexpr bool PropagateNan = false;
|
||||
|
||||
using DevicePoolFwdInstance =
|
||||
ck::tensor_operation::device::DevicePool3dFwd_NDHWC_NDHWC<InDataType,
|
||||
OutDataType,
|
||||
IndexDataType,
|
||||
ComputeDataType,
|
||||
ReduceOpId,
|
||||
OutputIndex,
|
||||
64, // BlockSize
|
||||
64, // ReduceMThreadClusterSize
|
||||
1, // ReduceKThreadClusterSize
|
||||
1, // ReduceMThreadSliceSize
|
||||
1, // ReduceKThreadSliceSize
|
||||
1>; // InSrcOutDstVectorSize
|
||||
|
||||
int main()
|
||||
{
|
||||
bool do_verification = true;
|
||||
bool time_kernel = false;
|
||||
|
||||
// Pool shape
|
||||
ck::index_t N = 2;
|
||||
ck::index_t C = 32;
|
||||
ck::index_t Z = 2;
|
||||
ck::index_t Y = 2;
|
||||
ck::index_t X = 2;
|
||||
ck::index_t Di = 30;
|
||||
ck::index_t Hi = 30;
|
||||
ck::index_t Wi = 30;
|
||||
ck::index_t window_stride_d = 2;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t in_left_pad_d = 1;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_d = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
ck::index_t N = 2;
|
||||
ck::index_t C = 32;
|
||||
ck::index_t Z = 2;
|
||||
ck::index_t Y = 2;
|
||||
ck::index_t X = 2;
|
||||
ck::index_t Di = 30;
|
||||
ck::index_t Hi = 30;
|
||||
ck::index_t Wi = 30;
|
||||
ck::index_t window_stride_d = 2;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t window_dilation_d = 1;
|
||||
ck::index_t window_dilation_h = 1;
|
||||
ck::index_t window_dilation_w = 1;
|
||||
ck::index_t in_left_pad_d = 1;
|
||||
ck::index_t in_left_pad_h = 1;
|
||||
ck::index_t in_left_pad_w = 1;
|
||||
ck::index_t in_right_pad_d = 1;
|
||||
ck::index_t in_right_pad_h = 1;
|
||||
ck::index_t in_right_pad_w = 1;
|
||||
|
||||
bool pass = pool3d_test<InDataType,
|
||||
bool pass = pool3d_test<DevicePoolFwdInstance,
|
||||
InDataType,
|
||||
OutDataType,
|
||||
ComputeDataType,
|
||||
IndexDataType,
|
||||
@@ -72,6 +90,9 @@ int main()
|
||||
window_stride_d,
|
||||
window_stride_h,
|
||||
window_stride_w,
|
||||
window_dilation_d,
|
||||
window_dilation_h,
|
||||
window_dilation_w,
|
||||
in_left_pad_d,
|
||||
in_left_pad_h,
|
||||
in_left_pad_w,
|
||||
|
||||
@@ -24,18 +24,20 @@ int main()
|
||||
bool time_kernel = false;
|
||||
|
||||
// Pool shape
|
||||
ck::index_t N = 1;
|
||||
ck::index_t C = 1;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 32;
|
||||
ck::index_t Wi = 32;
|
||||
ck::index_t window_stride_h = 1;
|
||||
ck::index_t window_stride_w = 1;
|
||||
ck::index_t in_left_pad_h = 0;
|
||||
ck::index_t in_left_pad_w = 0;
|
||||
ck::index_t in_right_pad_h = 0;
|
||||
ck::index_t in_right_pad_w = 0;
|
||||
ck::index_t N = 1;
|
||||
ck::index_t C = 1;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 32;
|
||||
ck::index_t Wi = 32;
|
||||
ck::index_t window_stride_h = 1;
|
||||
ck::index_t window_stride_w = 1;
|
||||
ck::index_t window_dilation_h = 1;
|
||||
ck::index_t window_dilation_w = 1;
|
||||
ck::index_t in_left_pad_h = 0;
|
||||
ck::index_t in_left_pad_w = 0;
|
||||
ck::index_t in_right_pad_h = 0;
|
||||
ck::index_t in_right_pad_w = 0;
|
||||
|
||||
bool pass = maxpool_bwd_test<InDataType,
|
||||
OutDataType,
|
||||
@@ -53,6 +55,8 @@ int main()
|
||||
Wi,
|
||||
window_stride_h,
|
||||
window_stride_w,
|
||||
window_dilation_h,
|
||||
window_dilation_w,
|
||||
in_left_pad_h,
|
||||
in_left_pad_w,
|
||||
in_right_pad_h,
|
||||
|
||||
@@ -36,6 +36,8 @@ bool maxpool_bwd_test(bool do_verification,
|
||||
ck::index_t Wi,
|
||||
ck::index_t window_stride_h,
|
||||
ck::index_t window_stride_w,
|
||||
ck::index_t window_dilation_h,
|
||||
ck::index_t window_dilation_w,
|
||||
ck::index_t in_left_pad_h,
|
||||
ck::index_t in_left_pad_w,
|
||||
ck::index_t in_right_pad_h,
|
||||
@@ -44,28 +46,30 @@ bool maxpool_bwd_test(bool do_verification,
|
||||
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
|
||||
|
||||
using DevicePoolFwdInstance =
|
||||
ck::tensor_operation::device::DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C<
|
||||
InDataType, // InDataType
|
||||
OutDataType, // OutDataType
|
||||
IndexDataType, // IndexDataType
|
||||
ComputeDataType, // ComputeDataType
|
||||
ck::ReduceTensorOp::MAX,
|
||||
true, // OutputIndex
|
||||
64, // BlockSize
|
||||
64, // ReduceMThreadClusterSize
|
||||
1, // ReduceKThreadClusterSize
|
||||
4, // ReduceMThreadSliceSize
|
||||
1, // ReduceKThreadSliceSize
|
||||
1>; // InSrcOutDstVectorSize
|
||||
ck::tensor_operation::device::DevicePool2dFwd_NHWC_NHWC<InDataType, // InDataType
|
||||
OutDataType, // OutDataType
|
||||
IndexDataType, // IndexDataType
|
||||
ComputeDataType, // ComputeDataType
|
||||
ck::ReduceTensorOp::MAX,
|
||||
true,
|
||||
64, // BlockSize
|
||||
64, // ReduceMThreadClusterSize
|
||||
1, // ReduceKThreadClusterSize
|
||||
4, // ReduceMThreadSliceSize
|
||||
1, // ReduceKThreadSliceSize
|
||||
1>; // InSrcOutDstVectorSize
|
||||
|
||||
using DeviceMaxPoolBwdInstance = ck::tensor_operation::device::
|
||||
DeviceIndexPoolBwdImpl<DOutDataType, IndexDataType, DInDataType, 4>;
|
||||
|
||||
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
|
||||
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
|
||||
const ck::index_t Ys = (Y - 1) * window_dilation_h + 1;
|
||||
const ck::index_t Xs = (X - 1) * window_dilation_w + 1;
|
||||
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Ys) / window_stride_h + 1;
|
||||
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - Xs) / window_stride_w + 1;
|
||||
|
||||
const std::vector<ck::index_t> window_spatial_lengths{Y, X};
|
||||
const std::vector<ck::index_t> window_strides{window_stride_h, window_stride_w};
|
||||
const std::vector<ck::index_t> window_dilations{window_dilation_h, window_dilation_w};
|
||||
const std::vector<ck::index_t> input_left_pads{in_left_pad_h, in_left_pad_w};
|
||||
const std::vector<ck::index_t> input_right_pads{in_right_pad_h, in_right_pad_w};
|
||||
|
||||
@@ -128,6 +132,7 @@ bool maxpool_bwd_test(bool do_verification,
|
||||
{C * Ho * Wo, 1, Wo * C, C},
|
||||
{C * Ho * Wo, 1, Wo * C, C},
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads,
|
||||
{2, 3});
|
||||
@@ -191,6 +196,7 @@ bool maxpool_bwd_test(bool do_verification,
|
||||
indices_n_c_ho_wo_host,
|
||||
window_spatial_lengths,
|
||||
window_strides,
|
||||
window_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads);
|
||||
ref_pooling_fwd_invoker.Run(ref_pooling_fwd_argument);
|
||||
|
||||
@@ -24,18 +24,20 @@ int main()
|
||||
bool time_kernel = false;
|
||||
|
||||
// Pool shape
|
||||
ck::index_t N = 1;
|
||||
ck::index_t C = 1;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 32;
|
||||
ck::index_t Wi = 32;
|
||||
ck::index_t window_stride_h = 1;
|
||||
ck::index_t window_stride_w = 1;
|
||||
ck::index_t in_left_pad_h = 0;
|
||||
ck::index_t in_left_pad_w = 0;
|
||||
ck::index_t in_right_pad_h = 0;
|
||||
ck::index_t in_right_pad_w = 0;
|
||||
ck::index_t N = 1;
|
||||
ck::index_t C = 1;
|
||||
ck::index_t Y = 3;
|
||||
ck::index_t X = 3;
|
||||
ck::index_t Hi = 32;
|
||||
ck::index_t Wi = 32;
|
||||
ck::index_t window_stride_h = 1;
|
||||
ck::index_t window_stride_w = 1;
|
||||
ck::index_t window_dilation_h = 1;
|
||||
ck::index_t window_dilation_w = 1;
|
||||
ck::index_t in_left_pad_h = 0;
|
||||
ck::index_t in_left_pad_w = 0;
|
||||
ck::index_t in_right_pad_h = 0;
|
||||
ck::index_t in_right_pad_w = 0;
|
||||
|
||||
bool pass = maxpool_bwd_test<InDataType,
|
||||
OutDataType,
|
||||
@@ -53,6 +55,8 @@ int main()
|
||||
Wi,
|
||||
window_stride_h,
|
||||
window_stride_w,
|
||||
window_dilation_h,
|
||||
window_dilation_w,
|
||||
in_left_pad_h,
|
||||
in_left_pad_w,
|
||||
in_right_pad_h,
|
||||
|
||||
@@ -24,18 +24,20 @@ int main()
|
||||
bool time_kernel = false;
|
||||
|
||||
// Pool shape
|
||||
ck::index_t N = 1;
|
||||
ck::index_t C = 1;
|
||||
ck::index_t Y = 2;
|
||||
ck::index_t X = 2;
|
||||
ck::index_t Hi = 32;
|
||||
ck::index_t Wi = 32;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t in_left_pad_h = 0;
|
||||
ck::index_t in_left_pad_w = 0;
|
||||
ck::index_t in_right_pad_h = 0;
|
||||
ck::index_t in_right_pad_w = 0;
|
||||
ck::index_t N = 1;
|
||||
ck::index_t C = 1;
|
||||
ck::index_t Y = 2;
|
||||
ck::index_t X = 2;
|
||||
ck::index_t Hi = 32;
|
||||
ck::index_t Wi = 32;
|
||||
ck::index_t window_stride_h = 2;
|
||||
ck::index_t window_stride_w = 2;
|
||||
ck::index_t window_dilation_h = 1;
|
||||
ck::index_t window_dilation_w = 1;
|
||||
ck::index_t in_left_pad_h = 0;
|
||||
ck::index_t in_left_pad_w = 0;
|
||||
ck::index_t in_right_pad_h = 0;
|
||||
ck::index_t in_right_pad_w = 0;
|
||||
|
||||
bool pass = maxpool_bwd_test<InDataType,
|
||||
OutDataType,
|
||||
@@ -53,6 +55,8 @@ int main()
|
||||
Wi,
|
||||
window_stride_h,
|
||||
window_stride_w,
|
||||
window_dilation_h,
|
||||
window_dilation_w,
|
||||
in_left_pad_h,
|
||||
in_left_pad_w,
|
||||
in_right_pad_h,
|
||||
|
||||
Reference in New Issue
Block a user