Refactor pool fwd (#815)

* Do not hardcode stride

* devicePool2DFwd Inherit devicePool3DFwd

* Move instance declaration out of common

* Add dilation

* use the pool3d rank, because pool2d inherit pooo3d

* calculate Do Ho Wo for the dilation

* Fix header name

* Modify ckProfiler

* Remove pool2d instance

* Remove pool2d in profiler

* Remove pool2d and add dilation

* In to client example, this commit revise following:
1. Add dilation.
2. Use pool3d to implement pool2d

* Refine naming and IsSupportedArgument()

* Add dilation to maxpool bwd example

* clang format

* 1. Remove useless header
2. Fix copyright
3. Refine naming

* Add layout parameter to pool fwd

* clang format

* Fix merge error

* Fix compile error

* Remove layout parameter in derived class

* Refine changlog

* Fix compile error

* Fix compiler error

* Add layout to external api and profiler

[ROCm/composable_kernel commit: f60f0a5e03]
This commit is contained in:
rocking
2023-08-15 02:25:28 +08:00
committed by GitHub
parent 759fb65188
commit 4ee825732f
41 changed files with 824 additions and 1574 deletions

View File

@@ -39,31 +39,35 @@ bool pool_test(bool do_verification,
ck::index_t Wi,
ck::index_t window_stride_h,
ck::index_t window_stride_w,
ck::index_t window_dilation_h,
ck::index_t window_dilation_w,
ck::index_t in_left_pad_h,
ck::index_t in_left_pad_w,
ck::index_t in_right_pad_h,
ck::index_t in_right_pad_w)
{
using DevicePoolFwdInstance =
ck::tensor_operation::device::DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C<
InDataType, // InDataType
OutDataType, // OutDataType
IndexDataType, // IndexDataType
ComputeDataType, // ComputeDataType
ReduceOpId,
OutputIndex,
64, // BlockSize
64, // ReduceMThreadClusterSize
1, // ReduceKThreadClusterSize
4, // ReduceMThreadSliceSize
1, // ReduceKThreadSliceSize
4>; // InSrcOutDstVectorSize
ck::tensor_operation::device::DevicePool2dFwd_NHWC_NHWC<InDataType,
OutDataType,
IndexDataType,
ComputeDataType,
ReduceOpId,
OutputIndex,
64, // BlockSize
64, // ReduceMThreadClusterSize
1, // ReduceKThreadClusterSize
4, // ReduceMThreadSliceSize
1, // ReduceKThreadSliceSize
1>; // InSrcOutDstVectorSize
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
const ck::index_t Ys = (Y - 1) * window_dilation_h + 1;
const ck::index_t Xs = (X - 1) * window_dilation_w + 1;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Ys) / window_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - Xs) / window_stride_w + 1;
const std::vector<ck::index_t> window_spatial_lengths{Y, X};
const std::vector<ck::index_t> window_strides{window_stride_h, window_stride_w};
const std::vector<ck::index_t> window_dilations{window_dilation_h, window_dilation_w};
const std::vector<ck::index_t> input_left_pads{in_left_pad_h, in_left_pad_w};
const std::vector<ck::index_t> input_right_pads{in_right_pad_h, in_right_pad_w};
@@ -123,6 +127,7 @@ bool pool_test(bool do_verification,
{C * Ho * Wo, 1, Wo * C, C},
{C * Ho * Wo, 1, Wo * C, C},
window_strides,
window_dilations,
input_left_pads,
input_right_pads,
{2, 3});
@@ -144,8 +149,8 @@ bool pool_test(bool do_verification,
float gb_per_sec = num_btype / 1.E6 / ave_time;
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s"
<< std::endl;
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec
<< " GB / s " << std::endl;
bool pass = true;
@@ -169,6 +174,7 @@ bool pool_test(bool do_verification,
out_indices_n_c_ho_wo_host,
window_spatial_lengths,
window_strides,
window_dilations,
input_left_pads,
input_right_pads);

View File

@@ -34,18 +34,20 @@ int main(int argc, char* argv[])
bool time_kernel;
// Pool shape
ck::index_t N = 128;
ck::index_t C = 192;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 71;
ck::index_t Wi = 71;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
ck::index_t N = 128;
ck::index_t C = 192;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 71;
ck::index_t Wi = 71;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t window_dilation_h = 1;
ck::index_t window_dilation_w = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
if(argc == 1)
{
@@ -59,31 +61,33 @@ int main(int argc, char* argv[])
init_method = std::stoi(argv[2]);
time_kernel = static_cast<bool>(std::stoi(argv[3]));
}
else if(argc == 16)
else if(argc == 18)
{
do_verification = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
time_kernel = static_cast<bool>(std::stoi(argv[3]));
N = std::stoi(argv[4]);
C = std::stoi(argv[5]);
Y = std::stoi(argv[6]);
X = std::stoi(argv[7]);
Hi = std::stoi(argv[8]);
Wi = std::stoi(argv[9]);
window_stride_h = std::stoi(argv[10]);
window_stride_w = std::stoi(argv[11]);
in_left_pad_h = std::stoi(argv[12]);
in_left_pad_w = std::stoi(argv[13]);
in_right_pad_h = std::stoi(argv[14]);
in_right_pad_w = std::stoi(argv[15]);
N = std::stoi(argv[4]);
C = std::stoi(argv[5]);
Y = std::stoi(argv[6]);
X = std::stoi(argv[7]);
Hi = std::stoi(argv[8]);
Wi = std::stoi(argv[9]);
window_stride_h = std::stoi(argv[10]);
window_stride_w = std::stoi(argv[11]);
window_dilation_h = std::stoi(argv[12]);
window_dilation_w = std::stoi(argv[13]);
in_left_pad_h = std::stoi(argv[14]);
in_left_pad_w = std::stoi(argv[15]);
in_right_pad_h = std::stoi(argv[16]);
in_right_pad_w = std::stoi(argv[17]);
}
else
{
printf("arg1: verification (0=no, 1=yes)\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
printf("arg3: time kernel (0=no, 1=yes)\n");
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, LeftPy, LeftPx, RightPy, "
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(0);
}
@@ -107,6 +111,8 @@ int main(int argc, char* argv[])
Wi,
window_stride_h,
window_stride_w,
window_dilation_h,
window_dilation_w,
in_left_pad_h,
in_left_pad_w,
in_right_pad_h,

View File

@@ -34,18 +34,20 @@ int main(int argc, char* argv[])
bool time_kernel;
// Pool shape
ck::index_t N = 128;
ck::index_t C = 192;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 71;
ck::index_t Wi = 71;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
ck::index_t N = 128;
ck::index_t C = 192;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 71;
ck::index_t Wi = 71;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t window_dilation_h = 1;
ck::index_t window_dilation_w = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
if(argc == 1)
{
@@ -59,31 +61,33 @@ int main(int argc, char* argv[])
init_method = std::stoi(argv[2]);
time_kernel = static_cast<bool>(std::stoi(argv[3]));
}
else if(argc == 16)
else if(argc == 18)
{
do_verification = std::stoi(argv[1]);
init_method = std::stoi(argv[2]);
time_kernel = static_cast<bool>(std::stoi(argv[3]));
N = std::stoi(argv[4]);
C = std::stoi(argv[5]);
Y = std::stoi(argv[6]);
X = std::stoi(argv[7]);
Hi = std::stoi(argv[8]);
Wi = std::stoi(argv[9]);
window_stride_h = std::stoi(argv[10]);
window_stride_w = std::stoi(argv[11]);
in_left_pad_h = std::stoi(argv[12]);
in_left_pad_w = std::stoi(argv[13]);
in_right_pad_h = std::stoi(argv[14]);
in_right_pad_w = std::stoi(argv[15]);
N = std::stoi(argv[4]);
C = std::stoi(argv[5]);
Y = std::stoi(argv[6]);
X = std::stoi(argv[7]);
Hi = std::stoi(argv[8]);
Wi = std::stoi(argv[9]);
window_stride_h = std::stoi(argv[10]);
window_stride_w = std::stoi(argv[11]);
window_dilation_h = std::stoi(argv[12]);
window_dilation_w = std::stoi(argv[13]);
in_left_pad_h = std::stoi(argv[14]);
in_left_pad_w = std::stoi(argv[15]);
in_right_pad_h = std::stoi(argv[16]);
in_right_pad_w = std::stoi(argv[17]);
}
else
{
printf("arg1: verification (0=no, 1=yes)\n");
printf("arg2: initialization (0=no init, 1=integer value, 2=decimal value)\n");
printf("arg3: time kernel (0=no, 1=yes)\n");
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, LeftPy, LeftPx, RightPy, "
printf("arg4 to 15: N, C, Y, X, Hi, Wi, Sy, Sx, Dy, Dx, LeftPy, LeftPx, RightPy, "
"RightPx\n");
exit(0);
}
@@ -107,6 +111,8 @@ int main(int argc, char* argv[])
Wi,
window_stride_h,
window_stride_w,
window_dilation_h,
window_dilation_w,
in_left_pad_h,
in_left_pad_w,
in_right_pad_h,

View File

@@ -18,7 +18,45 @@
#include "ck/library/utility/literals.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_pool_fwd.hpp"
template <typename InDataType,
template <typename TensorLayout>
std::vector<ck::index_t> f_tensor_strides_ncdhw(ck::index_t N_,
ck::index_t C_,
ck::index_t D,
ck::index_t H,
ck::index_t W,
TensorLayout layout)
{
using namespace ck::literals;
(void)N_;
if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCDHW>::value)
return {C_ * D * H * W, D * H * W, H * W, W, 1_uz};
else if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NDHWC>::value)
return {D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_};
};
template <typename TensorLayout>
HostTensorDescriptor f_host_tensor_descriptor(std::size_t N_,
std::size_t C_,
std::size_t D,
std::size_t H,
std::size_t W,
TensorLayout layout)
{
using namespace ck::literals;
if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCDHW>::value)
{
return HostTensorDescriptor({N_, C_, D, H, W}, {C_ * D * H * W, D * H * W, H * W, W, 1_uz});
}
else if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NDHWC>::value)
{
return HostTensorDescriptor({N_, C_, D, H, W},
{D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_});
}
};
template <typename DevicePoolFwdInstance,
typename InDataType,
typename OutDataType,
typename ComputeDataType,
typename IndexDataType,
@@ -40,6 +78,9 @@ bool pool3d_test(bool do_verification,
ck::index_t window_stride_d,
ck::index_t window_stride_h,
ck::index_t window_stride_w,
ck::index_t window_dilation_d,
ck::index_t window_dilation_h,
ck::index_t window_dilation_w,
ck::index_t in_left_pad_d,
ck::index_t in_left_pad_h,
ck::index_t in_left_pad_w,
@@ -47,53 +88,21 @@ bool pool3d_test(bool do_verification,
ck::index_t in_right_pad_h,
ck::index_t in_right_pad_w)
{
using DevicePoolFwdInstance =
ck::tensor_operation::device::DevicePool3dFwd_Input_N_Di_Hi_Wi_C_Output_N_Do_Ho_Wo_C<
InDataType, // InDataType
OutDataType, // OutDataType
IndexDataType, // IndexDataType
ComputeDataType, // ComputeDataType
ReduceOpId,
OutputIndex,
64, // BlockSize
64, // ReduceMThreadClusterSize
1, // ReduceKThreadClusterSize
4, // ReduceMThreadSliceSize
1, // ReduceKThreadSliceSize
4>; // InSrcOutDstVectorSize
const ck::index_t Do = (Di + in_left_pad_d + in_right_pad_d - Z) / window_stride_d + 1;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
const ck::index_t Zs = (Z - 1) * window_dilation_d + 1;
const ck::index_t Ys = (Y - 1) * window_dilation_h + 1;
const ck::index_t Xs = (X - 1) * window_dilation_w + 1;
const ck::index_t Do = (Di + in_left_pad_d + in_right_pad_d - Zs) / window_stride_d + 1;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Ys) / window_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - Xs) / window_stride_w + 1;
const std::vector<ck::index_t> window_spatial_lengths{Z, Y, X};
const std::vector<ck::index_t> window_strides{
window_stride_d, window_stride_h, window_stride_w};
const std::vector<ck::index_t> window_dilations{
window_dilation_d, window_dilation_h, window_dilation_w};
const std::vector<ck::index_t> input_left_pads{in_left_pad_d, in_left_pad_h, in_left_pad_w};
const std::vector<ck::index_t> input_right_pads{in_right_pad_d, in_right_pad_h, in_right_pad_w};
// tensor layout
auto f_host_tensor_descriptor = [](std::size_t N_,
std::size_t C_,
std::size_t D,
std::size_t H,
std::size_t W,
auto layout) {
using namespace ck::literals;
if constexpr(ck::is_same<decltype(layout), ck::tensor_layout::convolution::NCDHW>::value)
{
return HostTensorDescriptor({N_, C_, D, H, W},
{C_ * D * H * W, D * H * W, H * W, W, 1_uz});
}
else if constexpr(ck::is_same<decltype(layout),
ck::tensor_layout::convolution::NDHWC>::value)
{
return HostTensorDescriptor({N_, C_, D, H, W},
{D * C_ * H * W, 1_uz, C_ * H * W, W * C_, C_});
}
};
Tensor<InDataType> in_n_c_di_hi_wi(f_host_tensor_descriptor(N, C, Di, Hi, Wi, InLayout{}));
Tensor<OutDataType> out_n_c_do_ho_wo_host(
f_host_tensor_descriptor(N, C, Do, Ho, Wo, OutLayout{}));
@@ -126,10 +135,11 @@ bool pool3d_test(bool do_verification,
{N, C, Di, Hi, Wi},
{Z, Y, X},
{N, C, Do, Ho, Wo},
{Di * C * Hi * Wi, 1, C * Hi * Wi, Wi * C, C},
{Do * C * Ho * Wo, 1, C * Ho * Wo, Wo * C, C},
{Do * C * Ho * Wo, 1, C * Ho * Wo, Wo * C, C},
f_tensor_strides_ncdhw(N, C, Di, Hi, Wi, InLayout{}),
f_tensor_strides_ncdhw(N, C, Do, Ho, Wo, OutLayout{}),
f_tensor_strides_ncdhw(N, C, Do, Ho, Wo, OutLayout{}),
window_strides,
window_dilations,
input_left_pads,
input_right_pads,
{2, 3, 4});
@@ -165,6 +175,7 @@ bool pool3d_test(bool do_verification,
out_indices_n_c_do_ho_wo_host,
window_spatial_lengths,
window_strides,
window_dilations,
input_left_pads,
input_right_pads);

View File

@@ -27,31 +27,49 @@ static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;
static constexpr bool OutputIndex = false;
static constexpr bool PropagateNan = false;
using DevicePoolFwdInstance =
ck::tensor_operation::device::DevicePool3dFwd_NDHWC_NDHWC<InDataType,
OutDataType,
IndexDataType,
ComputeDataType,
ReduceOpId,
OutputIndex,
64, // BlockSize
64, // ReduceMThreadClusterSize
1, // ReduceKThreadClusterSize
1, // ReduceMThreadSliceSize
1, // ReduceKThreadSliceSize
1>; // InSrcOutDstVectorSize
int main()
{
bool do_verification = true;
bool time_kernel = false;
// Pool shape
ck::index_t N = 2;
ck::index_t C = 32;
ck::index_t Z = 2;
ck::index_t Y = 2;
ck::index_t X = 2;
ck::index_t Di = 30;
ck::index_t Hi = 30;
ck::index_t Wi = 30;
ck::index_t window_stride_d = 2;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t in_left_pad_d = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_d = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
ck::index_t N = 2;
ck::index_t C = 32;
ck::index_t Z = 2;
ck::index_t Y = 2;
ck::index_t X = 2;
ck::index_t Di = 30;
ck::index_t Hi = 30;
ck::index_t Wi = 30;
ck::index_t window_stride_d = 2;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t window_dilation_d = 1;
ck::index_t window_dilation_h = 1;
ck::index_t window_dilation_w = 1;
ck::index_t in_left_pad_d = 1;
ck::index_t in_left_pad_h = 1;
ck::index_t in_left_pad_w = 1;
ck::index_t in_right_pad_d = 1;
ck::index_t in_right_pad_h = 1;
ck::index_t in_right_pad_w = 1;
bool pass = pool3d_test<InDataType,
bool pass = pool3d_test<DevicePoolFwdInstance,
InDataType,
OutDataType,
ComputeDataType,
IndexDataType,
@@ -72,6 +90,9 @@ int main()
window_stride_d,
window_stride_h,
window_stride_w,
window_dilation_d,
window_dilation_h,
window_dilation_w,
in_left_pad_d,
in_left_pad_h,
in_left_pad_w,

View File

@@ -24,18 +24,20 @@ int main()
bool time_kernel = false;
// Pool shape
ck::index_t N = 1;
ck::index_t C = 1;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 32;
ck::index_t Wi = 32;
ck::index_t window_stride_h = 1;
ck::index_t window_stride_w = 1;
ck::index_t in_left_pad_h = 0;
ck::index_t in_left_pad_w = 0;
ck::index_t in_right_pad_h = 0;
ck::index_t in_right_pad_w = 0;
ck::index_t N = 1;
ck::index_t C = 1;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 32;
ck::index_t Wi = 32;
ck::index_t window_stride_h = 1;
ck::index_t window_stride_w = 1;
ck::index_t window_dilation_h = 1;
ck::index_t window_dilation_w = 1;
ck::index_t in_left_pad_h = 0;
ck::index_t in_left_pad_w = 0;
ck::index_t in_right_pad_h = 0;
ck::index_t in_right_pad_w = 0;
bool pass = maxpool_bwd_test<InDataType,
OutDataType,
@@ -53,6 +55,8 @@ int main()
Wi,
window_stride_h,
window_stride_w,
window_dilation_h,
window_dilation_w,
in_left_pad_h,
in_left_pad_w,
in_right_pad_h,

View File

@@ -36,6 +36,8 @@ bool maxpool_bwd_test(bool do_verification,
ck::index_t Wi,
ck::index_t window_stride_h,
ck::index_t window_stride_w,
ck::index_t window_dilation_h,
ck::index_t window_dilation_w,
ck::index_t in_left_pad_h,
ck::index_t in_left_pad_w,
ck::index_t in_right_pad_h,
@@ -44,28 +46,30 @@ bool maxpool_bwd_test(bool do_verification,
using PassThrough = ck::tensor_operation::element_wise::PassThrough;
using DevicePoolFwdInstance =
ck::tensor_operation::device::DevicePool2dFwd_Input_N_Hi_Wi_C_Output_N_Ho_Wo_C<
InDataType, // InDataType
OutDataType, // OutDataType
IndexDataType, // IndexDataType
ComputeDataType, // ComputeDataType
ck::ReduceTensorOp::MAX,
true, // OutputIndex
64, // BlockSize
64, // ReduceMThreadClusterSize
1, // ReduceKThreadClusterSize
4, // ReduceMThreadSliceSize
1, // ReduceKThreadSliceSize
1>; // InSrcOutDstVectorSize
ck::tensor_operation::device::DevicePool2dFwd_NHWC_NHWC<InDataType, // InDataType
OutDataType, // OutDataType
IndexDataType, // IndexDataType
ComputeDataType, // ComputeDataType
ck::ReduceTensorOp::MAX,
true,
64, // BlockSize
64, // ReduceMThreadClusterSize
1, // ReduceKThreadClusterSize
4, // ReduceMThreadSliceSize
1, // ReduceKThreadSliceSize
1>; // InSrcOutDstVectorSize
using DeviceMaxPoolBwdInstance = ck::tensor_operation::device::
DeviceIndexPoolBwdImpl<DOutDataType, IndexDataType, DInDataType, 4>;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Y) / window_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - X) / window_stride_w + 1;
const ck::index_t Ys = (Y - 1) * window_dilation_h + 1;
const ck::index_t Xs = (X - 1) * window_dilation_w + 1;
const ck::index_t Ho = (Hi + in_left_pad_h + in_right_pad_h - Ys) / window_stride_h + 1;
const ck::index_t Wo = (Wi + in_left_pad_w + in_right_pad_w - Xs) / window_stride_w + 1;
const std::vector<ck::index_t> window_spatial_lengths{Y, X};
const std::vector<ck::index_t> window_strides{window_stride_h, window_stride_w};
const std::vector<ck::index_t> window_dilations{window_dilation_h, window_dilation_w};
const std::vector<ck::index_t> input_left_pads{in_left_pad_h, in_left_pad_w};
const std::vector<ck::index_t> input_right_pads{in_right_pad_h, in_right_pad_w};
@@ -128,6 +132,7 @@ bool maxpool_bwd_test(bool do_verification,
{C * Ho * Wo, 1, Wo * C, C},
{C * Ho * Wo, 1, Wo * C, C},
window_strides,
window_dilations,
input_left_pads,
input_right_pads,
{2, 3});
@@ -191,6 +196,7 @@ bool maxpool_bwd_test(bool do_verification,
indices_n_c_ho_wo_host,
window_spatial_lengths,
window_strides,
window_dilations,
input_left_pads,
input_right_pads);
ref_pooling_fwd_invoker.Run(ref_pooling_fwd_argument);

View File

@@ -24,18 +24,20 @@ int main()
bool time_kernel = false;
// Pool shape
ck::index_t N = 1;
ck::index_t C = 1;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 32;
ck::index_t Wi = 32;
ck::index_t window_stride_h = 1;
ck::index_t window_stride_w = 1;
ck::index_t in_left_pad_h = 0;
ck::index_t in_left_pad_w = 0;
ck::index_t in_right_pad_h = 0;
ck::index_t in_right_pad_w = 0;
ck::index_t N = 1;
ck::index_t C = 1;
ck::index_t Y = 3;
ck::index_t X = 3;
ck::index_t Hi = 32;
ck::index_t Wi = 32;
ck::index_t window_stride_h = 1;
ck::index_t window_stride_w = 1;
ck::index_t window_dilation_h = 1;
ck::index_t window_dilation_w = 1;
ck::index_t in_left_pad_h = 0;
ck::index_t in_left_pad_w = 0;
ck::index_t in_right_pad_h = 0;
ck::index_t in_right_pad_w = 0;
bool pass = maxpool_bwd_test<InDataType,
OutDataType,
@@ -53,6 +55,8 @@ int main()
Wi,
window_stride_h,
window_stride_w,
window_dilation_h,
window_dilation_w,
in_left_pad_h,
in_left_pad_w,
in_right_pad_h,

View File

@@ -24,18 +24,20 @@ int main()
bool time_kernel = false;
// Pool shape
ck::index_t N = 1;
ck::index_t C = 1;
ck::index_t Y = 2;
ck::index_t X = 2;
ck::index_t Hi = 32;
ck::index_t Wi = 32;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t in_left_pad_h = 0;
ck::index_t in_left_pad_w = 0;
ck::index_t in_right_pad_h = 0;
ck::index_t in_right_pad_w = 0;
ck::index_t N = 1;
ck::index_t C = 1;
ck::index_t Y = 2;
ck::index_t X = 2;
ck::index_t Hi = 32;
ck::index_t Wi = 32;
ck::index_t window_stride_h = 2;
ck::index_t window_stride_w = 2;
ck::index_t window_dilation_h = 1;
ck::index_t window_dilation_w = 1;
ck::index_t in_left_pad_h = 0;
ck::index_t in_left_pad_w = 0;
ck::index_t in_right_pad_h = 0;
ck::index_t in_right_pad_w = 0;
bool pass = maxpool_bwd_test<InDataType,
OutDataType,
@@ -53,6 +55,8 @@ int main()
Wi,
window_stride_h,
window_stride_w,
window_dilation_h,
window_dilation_w,
in_left_pad_h,
in_left_pad_w,
in_right_pad_h,