mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
* Do not hardcode stride * devicePool2DFwd inherits devicePool3DFwd * Move instance declaration out of common * Add dilation * Use the pool3d rank, because pool2d inherits pool3d * Calculate Do, Ho, Wo for the dilation * Fix header name * Modify ckProfiler * Remove pool2d instance * Remove pool2d in profiler * Remove pool2d and add dilation * In the client example, this commit revises the following: 1. Add dilation. 2. Use pool3d to implement pool2d * Refine naming and IsSupportedArgument() * Add dilation to maxpool bwd example * clang format * 1. Remove useless header 2. Fix copyright 3. Refine naming * Add layout parameter to pool fwd * clang format * Fix merge error * Fix compile error * Remove layout parameter in derived class * Refine changelog * Fix compile error * Fix compiler error * Add layout to external api and profiler
105 lines
4.2 KiB
C++
105 lines
4.2 KiB
C++
// SPDX-License-Identifier: MIT
|
|
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
|
|
|
|
#include <iostream>
|
|
|
|
#include "ck/ck.hpp"
|
|
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
|
|
#include "ck/utility/reduction_enums.hpp"
|
|
|
|
#include "pool3d_fwd_common.hpp"
|
|
|
|
// Element types used by this example. InDataType and OutDataType are forwarded
// to both the device pooling instance and pool3d_test; ComputeDataType is
// forwarded as the compute-type template parameter.
using InDataType      = ck::half_t;
using OutDataType     = ck::half_t;
using ComputeDataType = float;

// Index type template parameter. OutputIndex is false in this example, so
// presumably no index tensor is produced -- confirm against pool3d_test.
using IndexDataType = int32_t;

// Tensor layout tags forwarded to pool3d_test for the input and output tensors.
using InLayout  = ck::tensor_layout::convolution::NDHWC;
using OutLayout = ck::tensor_layout::convolution::NDHWC;
|
|
|
|
#if 1
|
|
static constexpr auto ReduceOpId = ck::ReduceTensorOp::MAX;
|
|
#else
|
|
static constexpr auto ReduceOpId = ck::ReduceTensorOp::AVG;
|
|
#endif
|
|
|
|
static constexpr bool OutputIndex = false;
|
|
static constexpr bool PropagateNan = false;
|
|
|
|
using DevicePoolFwdInstance =
|
|
ck::tensor_operation::device::DevicePool3dFwd_NDHWC_NDHWC<InDataType,
|
|
OutDataType,
|
|
IndexDataType,
|
|
ComputeDataType,
|
|
ReduceOpId,
|
|
OutputIndex,
|
|
64, // BlockSize
|
|
64, // ReduceMThreadClusterSize
|
|
1, // ReduceKThreadClusterSize
|
|
1, // ReduceMThreadSliceSize
|
|
1, // ReduceKThreadSliceSize
|
|
1>; // InSrcOutDstVectorSize
|
|
|
|
int main()
|
|
{
|
|
bool do_verification = true;
|
|
bool time_kernel = false;
|
|
|
|
// Pool shape
|
|
ck::index_t N = 2;
|
|
ck::index_t C = 32;
|
|
ck::index_t Z = 2;
|
|
ck::index_t Y = 2;
|
|
ck::index_t X = 2;
|
|
ck::index_t Di = 30;
|
|
ck::index_t Hi = 30;
|
|
ck::index_t Wi = 30;
|
|
ck::index_t window_stride_d = 2;
|
|
ck::index_t window_stride_h = 2;
|
|
ck::index_t window_stride_w = 2;
|
|
ck::index_t window_dilation_d = 1;
|
|
ck::index_t window_dilation_h = 1;
|
|
ck::index_t window_dilation_w = 1;
|
|
ck::index_t in_left_pad_d = 1;
|
|
ck::index_t in_left_pad_h = 1;
|
|
ck::index_t in_left_pad_w = 1;
|
|
ck::index_t in_right_pad_d = 1;
|
|
ck::index_t in_right_pad_h = 1;
|
|
ck::index_t in_right_pad_w = 1;
|
|
|
|
bool pass = pool3d_test<DevicePoolFwdInstance,
|
|
InDataType,
|
|
OutDataType,
|
|
ComputeDataType,
|
|
IndexDataType,
|
|
InLayout,
|
|
OutLayout,
|
|
ReduceOpId,
|
|
PropagateNan,
|
|
OutputIndex>(do_verification,
|
|
time_kernel,
|
|
N,
|
|
C,
|
|
Z,
|
|
Y,
|
|
X,
|
|
Di,
|
|
Hi,
|
|
Wi,
|
|
window_stride_d,
|
|
window_stride_h,
|
|
window_stride_w,
|
|
window_dilation_d,
|
|
window_dilation_h,
|
|
window_dilation_w,
|
|
in_left_pad_d,
|
|
in_left_pad_h,
|
|
in_left_pad_w,
|
|
in_right_pad_d,
|
|
in_right_pad_h,
|
|
in_right_pad_w);
|
|
|
|
return (pass ? 0 : 1);
|
|
}
|