mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-18 20:09:25 +00:00
Common forward convolution utility refactor. (#141)
* Convolution ND
* Code unification across dimensions for generating tensor descriptors.
* Example
* Instances
* Move convnd f32 instance file to comply with repo structure.
* Conv 1D tensor layouts.
* Formatting and use ReferenceConv
* Reference ConvFwd supporting 1D and 2D convolution.
* Debug printing TensorLayout name.
* Conv fwd 1D instance f32
* Refactor conv ND example.
Needed to support various conv dimensio.
Needed to support various conv dimensions
* Rename conv nd example director to prevent conflicts.
* Refactor some common utility to single file.
Plus some tests.
* Refactor GetHostTensorDescriptor + UT.
* Add 1D test case.
* Test reference convolution 1d/2d
* Remove some leftovers.
* Fix convolution example error for 1D
* Refactor test check errors utility function.
* Test Conv2D Fwd XDL
* More UT for 1D case.
* Parameterize input & weight initializers.
* Rename example to prevent conflicts.
* Split convnd instance into separate files for 1d/2d
* Address review comments.
* Fix data type for flops/gbytes calculations.
* Assign example number 11.
* 3D cases for convolution utility functions.
* 3D reference convolution.
* Add support for 3D convolution.
* Check for inputs bigger than 2GB.
* Formatting
* Support for bf16/f16/f32/i8 - conv instances + UT.
* Use check_err from test_util.hpp.
* Split convnd test into separate files for each dim.
* Fix data generation and use proper instances.
* Formatting
* Skip tensor initialization if not necessary.
* Fix CMakefiles.
* Remove redundant conv2d_fwd test.
* Lower problem size for conv3D UT.
* 3D case for convnd example.
* Remove leftovers after merge.
* Add Conv Specialization string to GetTypeString
* Skip instance causing numerical errors.
* Small fixes.
* Remove redundant includes.
* Fix namespace name error.
* Script for automatic testing and logging convolution fwd UTs
* Comment out numactl cmd.
* Refine weights initalization and relax rtol for fp16
* Move test_util.hpp to check_err.hpp
* Refine weights initalization and relax rtol for fp16
* Refactor common part of test conv utils.
* Move utility function to single common place.
* Add additional common functions to utility.
* Refactor convnd_fwd_xdl examples.
* Remove redundant files.
* Unify structure.
* Add constructor to ConvParams.
* And add input parameters validation.
* Modify conv examples to use single utility file.
* Remove check_error from host_tensor.hpp
* Get rid of check_indices function.
* Remove bf16_to_f32 function overload for scalars.
* Fix namespace.
* Add half_float::half for check_err.
* Fix conv params size in UT.
* Fix weights initialization for int8.
* Fix weights initialization for int8.
* Add type_convert when store output in ref conv 1D.
* Get back old conv2d_fwd_xdl operation.
* Silence conv debug print.
* format
* clean
* clean
* Fix merge.
* Fix namespace for check_err
* Formatting.
* Fix merge artifacts.
* Remove deleted header.
* Fix some includes and use ck::utils::check_err.
* Remove unused check_indices restored by previous merge.
* Fix namespaces after merge.
* Fix compilation error.
* Small fixes.
* Use common functions.
* Fix filename
* Fix namespaces.
* Fix merge artifact - retrieve removed by accident fun.
* Fix ConvForwardSpecialization.
* Adhere to coding style rules.
* Fix merge artifacts.
Co-authored-by: Adam Osewski <aosewski@amd.com>
Co-authored-by: Chao Liu <chao.liu2@amd.com>
[ROCm/composable_kernel commit: abf4bdb9a9]
This commit is contained in:
@@ -1,242 +0,0 @@
|
||||
#ifndef CONV_UTILS_HPP
|
||||
#define CONV_UTILS_HPP
|
||||
|
||||
#include <cstdlib>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
#include <numeric>
|
||||
#include <sstream>
|
||||
#include <type_traits>
|
||||
#include <vector>
|
||||
|
||||
#include "config.hpp"
|
||||
#include "host_tensor.hpp"
|
||||
#include "tensor_layout.hpp"
|
||||
|
||||
namespace ck {
|
||||
namespace conv_util {
|
||||
|
||||
/**
|
||||
* @brief Calculate number of FLOPs for Convolution
|
||||
*
|
||||
* @param[in] N Batch size.
|
||||
* @param[in] C Number of input channels.
|
||||
* @param[in] K Number of output channels.
|
||||
* @param[in] filter_spatial_lengths Filter spatial dimensions lengths.
|
||||
* @param[in] output_spatial_lengths Convolution output spatial dimensions
|
||||
* lengths.
|
||||
*
|
||||
* @return The number of flops.
|
||||
*/
|
||||
std::size_t GetFlops(ck::index_t N,
|
||||
ck::index_t C,
|
||||
ck::index_t K,
|
||||
const std::vector<ck::index_t>& filter_spatial_lengths,
|
||||
const std::vector<ck::index_t>& output_spatial_lengths)
|
||||
{
|
||||
// 2 * N * K * <output spatial lengths product> * C * <filter spatial lengths product>
|
||||
return static_cast<std::size_t>(2) * N * K *
|
||||
std::accumulate(std::begin(output_spatial_lengths),
|
||||
std::end(output_spatial_lengths),
|
||||
static_cast<std::size_t>(1),
|
||||
std::multiplies<std::size_t>()) *
|
||||
C *
|
||||
std::accumulate(std::begin(filter_spatial_lengths),
|
||||
std::end(filter_spatial_lengths),
|
||||
static_cast<std::size_t>(1),
|
||||
std::multiplies<std::size_t>());
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Calculate number of bytes read/write by convolution algorithm.
|
||||
*
|
||||
* @param[in] N Batch size.
|
||||
* @param[in] C Number of input channels.
|
||||
* @param[in] K Number of output channels.
|
||||
* @param[in] input_spatial_lengths Input spatial dimensions lengths.
|
||||
* @param[in] filter_spatial_lengths Filter spatial dimensions lengths.
|
||||
* @param[in] output_spatial_lengths Output spatial dimensions lengths
|
||||
*
|
||||
* @tparam InDataType Input tensor data type.
|
||||
* @tparam WeiDataType Weights tensor data type.
|
||||
* @tparam OutDataType Output tensor data type.
|
||||
*
|
||||
* @return The number of used bytes.
|
||||
*/
|
||||
template <typename InDataType = float,
|
||||
typename WeiDataType = InDataType,
|
||||
typename OutDataType = InDataType>
|
||||
std::size_t GetBtype(ck::index_t N,
|
||||
ck::index_t C,
|
||||
ck::index_t K,
|
||||
const std::vector<ck::index_t>& input_spatial_lengths,
|
||||
const std::vector<ck::index_t>& filter_spatial_lengths,
|
||||
const std::vector<ck::index_t>& output_spatial_lengths)
|
||||
{
|
||||
// sizeof(InDataType) * (N * C * <input spatial lengths product>) +
|
||||
// sizeof(WeiDataType) * (K * C * <filter spatial lengths product>) +
|
||||
// sizeof(OutDataType) * (N * K * <output spatial lengths product>);
|
||||
return sizeof(InDataType) * (N * C *
|
||||
std::accumulate(std::begin(input_spatial_lengths),
|
||||
std::end(input_spatial_lengths),
|
||||
static_cast<std::size_t>(1),
|
||||
std::multiplies<std::size_t>())) +
|
||||
sizeof(WeiDataType) * (K * C *
|
||||
std::accumulate(std::begin(filter_spatial_lengths),
|
||||
std::end(filter_spatial_lengths),
|
||||
static_cast<std::size_t>(1),
|
||||
std::multiplies<std::size_t>())) +
|
||||
sizeof(OutDataType) * (N * K *
|
||||
std::accumulate(std::begin(output_spatial_lengths),
|
||||
std::end(output_spatial_lengths),
|
||||
static_cast<std::size_t>(1),
|
||||
std::multiplies<std::size_t>()));
|
||||
}
|
||||
|
||||
struct ConvParams
|
||||
{
|
||||
ConvParams()
|
||||
: num_dim_spatial(2),
|
||||
N(128),
|
||||
K(256),
|
||||
C(192),
|
||||
filter_spatial_lengths(2, 3),
|
||||
input_spatial_lengths(2, 71),
|
||||
conv_filter_strides(2, 2),
|
||||
conv_filter_dilations(2, 1),
|
||||
input_left_pads(2, 1),
|
||||
input_right_pads(2, 1)
|
||||
{
|
||||
}
|
||||
ConvParams(ck::index_t n_dim_spatial,
|
||||
ck::index_t n,
|
||||
ck::index_t k,
|
||||
ck::index_t c,
|
||||
std::vector<ck::index_t> filter_lengths,
|
||||
std::vector<ck::index_t> input_lengths,
|
||||
std::vector<ck::index_t> conv_strides,
|
||||
std::vector<ck::index_t> conv_dilations,
|
||||
std::vector<ck::index_t> left_pads,
|
||||
std::vector<ck::index_t> right_pads)
|
||||
: num_dim_spatial(n_dim_spatial),
|
||||
N(n),
|
||||
K(k),
|
||||
C(c),
|
||||
filter_spatial_lengths(filter_lengths),
|
||||
input_spatial_lengths(input_lengths),
|
||||
conv_filter_strides(conv_strides),
|
||||
conv_filter_dilations(conv_dilations),
|
||||
input_left_pads(left_pads),
|
||||
input_right_pads(right_pads)
|
||||
{
|
||||
}
|
||||
|
||||
ck::index_t num_dim_spatial;
|
||||
ck::index_t N;
|
||||
ck::index_t K;
|
||||
ck::index_t C;
|
||||
|
||||
std::vector<ck::index_t> filter_spatial_lengths;
|
||||
std::vector<ck::index_t> input_spatial_lengths;
|
||||
|
||||
std::vector<ck::index_t> conv_filter_strides;
|
||||
std::vector<ck::index_t> conv_filter_dilations;
|
||||
|
||||
std::vector<ck::index_t> input_left_pads;
|
||||
std::vector<ck::index_t> input_right_pads;
|
||||
|
||||
std::vector<ck::index_t> GetOutputSpatialLengths() const
|
||||
{
|
||||
std::vector<ck::index_t> out_spatial_len(num_dim_spatial, 0);
|
||||
for(ck::index_t i = 0; i < num_dim_spatial; ++i)
|
||||
{
|
||||
// XEff = (X - 1) * conv_dilation_w + 1;
|
||||
// Wo = (Wi + in_left_pad_w + in_right_pad_w - XEff) / conv_stride_w + 1;
|
||||
const ck::index_t idx_eff =
|
||||
(filter_spatial_lengths[i] - 1) * conv_filter_dilations[i] + 1;
|
||||
out_spatial_len[i] =
|
||||
(input_spatial_lengths[i] + input_left_pads[i] + input_right_pads[i] - idx_eff) /
|
||||
conv_filter_strides[i] +
|
||||
1;
|
||||
}
|
||||
return out_spatial_len;
|
||||
}
|
||||
};
|
||||
|
||||
/**
|
||||
* @brief Gets the host tensor descriptor.
|
||||
*
|
||||
* @param[in] dims The tensor dimensions lengths. Always in NCHW format.
|
||||
* @param[in] layout The tensor data layout.
|
||||
*
|
||||
* @tparam TensorLayout Layout type.
|
||||
*
|
||||
* @return The host tensor descriptor object.
|
||||
*/
|
||||
template <typename TensorLayout>
|
||||
HostTensorDescriptor GetHostTensorDescriptor(const std::vector<std::size_t>& dims,
|
||||
const TensorLayout& layout)
|
||||
{
|
||||
std::size_t C = dims[1];
|
||||
// 1D
|
||||
if constexpr(std::is_same<TensorLayout, ck::tensor_layout::convolution::NCW>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::KCX>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::NKW>::value)
|
||||
{
|
||||
|
||||
return HostTensorDescriptor(dims, std::vector<std::size_t>({C * dims[2], dims[2], 1}));
|
||||
}
|
||||
else if constexpr(std::is_same<TensorLayout, ck::tensor_layout::convolution::NWC>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::KXC>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::NWK>::value)
|
||||
{
|
||||
return HostTensorDescriptor(dims, std::vector<std::size_t>({C * dims[2], 1, C}));
|
||||
}
|
||||
// 2D
|
||||
else if constexpr(std::is_same<TensorLayout, ck::tensor_layout::convolution::NCHW>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::KCYX>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::NKHW>::value)
|
||||
{
|
||||
|
||||
return HostTensorDescriptor(
|
||||
dims, std::vector<std::size_t>{C * dims[2] * dims[3], dims[2] * dims[3], dims[3], 1});
|
||||
}
|
||||
else if constexpr(std::is_same<TensorLayout, ck::tensor_layout::convolution::NHWC>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::KYXC>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::NHWK>::value)
|
||||
{
|
||||
return HostTensorDescriptor(
|
||||
dims, std::vector<std::size_t>{C * dims[2] * dims[3], 1, dims[3] * C, C});
|
||||
}
|
||||
// 3D
|
||||
else if constexpr(std::is_same<TensorLayout, ck::tensor_layout::convolution::NCDHW>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::KCZYX>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::NKDHW>::value)
|
||||
{
|
||||
|
||||
return HostTensorDescriptor(dims,
|
||||
std::vector<std::size_t>{C * dims[2] * dims[3] * dims[4],
|
||||
dims[2] * dims[3] * dims[4],
|
||||
dims[3] * dims[4],
|
||||
dims[4],
|
||||
1});
|
||||
}
|
||||
else if constexpr(std::is_same<TensorLayout, ck::tensor_layout::convolution::NDHWC>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::KZYXC>::value ||
|
||||
std::is_same<TensorLayout, ck::tensor_layout::convolution::NDHWK>::value)
|
||||
{
|
||||
return HostTensorDescriptor(
|
||||
dims,
|
||||
std::vector<std::size_t>{
|
||||
C * dims[2] * dims[3] * dims[4], 1, dims[3] * dims[4] * C, dims[4] * C, C});
|
||||
}
|
||||
|
||||
std::stringstream err_msg;
|
||||
err_msg << "Unsupported data layout provided: " << layout << "!";
|
||||
throw std::runtime_error(err_msg.str());
|
||||
}
|
||||
|
||||
} // namespace conv_util
|
||||
} // namespace ck
|
||||
|
||||
#endif
|
||||
@@ -1,73 +0,0 @@
|
||||
#ifndef CONVOLUTION_UTILITY_HPP
|
||||
#define CONVOLUTION_UTILITY_HPP
|
||||
|
||||
#include <vector>
|
||||
|
||||
namespace ck {
|
||||
namespace tensor_operation {
|
||||
|
||||
struct ConvolutionUtility
|
||||
{
|
||||
static std::vector<ck::index_t>
|
||||
ComputeOutputSpatialLengths(std::vector<ck::index_t> input_spatial_lengths,
|
||||
std::vector<ck::index_t> filter_spatial_lengths,
|
||||
std::vector<ck::index_t> conv_strides,
|
||||
std::vector<ck::index_t> conv_dilations,
|
||||
std::vector<ck::index_t> in_left_pads,
|
||||
std::vector<ck::index_t> in_right_pads)
|
||||
{
|
||||
if(input_spatial_lengths.size() == 2)
|
||||
{
|
||||
assert(filter_spatial_lengths.size() == 2);
|
||||
assert(conv_strides.size() == 2);
|
||||
assert(conv_dilations.size() == 2);
|
||||
assert(in_left_pads.size() == 2);
|
||||
assert(in_right_pads.size() == 2);
|
||||
|
||||
const index_t YEff = (filter_spatial_lengths[0] - 1) * conv_dilations[0] + 1;
|
||||
const index_t XEff = (filter_spatial_lengths[1] - 1) * conv_dilations[1] + 1;
|
||||
|
||||
const index_t Hi = input_spatial_lengths[0];
|
||||
const index_t Wi = input_spatial_lengths[1];
|
||||
|
||||
const index_t Ho =
|
||||
(Hi + in_left_pads[0] + in_right_pads[0] - YEff) / conv_strides[0] + 1;
|
||||
const index_t Wo =
|
||||
(Wi + in_left_pads[1] + in_right_pads[1] - XEff) / conv_strides[1] + 1;
|
||||
|
||||
return {Ho, Wo};
|
||||
}
|
||||
else if(input_spatial_lengths.size() == 3)
|
||||
{
|
||||
assert(filter_spatial_lengths.size() == 3);
|
||||
assert(conv_strides.size() == 3);
|
||||
assert(conv_dilations.size() == 3);
|
||||
assert(in_left_pads.size() == 3);
|
||||
assert(in_right_pads.size() == 3);
|
||||
|
||||
const index_t ZEff = (filter_spatial_lengths[0] - 1) * conv_dilations[0] + 1;
|
||||
const index_t YEff = (filter_spatial_lengths[1] - 1) * conv_dilations[1] + 1;
|
||||
const index_t XEff = (filter_spatial_lengths[2] - 1) * conv_dilations[2] + 1;
|
||||
|
||||
const index_t Di = input_spatial_lengths[0];
|
||||
const index_t Hi = input_spatial_lengths[1];
|
||||
const index_t Wi = input_spatial_lengths[2];
|
||||
|
||||
const index_t Do =
|
||||
(Di + in_left_pads[0] + in_right_pads[0] - ZEff) / conv_strides[0] + 1;
|
||||
const index_t Ho =
|
||||
(Hi + in_left_pads[1] + in_right_pads[1] - YEff) / conv_strides[1] + 1;
|
||||
const index_t Wo =
|
||||
(Wi + in_left_pads[2] + in_right_pads[2] - XEff) / conv_strides[2] + 1;
|
||||
return {Do, Ho, Wo};
|
||||
}
|
||||
else
|
||||
{
|
||||
return {};
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
} // namespace tensor_operation
|
||||
} // namespace ck
|
||||
#endif
|
||||
@@ -4,7 +4,7 @@
|
||||
#include <iostream>
|
||||
#include <memory>
|
||||
#include <sstream>
|
||||
#include "convolution_utility.hpp"
|
||||
#include "conv_fwd_util.hpp"
|
||||
#include "device.hpp"
|
||||
#include "device_conv_fwd.hpp"
|
||||
#include "common_header.hpp"
|
||||
@@ -53,36 +53,30 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
|
||||
InElementwiseOperation in_element_op,
|
||||
WeiElementwiseOperation wei_element_op,
|
||||
OutElementwiseOperation out_element_op)
|
||||
: N_{N},
|
||||
K_{K},
|
||||
C_{C},
|
||||
in_spatial_lengths_{input_spatial_lengths},
|
||||
filter_spatial_lengths_{filter_spatial_lengths},
|
||||
: params_{3,
|
||||
N,
|
||||
K,
|
||||
C,
|
||||
filter_spatial_lengths,
|
||||
input_spatial_lengths,
|
||||
conv_filter_strides,
|
||||
conv_filter_dilations,
|
||||
input_left_pads,
|
||||
input_right_pads},
|
||||
out_spatial_lengths_{output_spatial_lengths},
|
||||
conv_filter_strides_{conv_filter_strides},
|
||||
conv_filter_dilations_{conv_filter_dilations},
|
||||
in_left_pads_{input_left_pads},
|
||||
in_right_pads_{input_right_pads},
|
||||
p_in_{p_in},
|
||||
p_wei_{p_wei},
|
||||
p_out_{p_out},
|
||||
in_element_op_{in_element_op},
|
||||
wei_element_op_{wei_element_op},
|
||||
out_element_op_{out_element_op}
|
||||
|
||||
{
|
||||
}
|
||||
|
||||
// private:
|
||||
index_t N_;
|
||||
index_t K_;
|
||||
index_t C_;
|
||||
std::vector<index_t> in_spatial_lengths_;
|
||||
std::vector<index_t> filter_spatial_lengths_;
|
||||
utils::conv::ConvParams params_;
|
||||
std::vector<index_t> out_spatial_lengths_;
|
||||
std::vector<index_t> conv_filter_strides_;
|
||||
std::vector<index_t> conv_filter_dilations_;
|
||||
std::vector<index_t> in_left_pads_;
|
||||
std::vector<index_t> in_right_pads_;
|
||||
|
||||
const InDataType* p_in_;
|
||||
const WeiDataType* p_wei_;
|
||||
@@ -157,13 +151,7 @@ struct DeviceConv3dFwdNaive_Input_N_Di_Hi_Wi_C_Weight_K_Z_Y_X_C_Output_N_Do_Ho_W
|
||||
|
||||
static bool IsSupportedArgument(const Argument& arg)
|
||||
{
|
||||
std::vector<index_t> out_spatial_lengths =
|
||||
ConvolutionUtility::ComputeOutputSpatialLengths(arg.in_spatial_lengths_,
|
||||
arg.filter_spatial_lengths_,
|
||||
arg.conv_filter_strides_,
|
||||
arg.conv_filter_dilations_,
|
||||
arg.in_left_pads_,
|
||||
arg.in_right_pads_);
|
||||
std::vector<index_t> out_spatial_lengths = arg.params_.GetOutputSpatialLengths();
|
||||
|
||||
bool out_lengths_are_consistent = out_spatial_lengths[0] == arg.out_spatial_lengths_[0] &&
|
||||
out_spatial_lengths[1] == arg.out_spatial_lengths_[1] &&
|
||||
|
||||
Reference in New Issue
Block a user