mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
* init
* refactor for 1x1
* rename e0_e1
* add e1 with bugs
* debug
* fixed
* fixed e1
* add timer
* improve threadwise gemm with dot2
* add e2
* tuning
* separate c2
* add nhwc
* restore nchwc
* clean
* opt
* fixed; tuning
* add BGlobalMoveSliceWindowStepHacks{}
* tuning
* repeat running
* adjust
* merge v5r1 nchwc
* add adaptors
* split k0 k1 in c_thread_grid
* split h and w
* remove v5r1 nhwc
* clean for pr
* remove host_conv_add
* clean code
* clean
* add dynamic support
* static mode
* test static
* add conv+add fusion
* fixed validation
* naming fix
* use activ_enum
* make static
* refactor conv_add for InMem::add
* add bias
* add conv_out
* add configurable makeddesc
* add maxpool fusion
* add maxpool host for validation
* enable static desc
* conv-only use v5r1_add
* test
* test
* for binary dumps
* fixed incorrect results due to typo
* clean
* debugging maxpool
* workaround with offset trick
* clean code
* modularize ops of fusion
* add gridwise_gemm_v3
* create separate fusion fun
* enable dynamic mode of conv and conv+resize_add
* add dynamic mode of maxpool
* add pass by point
* add activ_type as arguments
* merge develop
* clean
* reset config to old default
Co-authored-by: Chao Liu <chao.liu2@amd.com>
91 lines
2.8 KiB
C++
91 lines
2.8 KiB
C++
#ifndef CONV_COMMON_HPP
|
|
#define CONV_COMMON_HPP
|
|
|
|
#include <cmath>
#include <stdexcept>

#include "tensor_descriptor.hpp"
|
|
|
|
template <typename... InDesc,
|
|
typename... WeiDesc,
|
|
typename ConvStrides,
|
|
typename ConvDilations,
|
|
typename LeftPads,
|
|
typename RightPads>
|
|
constexpr auto get_convolution_output_default_4d_tensor_descriptor(
|
|
const ck::TensorDescriptor<InDesc...>& in_desc,
|
|
const ck::TensorDescriptor<WeiDesc...>& wei_desc,
|
|
const ConvStrides& conv_strides,
|
|
const ConvDilations conv_dilations,
|
|
const LeftPads& left_pads,
|
|
const RightPads& right_pads)
|
|
{
|
|
using namespace ck;
|
|
|
|
constexpr auto I0 = Number<0>{};
|
|
constexpr auto I1 = Number<1>{};
|
|
constexpr auto I2 = Number<2>{};
|
|
constexpr auto I3 = Number<3>{};
|
|
|
|
assert(in_desc.GetNumOfDimension() == 4);
|
|
assert(wei_desc.GetNumOfDimension() == 4);
|
|
assert(in_desc.GetLength(I1) == wei_desc.GetLength(I1));
|
|
|
|
const auto N = in_desc.GetLength(I0);
|
|
const auto Hi = in_desc.GetLength(I2);
|
|
const auto Wi = in_desc.GetLength(I3);
|
|
|
|
const auto K = wei_desc.GetLength(I0);
|
|
const auto Y = wei_desc.GetLength(I2);
|
|
const auto X = wei_desc.GetLength(I3);
|
|
|
|
const auto LeftPadH = left_pads[I0];
|
|
const auto LeftPadW = left_pads[I1];
|
|
|
|
const auto RightPadH = right_pads[I0];
|
|
const auto RightPadW = right_pads[I1];
|
|
|
|
const auto YEff = (Y - I1) * conv_dilations[I0] + I1;
|
|
const auto XEff = (X - I1) * conv_dilations[I1] + I1;
|
|
|
|
const auto Ho = (Hi + LeftPadH + RightPadH - YEff) / conv_strides[I0] + I1;
|
|
const auto Wo = (Wi + LeftPadW + RightPadW - XEff) / conv_strides[I1] + I1;
|
|
|
|
return make_naive_tensor_descriptor_packed(make_tuple(N, K, Ho, Wo));
|
|
}
|
|
|
|
template <class InDesc, class WeiDesc, class OutDesc>
|
|
constexpr std::size_t
|
|
calculate_convolution_flops(const InDesc&, const WeiDesc& wei_desc, const OutDesc& out_desc)
|
|
{
|
|
using namespace ck;
|
|
|
|
constexpr auto I0 = Number<0>{};
|
|
constexpr auto I1 = Number<1>{};
|
|
constexpr auto I2 = Number<2>{};
|
|
constexpr auto I3 = Number<3>{};
|
|
|
|
const index_t N = out_desc.GetLength(I0);
|
|
const index_t K = out_desc.GetLength(I1);
|
|
const index_t Ho = out_desc.GetLength(I2);
|
|
const index_t Wo = out_desc.GetLength(I3);
|
|
|
|
const index_t C = wei_desc.GetLength(I1);
|
|
const index_t Y = wei_desc.GetLength(I2);
|
|
const index_t X = wei_desc.GetLength(I3);
|
|
|
|
return std::size_t(2) * N * K * Ho * Wo * C * Y * X;
|
|
}
|
|
|
|
template <typename T>
|
|
inline auto activ(T v, const ck::ActivTypeEnum_t activ_type)
|
|
{
|
|
const T alpha = 0.3;
|
|
switch(activ_type)
|
|
{
|
|
case ck::ActivTypeEnum_t::None: return v;
|
|
case ck::ActivTypeEnum_t::LeakyRelu: return (v >= 0 ? v : alpha * v);
|
|
case ck::ActivTypeEnum_t::Sigmoid: return (1 / (1 + exp(-v)));
|
|
default: throw std::runtime_error("unsupported activ type"); break;
|
|
}
|
|
}
|
|
|
|
#endif
|