Add client example of grouped conv2d backward weight (data type: fp16) (#498)

* Remove redundant CMake setting

* Extract common code from files

* Rename folder 'convnd' to 'conv'

* Use std::array<> to accept compile-time kwnown # of arguments

* Fix compilation error of tuning parameter

* In example, use same setting as unit-test

* Remove no-longer used include directive

* Add interface for grouped conv bwd weight

* Add group support for conv bwd weight

* Add grouped conv bwd weight example

* Use group parameter in example

* Rename example folder

* Remove non-grouped version example source files

* Rename device op template

* Add group support to convolution backward weight

* Remove debug messages

* Use smaller group size in example

* Use named variable as loop terminate condition

* Prettify example output message

* Enlarge used grid size

* Allow real grid size exceeds expected grid size

* Rename interface file

* Add client example for grouped conv2d bwd weight

* Fix wrong include directive

* Rename client example folder

[ROCm/composable_kernel commit: 38470e0497]
This commit is contained in:
Po Yen Chen
2022-11-10 08:50:03 +08:00
committed by GitHub
parent db1f435770
commit 93f036f2c3
50 changed files with 2221 additions and 2114 deletions

View File

@@ -364,14 +364,16 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
index_t M01 = 1,
index_t N01 = 1,
index_t KSplit = 1)
: M01_(M01),
: c_grid_desc_m_n_(c_grid_desc_m_n),
M01_(M01),
N01_(N01),
KSplit_(KSplit),
underlying_map_(GetBlockToCTileMap(c_grid_desc_m_n, M01, N01, KSplit))
{
}
__host__ constexpr index_t CalculateGridSize(const CGridDesc_M_N& c_grid_desc_m_n) const
__host__ __device__ constexpr index_t
CalculateGridSize(const CGridDesc_M_N& c_grid_desc_m_n) const
{
const auto M0 = math::integer_divide_ceil(c_grid_desc_m_n.GetLength(I0), MPerBlock);
const auto N0 = math::integer_divide_ceil(c_grid_desc_m_n.GetLength(I1), NPerBlock);
@@ -387,7 +389,10 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
template <typename TopIdx>
__host__ __device__ constexpr auto CalculateBottomIndex(const TopIdx& idx_top) const
{
return underlying_map_.CalculateBottomIndex(idx_top);
static_assert(TopIdx::Size() == 1);
return underlying_map_.CalculateBottomIndex(
make_multi_index(idx_top[I0] % CalculateGridSize()));
}
template <typename CTileIdx, typename CTileDim>
@@ -418,6 +423,11 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
}
private:
__device__ constexpr index_t CalculateGridSize() const
{
return CalculateGridSize(c_grid_desc_m_n_);
}
__host__ static constexpr auto GetBlockToCTileMap(const CGridDesc_M_N& c_grid_desc_m_n,
index_t M01,
index_t N01,
@@ -450,6 +460,7 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
return c_blockid_to_ksplit_m0_n0_block_cluster_adaptor;
}
CGridDesc_M_N c_grid_desc_m_n_;
index_t M01_, N01_, KSplit_;
using UnderlyingMap = decltype(GetBlockToCTileMap(CGridDesc_M_N{}, 1, 1, 1));
UnderlyingMap underlying_map_;