mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 12:59:49 +00:00
Add client example of grouped conv2d backward weight (data type: fp16) (#498)
* Remove redundant CMake setting
* Extract common code from files
* Rename folder 'convnd' to 'conv'
* Use std::array<> to accept a compile-time known number of arguments
* Fix compilation error of tuning parameter
* In example, use same setting as unit-test
* Remove no-longer used include directive
* Add interface for grouped conv bwd weight
* Add group support for conv bwd weight
* Add grouped conv bwd weight example
* Use group parameter in example
* Rename example folder
* Remove non-grouped version example source files
* Rename device op template
* Add group support to convolution backward weight
* Remove debug messages
* Use smaller group size in example
* Use named variable as loop termination condition
* Prettify example output message
* Enlarge used grid size
* Allow real grid size to exceed expected grid size
* Rename interface file
* Add client example for grouped conv2d bwd weight
* Fix wrong include directive
* Rename client example folder
[ROCm/composable_kernel commit: 38470e0497]
This commit is contained in:
@@ -364,14 +364,16 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
|
||||
index_t M01 = 1,
|
||||
index_t N01 = 1,
|
||||
index_t KSplit = 1)
|
||||
: M01_(M01),
|
||||
: c_grid_desc_m_n_(c_grid_desc_m_n),
|
||||
M01_(M01),
|
||||
N01_(N01),
|
||||
KSplit_(KSplit),
|
||||
underlying_map_(GetBlockToCTileMap(c_grid_desc_m_n, M01, N01, KSplit))
|
||||
{
|
||||
}
|
||||
|
||||
__host__ constexpr index_t CalculateGridSize(const CGridDesc_M_N& c_grid_desc_m_n) const
|
||||
__host__ __device__ constexpr index_t
|
||||
CalculateGridSize(const CGridDesc_M_N& c_grid_desc_m_n) const
|
||||
{
|
||||
const auto M0 = math::integer_divide_ceil(c_grid_desc_m_n.GetLength(I0), MPerBlock);
|
||||
const auto N0 = math::integer_divide_ceil(c_grid_desc_m_n.GetLength(I1), NPerBlock);
|
||||
@@ -387,7 +389,10 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
|
||||
template <typename TopIdx>
|
||||
__host__ __device__ constexpr auto CalculateBottomIndex(const TopIdx& idx_top) const
|
||||
{
|
||||
return underlying_map_.CalculateBottomIndex(idx_top);
|
||||
static_assert(TopIdx::Size() == 1);
|
||||
|
||||
return underlying_map_.CalculateBottomIndex(
|
||||
make_multi_index(idx_top[I0] % CalculateGridSize()));
|
||||
}
|
||||
|
||||
template <typename CTileIdx, typename CTileDim>
|
||||
@@ -418,6 +423,11 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
|
||||
}
|
||||
|
||||
private:
|
||||
__device__ constexpr index_t CalculateGridSize() const
|
||||
{
|
||||
return CalculateGridSize(c_grid_desc_m_n_);
|
||||
}
|
||||
|
||||
__host__ static constexpr auto GetBlockToCTileMap(const CGridDesc_M_N& c_grid_desc_m_n,
|
||||
index_t M01,
|
||||
index_t N01,
|
||||
@@ -450,6 +460,7 @@ struct BlockToCTileMap_KSplit_M00_N00_M01_N01
|
||||
return c_blockid_to_ksplit_m0_n0_block_cluster_adaptor;
|
||||
}
|
||||
|
||||
CGridDesc_M_N c_grid_desc_m_n_;
|
||||
index_t M01_, N01_, KSplit_;
|
||||
using UnderlyingMap = decltype(GetBlockToCTileMap(CGridDesc_M_N{}, 1, 1, 1));
|
||||
UnderlyingMap underlying_map_;
|
||||
|
||||
Reference in New Issue
Block a user