mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
[CK_TILE] fix example reduces, permute and elementwise on gfx11 & gfx12 (#2810)
1. Refine Reduce2dShape to support both wave32 and wave64.
2. Fix the reduce, permute and elementwise examples on gfx11 and gfx12.
---------
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
This commit is contained in:
@@ -137,8 +137,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
// This is often a multiple of the wavefront size, 64 on CDNA.
|
||||
// Here, it is computed rather than hard-coded, so that it stays consistent with Shape::kBlockSize.
|
||||
// Shape::kBlockSize would be BlockWarps * warpSize (e.g., 8 * 64 = 512).
|
||||
constexpr ck_tile::index_t kBlockSize =
|
||||
ck_tile::get_warp_size() * BlockWarps::at(ck_tile::number<0>{});
|
||||
const ck_tile::index_t kBlockSize = Kernel::BlockSize();
|
||||
|
||||
// kBlockPerCu: Hint for how many workgroups can be scheduled per Compute Unit (CU).
|
||||
// This can influence occupancy and performance.
|
||||
|
||||
@@ -84,8 +84,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
for(auto d : problem_shape)
|
||||
total_elements *= d;
|
||||
|
||||
constexpr ck_tile::index_t kBlockSize =
|
||||
ck_tile::get_warp_size() * BlockWarps::at(ck_tile::number<0>{});
|
||||
const ck_tile::index_t kBlockSize = Kernel::BlockSize();
|
||||
|
||||
constexpr ck_tile::index_t kBlockPerCu = 2;
|
||||
|
||||
|
||||
@@ -89,8 +89,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
ck_tile::index_t total_elements = M * N;
|
||||
|
||||
constexpr ck_tile::index_t kBlockSize =
|
||||
ck_tile::get_warp_size() * BlockWarps::at(ck_tile::number<0>{});
|
||||
const ck_tile::index_t kBlockSize = Kernel::BlockSize();
|
||||
constexpr ck_tile::index_t kBlockPerCu = 1;
|
||||
constexpr ck_tile::index_t elements_per_block = BlockTile::at(ck_tile::number<0>{});
|
||||
ck_tile::index_t kGridSize = (total_elements + elements_per_block - 1) / elements_per_block;
|
||||
|
||||
@@ -78,8 +78,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
for(auto d : shape)
|
||||
total_elements *= d;
|
||||
|
||||
constexpr ck_tile::index_t kBlockSize =
|
||||
ck_tile::get_warp_size() * BlockWarps::at(ck_tile::number<0>{});
|
||||
const ck_tile::index_t kBlockSize = Kernel::BlockSize();
|
||||
constexpr ck_tile::index_t kBlockPerCu = 1;
|
||||
|
||||
constexpr ck_tile::index_t elements_per_block = BlockTile::at(ck_tile::number<0>{});
|
||||
|
||||
Reference in New Issue
Block a user