mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-04-20 06:49:15 +00:00
[CK_TILE] fix example reduces, permute and elementwise on gfx11 & gfx12 (#2810)
1. Refine Reduce2dShape to support both wave32 and wave64 2. Fix example reduce, permute and elementwise on gfx11 and gfx12 --------- Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
This commit is contained in:
@@ -88,7 +88,6 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
// using WarpTile = ck_tile::sequence<1, 512>;
|
||||
// using Vector = ck_tile::sequence<1, 8>;
|
||||
|
||||
constexpr ck_tile::index_t kBlockSize = 256;
|
||||
constexpr ck_tile::index_t kBlockPerCu = 1;
|
||||
ck_tile::index_t kept_dim_len_prod = N * C;
|
||||
ck_tile::index_t kGridSize = (kept_dim_len_prod + BlockTile::at(ck_tile::number<0>{}) - 1) /
|
||||
@@ -99,8 +98,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
using Porblem =
|
||||
ck_tile::Reduce2dProblem<XDataType, ComputeDataType, YDataType, Shape, ReduceOp>;
|
||||
|
||||
using Kernel = ck_tile::Reduce<Porblem>;
|
||||
|
||||
using Kernel = ck_tile::Reduce<Porblem>;
|
||||
const ck_tile::index_t kBlockSize = Kernel::BlockSize();
|
||||
// Create input tensor shape and strides
|
||||
auto input_shape =
|
||||
ck_tile::make_tuple(problem_shape[0], problem_shape[1], problem_shape[2], problem_shape[3]);
|
||||
|
||||
Reference in New Issue
Block a user