mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 18:17:44 +00:00
[CK_TILE] fix elementwise example in gfx11/12 (#2676)
* fix elementwise examples
* improve robustness
* fix ck_tile's elementwise test
* update elementwise test
[ROCm/composable_kernel commit: bcc38deff7]
This commit is contained in:
@@ -113,7 +113,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
// ElementWiseShape bundles these tiling parameters.
|
||||
// It calculates derived properties like threads per wavefront, repeats, vectorization and total
|
||||
// block size.
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, ComputeDataType>;
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, XDataType>;
|
||||
|
||||
// ElementWisePipelineProblem encapsulates all necessary information for the elementwise kernel:
|
||||
// - Data types (input, compute, output).
|
||||
|
||||
@@ -69,7 +69,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
using BlockWarps = ck_tile::sequence<1>;
|
||||
using WarpTile = ck_tile::sequence<256>;
|
||||
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, ComputeDataType>;
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, XDataType>;
|
||||
|
||||
using Problem = ck_tile::ElementWisePipelineProblem<XDataType,
|
||||
ComputeDataType,
|
||||
|
||||
@@ -73,7 +73,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
using BlockWarps = ck_tile::sequence<8>;
|
||||
using WarpTile = ck_tile::sequence<64>;
|
||||
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, ComputeDataType>;
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, XDataType>;
|
||||
|
||||
// Problem definition for a single input tensor
|
||||
using Problem = ck_tile::ElementWisePipelineProblem<XDataType,
|
||||
@@ -86,7 +86,8 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
ck_tile::index_t total_elements = M * N;
|
||||
|
||||
constexpr ck_tile::index_t kBlockSize = 64 * BlockWarps::at(ck_tile::number<0>{});
|
||||
constexpr ck_tile::index_t kBlockSize =
|
||||
ck_tile::get_warp_size() * BlockWarps::at(ck_tile::number<0>{});
|
||||
constexpr ck_tile::index_t kBlockPerCu = 1;
|
||||
constexpr ck_tile::index_t elements_per_block = BlockTile::at(ck_tile::number<0>{});
|
||||
ck_tile::index_t kGridSize = (total_elements + elements_per_block - 1) / elements_per_block;
|
||||
|
||||
@@ -38,7 +38,6 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
|
||||
using XDataType = DataType;
|
||||
using YDataType = DataType;
|
||||
using ComputeDataType = float;
|
||||
using XElementwiseOperation = ck_tile::element_wise::UnarySquare;
|
||||
|
||||
// 1. Initialize the input data on the host
|
||||
@@ -64,7 +63,7 @@ bool run(const ck_tile::ArgParser& arg_parser)
|
||||
// will cover some part of blockTile)
|
||||
using WarpTile = ck_tile::sequence<64>; // How many elements are covered by a warp
|
||||
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, ComputeDataType>;
|
||||
using Shape = ck_tile::ElementWiseShape<BlockWarps, BlockTile, WarpTile, XDataType>;
|
||||
using Problem = ck_tile::ElementWisePipelineProblem<XDataType,
|
||||
XDataType, // ComputeDataType is same as
|
||||
// XDataType in the unary case
|
||||
|
||||
@@ -14,13 +14,14 @@ struct ElementWiseShape
|
||||
|
||||
static constexpr index_t kWarpM = WarpTile::at(number<0>{});
|
||||
|
||||
static constexpr index_t kVectorM = 16 / sizeof(ComputeDataType);
|
||||
static constexpr index_t kVectorM =
|
||||
min(static_cast<index_t>(16 / sizeof(ComputeDataType)), kWarpM / get_warp_size());
|
||||
|
||||
static constexpr index_t kWarpPerBlockM = BlockWarps::at(number<0>{});
|
||||
|
||||
static constexpr index_t kThreadPerWarpM = kWarpM / kVectorM;
|
||||
static constexpr index_t kThreadPerWarpM = get_warp_size();
|
||||
|
||||
static constexpr index_t kRepeatM = kBlockM / (kWarpPerBlockM * kWarpM);
|
||||
static constexpr index_t kRepeatM = kBlockM / (kWarpPerBlockM * kVectorM * kThreadPerWarpM);
|
||||
|
||||
static constexpr index_t kBlockSize =
|
||||
ck_tile::get_warp_size() * reduce_on_sequence(BlockWarps{}, multiplies{}, number<1>{});
|
||||
|
||||
@@ -53,7 +53,7 @@ class TestCkTileElementwise : public ::testing::Test
|
||||
using BlockTile_ = std::tuple_element_t<5, Tuple>;
|
||||
using WarpTile_ = std::tuple_element_t<6, Tuple>;
|
||||
using TestElementWiseShape =
|
||||
ck_tile::ElementWiseShape<BlockWarps_, BlockTile_, WarpTile_, ComputeDataType>;
|
||||
ck_tile::ElementWiseShape<BlockWarps_, BlockTile_, WarpTile_, XDataType>;
|
||||
static constexpr int NumInputs = elementwise_op_traits<ElementwiseOpType>::num_inputs;
|
||||
|
||||
void RunTest(ck_tile::index_t total_m_elements)
|
||||
@@ -195,8 +195,7 @@ TYPED_TEST(TestCkTileElementwise, RunElementwise_1024) { this->RunTest(1024); }
|
||||
|
||||
TYPED_TEST(TestCkTileElementwise, RunElementwise_513)
|
||||
{
|
||||
EXPECT_THROW((this->RunTest(513)),
|
||||
std::runtime_error); // Test with an input size that's not a multiple of kVectorM
|
||||
this->RunTest(513); // Test with an input size that's not a multiple of kVectorM
|
||||
}
|
||||
|
||||
TYPED_TEST(TestCkTileElementwise, RunElementwise_516)
|
||||
|
||||
Reference in New Issue
Block a user