diff --git a/example/ck_tile/40_streamk_gemm/run_gemm_example.inc b/example/ck_tile/40_streamk_gemm/run_gemm_example.inc index 17182d87dc..6ac57b34fc 100644 --- a/example/ck_tile/40_streamk_gemm/run_gemm_example.inc +++ b/example/ck_tile/40_streamk_gemm/run_gemm_example.inc @@ -71,16 +71,16 @@ invoke_gemm(ck_tile::DeviceMem& a_m_k_dev_buf, bool flush_cache, ck_tile::StreamKReductionStrategy reduction_strategy) { - ck_tile::reboot::StreamKHostArgs args{a_m_k_dev_buf.GetDeviceBuffer(), - b_k_n_dev_buf.GetDeviceBuffer(), - c_m_n_dev_buf.GetDeviceBuffer(), - M, - N, - K, - stride_A, - stride_B, - stride_C, - reduction_strategy}; + ck_tile::StreamKHostArgs args{a_m_k_dev_buf.GetDeviceBuffer(), + b_k_n_dev_buf.GetDeviceBuffer(), + c_m_n_dev_buf.GetDeviceBuffer(), + M, + N, + K, + stride_A, + stride_B, + stride_C, + reduction_strategy}; std::tuple ave_time_and_batch; diff --git a/example/ck_tile/40_streamk_gemm/streamk_gemm_basic.cpp b/example/ck_tile/40_streamk_gemm/streamk_gemm_basic.cpp index e04cb00379..0ba7546c3f 100644 --- a/example/ck_tile/40_streamk_gemm/streamk_gemm_basic.cpp +++ b/example/ck_tile/40_streamk_gemm/streamk_gemm_basic.cpp @@ -16,7 +16,7 @@ template -std::tuple gemm(const ck_tile::reboot::StreamKHostArgs& args, +std::tuple gemm(const ck_tile::StreamKHostArgs& args, const ck_tile::stream_config& s) { using GemmShape = ck_tile::TileGemmShape< @@ -28,7 +28,7 @@ std::tuple gemm(const ck_tile::reboot::StreamKHostArgs& GemmConfig::PermuteB>; using TilePartitioner = - ck_tile::StreamKTilePartitioner_v2; + ck_tile::StreamKTilePartitioner; using GemmUniversalTraits = ck_tile::TileGemmUniversalTraits gemm(const ck_tile::reboot::StreamKHostArgs& memory_operation.value, GemmConfig::NumWaveGroups>>; - using Kernel = ck_tile::reboot::StreamKKernel; + using Kernel = ck_tile::StreamKKernel; auto kargs = Kernel::MakeKernelArgs(args); const auto workspace_size = Kernel::GetWorkSpaceSize(kargs); diff --git a/include/ck_tile/ops/common/streamk_common.hpp 
b/include/ck_tile/ops/common/streamk_common.hpp index c01e967dcd..5dbe6223c4 100644 --- a/include/ck_tile/ops/common/streamk_common.hpp +++ b/include/ck_tile/ops/common/streamk_common.hpp @@ -11,33 +11,4 @@ enum StreamKReductionStrategy : uint32_t Atomic = 0u, Reduction = 1u }; - -/** - * @brief Estimates the number of Stream-K workgroups per macro tile in the C tensor. - * - * @param sk_ctas Number of Stream-K workgroups. - * @param iters_per_sk_cta Number of iterations per Stream-K workgroup. - * @param iters_per_tile Number of iterations per tile (i.e., the number of macro tiles in the K - * dimension). - * @return ck_tile::index_t An estimate of the number of workgroups per macro tile in the C tensor. - * @note It is assumed that `iters_per_sk_cta` > 0. - */ -template -ck_tile::index_t -estimate_num_wgs_per_tile(index_t sk_ctas, index_t iters_per_sk_cta, index_t iters_per_tile) -{ - // In the case of non-atomic reduction or data-parallel only, there will always be 1 workgroup - // writing final results to a given macro tile in C. - int num_wgs_per_tile = 1; - - // Otherwise, for atomics, multiple workgroups may be writing to the same macro tile in C. - if(sk_ctas > 0 && ReductionStrategy == ck_tile::StreamKReductionStrategy::Atomic) - { - // Estimate the number of workgroups per macro tile. 
- num_wgs_per_tile = - (iters_per_tile / iters_per_sk_cta) + ((iters_per_tile % iters_per_sk_cta) != 0); - } - - return std::max(num_wgs_per_tile, 1); -} } // namespace ck_tile diff --git a/include/ck_tile/ops/gemm/kernel/gemm_tile_partitioner.hpp b/include/ck_tile/ops/gemm/kernel/gemm_tile_partitioner.hpp index fc85c4dcdf..998431f165 100644 --- a/include/ck_tile/ops/gemm/kernel/gemm_tile_partitioner.hpp +++ b/include/ck_tile/ops/gemm/kernel/gemm_tile_partitioner.hpp @@ -364,448 +364,4 @@ struct GemmSpatiallyLocalTilePartitioner index_t M; index_t N; }; - -/** - * @brief Stream-K tile partitioner that dynamically balances work across workgroups - * - * This partitioner is responsible for mapping workgroups to tiles in the C tensor - * for the Stream-K algorithm which decomposes the GEMM problem - * into smaller work units and distributes them more evenly across available blocks, - * improving load balancing especially for cases where the K dimension is large. - * - * @tparam BlockGemmShapeType A class providing basic GEMM parameters. - * @tparam ReductionStrategy A class that defines the reduction strategy for the results in - * the C Tensor. - * @tparam TileSwizzleSubM A value that defines the size of the swizzle group along the m - * dimension, where the swizzle group denotes consecutive tiles down a column. For instance a - * swizzle group of 8 denotes tiles 0, 1, ..., 7, map to tiles [0,0], [1,0], ..., [7,0] in the C - * tensor. 
- */ -template -struct StreamKTilePartitioner -{ - static constexpr uint32_t MPerBlock = BlockGemmShapeType::kM; - static constexpr uint32_t NPerBlock = BlockGemmShapeType::kN; - static constexpr uint32_t KPerBlock = BlockGemmShapeType::kK; - - CK_TILE_HOST_DEVICE StreamKTilePartitioner() noexcept = delete; - - /** - * @brief Construct Stream-K tile partitioner with problem dimensions - */ - CK_TILE_HOST_DEVICE StreamKTilePartitioner(uint32_t M, - uint32_t N, - uint32_t K, - uint32_t num_cu, - uint32_t occupancy, - uint32_t sk_blocks = 0xffffffff) noexcept - : M_(M), N_(N), K_(K) - { - num_tile_m_ = integer_divide_ceil(M, MPerBlock); - num_tile_n_ = integer_divide_ceil(N, NPerBlock); - num_tile_k_ = integer_divide_ceil(K, KPerBlock); - - constexpr uint32_t min_k_iters_per_sk_block = 2; - uint32_t num_tiles = num_tile_m_ * num_tile_n_; - k_iters_per_tile = mdiv(num_tile_k_); - - // one cu can hold one wg at one time, from the whole cZ's point of view - // if number of wg is same as num_cu, we call it 1 dispatch - // if number of wg is 2x num_cu, we call it 2 dispatches. - // one dispatch can deliver wg same as num_cu (full dispatch), or less than num_cu (partial - // dispatch) - // - const uint32_t full_dispatches = num_tiles / num_cu; - const uint32_t full_dispatch_tiles = full_dispatches * num_cu; - const uint32_t partial_dispatch_tiles = num_tiles - full_dispatch_tiles; - - uint32_t sk_occupancy = occupancy; - uint32_t dp_tiles = full_dispatch_tiles; - uint32_t sk_tiles = partial_dispatch_tiles; - - if(full_dispatches < occupancy) - { - // in this case, we allocate all blocks as sk blocks - // sk_occupancy = occupancy - full_dispatches; - sk_occupancy = 1; - dp_tiles = full_dispatch_tiles; - sk_tiles = partial_dispatch_tiles; - } - else if((occupancy > 1) && (full_dispatches % occupancy == occupancy - 1)) - { - // e.g. occupancy = 2, full_dispatches = 3, 5, 7 ... - // occupancy = 3, full_dispatches = 5, 8, 11 ... - // occupancy = 4, full_dispatches = 7, 11 ... 
- sk_occupancy = 1; // left 1 slot for sk occupancy - dp_tiles = full_dispatch_tiles; - sk_tiles = partial_dispatch_tiles; - } - else - { - // otherwise, we reduce 1 dispatch from dp, together with partial dispatch, - // to construct sk dispatch - sk_occupancy = occupancy - ((full_dispatches - 1) % occupancy); - dp_tiles = full_dispatch_tiles - num_cu; - sk_tiles = partial_dispatch_tiles + num_cu; - } - - // uint32_t dp_iters_per_block = k_iters_per_tile.get(); - uint32_t sk_total_iters = k_iters_per_tile.get() * sk_tiles; - uint32_t dp_num_blocks = 0; - - { - const uint32_t min_sk_tiles = (sk_tiles >= num_cu) ? num_cu : (sk_tiles + 1); - const uint32_t max_sk_tiles = - (sk_tiles >= num_cu) ? num_cu * sk_occupancy - : min(num_cu, sk_total_iters / min_k_iters_per_sk_block); - - // if use dp for sk-block, how many iters do we need - const uint32_t dp_for_sk_iters = k_iters_per_tile.get(); - - uint32_t best_sk_score = - std::numeric_limits::max(); // we need to find the smallest sk iters - for(uint32_t tentative_sk_blocks = min_sk_tiles; tentative_sk_blocks < max_sk_tiles; - tentative_sk_blocks++) - { - const uint32_t tentative_sk_iters_per_block = - (sk_total_iters + tentative_sk_blocks - 1) / tentative_sk_blocks; - const uint32_t tentative_sk_iters = tentative_sk_iters_per_block; - const uint32_t sk_blocks_per_tile = (tentative_sk_blocks + sk_tiles - 1) / sk_tiles; - - // the more sk_blocks_per_tile, the worse the overhead - uint32_t cross_sk_blocks_overhead = sk_blocks_per_tile; - if(tentative_sk_blocks % sk_tiles != 0) - { - // penalty for uneven divide - cross_sk_blocks_overhead += - sk_blocks_per_tile * tentative_sk_iters_per_block / 50; - } - - const uint32_t tentative_sk_score = tentative_sk_iters + cross_sk_blocks_overhead; - - if(tentative_sk_score < best_sk_score) - { - best_sk_score = tentative_sk_score; - sk_num_blocks = tentative_sk_blocks; - } - } - - if(best_sk_score >= dp_for_sk_iters) - { - sk_num_blocks = 0; - } - - // give a chance to control num 
of sk blocks - sk_num_blocks = sk_blocks != 0xffffffff ? sk_blocks : sk_num_blocks; - - if(sk_num_blocks == 0) - { - sk_num_big_blocks = 0; - k_iters_per_big_block = 0; - - dp_num_blocks = num_tiles; // all tile to be dp block - dp_start_block_idx = 0; - sk_total_iters = 0; // clear this tiles - } - else - { - // k_iters_per_sk_block is the floor of avg each ck block loop over tiles. - // we need to decide how many iters for each sk block - // let m = k_iters_per_sk_block - // some of the sk block (little) will cover m iters, some (big) will cover m+1 - // we have - // 1) l + b = sk_blocks - // 2) l * m + b * (m + 1) = sk_total_iters - // => (l + b) * m + b = sk_total_iters - // => sk_blocks * m + b = sk_total_iters - // => b = sk_total_iters - m * sk_blocks - // NOTE: big could be zero - const uint32_t k_iters_per_sk_block = sk_total_iters / sk_num_blocks; - sk_num_big_blocks = sk_total_iters - k_iters_per_sk_block * sk_num_blocks; - k_iters_per_big_block = k_iters_per_sk_block + 1; - - dp_num_blocks = dp_tiles; - dp_start_block_idx = (sk_num_blocks + num_cu - 1) / num_cu * num_cu; - } - } - n_tiles = mdiv2(num_tile_n_); - reduction_start_block_idx = dp_start_block_idx + dp_num_blocks; - - if constexpr(ReductionStrategy == ck_tile::StreamKReductionStrategy::Reduction) - { - const uint32_t upper_big = lcm(k_iters_per_big_block, k_iters_per_tile.get()); - const uint32_t upper_little = lcm(k_iters_per_big_block - 1, k_iters_per_tile.get()); - equiv_tiles_big = mdiv(upper_big / k_iters_per_tile.get()); - equiv_tiles_little = mdiv(upper_little / k_iters_per_tile.get()); - } - } - - /** - * @brief Calculate optimal grid size for Stream-K - */ - CK_TILE_HOST auto GridSize() const noexcept -> dim3 - { - if constexpr(ReductionStrategy == ck_tile::StreamKReductionStrategy::Reduction) - { - return dim3(reduction_start_block_idx + GetSkTiles(), 1, 1); - } - else - return dim3(reduction_start_block_idx, 1, 1); - } - - /** - * @brief Calculate number of loop iterations over K 
dimension for given work unit - */ - CK_TILE_HOST_DEVICE static auto GetLoopNum(uint32_t K) noexcept -> uint32_t - { - return integer_divide_ceil(K, KPerBlock); // Stream-K processes one K-slice at a time - } - - /** - * @brief Get output tile index for standard 2D mapping (compatibility) - */ - CK_TILE_DEVICE auto - GetOutputTileIndex(uint32_t tile_idx) const noexcept -> tuple - { - uint32_t m_tile_idx, n_tile_idx; - n_tiles.divmod(tile_idx, num_tile_n_, m_tile_idx, n_tile_idx); - - // swizzle tile - - uint32_t tile_swizzle_sub_m_rem = num_tile_m_ % TileSwizzleSubM; - - const auto sub_m_adapt = (m_tile_idx < (num_tile_m_ - tile_swizzle_sub_m_rem)) - ? TileSwizzleSubM - : tile_swizzle_sub_m_rem; - - uint32_t m_tile_idx_sub0, m_tile_idx_sub1; - m_tile_idx_sub0 = m_tile_idx / TileSwizzleSubM; - m_tile_idx_sub1 = m_tile_idx % TileSwizzleSubM; - - uint32_t tile_idx_local = n_tile_idx + m_tile_idx_sub1 * num_tile_n_; - - uint32_t m_tile_idx_with_adapt, n_tile_idx_with_adapt; - - n_tile_idx_with_adapt = tile_idx_local / sub_m_adapt; - m_tile_idx_with_adapt = tile_idx_local % sub_m_adapt; - return make_tuple(m_tile_idx_with_adapt + m_tile_idx_sub0 * TileSwizzleSubM, - n_tile_idx_with_adapt); - } - - /** - * @brief Get work range for a given block ID - */ - CK_TILE_DEVICE void - GetBlockItr(uint32_t block_idx, uint32_t& iter_start, uint32_t& iter_end) const noexcept - { - if(block_idx < sk_num_big_blocks) - { - iter_start = block_idx * k_iters_per_big_block; - iter_end = iter_start + k_iters_per_big_block; - } - else if(block_idx < sk_num_blocks) - { - iter_start = (sk_num_big_blocks * k_iters_per_big_block) + - (block_idx - sk_num_big_blocks) * (k_iters_per_big_block - 1); - iter_end = iter_start + (k_iters_per_big_block - 1); - } - else if(block_idx >= dp_start_block_idx) - { - uint32_t sk_total_iters = GetSkTotalIters(); - uint32_t dp_iters_per_block = k_iters_per_tile.get(); - iter_start = sk_total_iters + (block_idx - dp_start_block_idx) * dp_iters_per_block; - 
iter_end = iter_start + dp_iters_per_block; - } - } - - /** - * @brief Get total number of iterations for sk tiles - */ - CK_TILE_HOST_DEVICE uint32_t GetSkTotalIters() const noexcept - { - uint32_t sk_total_iters = sk_num_big_blocks * k_iters_per_big_block + - (sk_num_blocks - sk_num_big_blocks) * (k_iters_per_big_block - 1); - return sk_total_iters; - } - - /** - * @brief Get total number of sk tiles - */ - CK_TILE_HOST_DEVICE uint32_t GetSkTiles() const noexcept - { - // tiles for sk - uint32_t sk_total_iters = GetSkTotalIters(); - return k_iters_per_tile.div(sk_total_iters); - } - - /** - * @brief Get length of loop iterations for stream-k loop - */ - CK_TILE_DEVICE uint32_t GetCurrentIterLength(uint32_t iter_start, - uint32_t iter_end) const noexcept - { - // A WG's iter_end is either in the current C macro tile or not. - // If it is not, then the macro tile boundary is where the WG must stop. - uint32_t distance_to_tile_boundary = - k_iters_per_tile.get() - (iter_start % k_iters_per_tile.get()); - return min(iter_start + distance_to_tile_boundary, iter_end) - iter_start; - } - - /** - * @brief Get index of tile during a specified iteration - */ - CK_TILE_DEVICE uint32_t GetTileIdx(uint32_t iter) const noexcept - { - return k_iters_per_tile.div(iter); - } - - /** - * @brief Get index of tile during a specified iteration - */ - CK_TILE_DEVICE void - GetTileIdxWithOffset(uint32_t iter, uint32_t& tile_idx, uint32_t& iter_offset) const noexcept - { - k_iters_per_tile.divmod(iter, tile_idx, iter_offset); - } - - /** - * @brief Calculates the buffer space needed for accumulation - */ - CK_TILE_HOST_DEVICE uint32_t GetWorkSpaceSizeForAcc(uint32_t acc_element_bytes) const noexcept - { - static constexpr uint32_t alignment = 128; - uint32_t acc_buffer_bytes = - MPerBlock * NPerBlock * GetTotalAccBuffers() * acc_element_bytes; - return (acc_buffer_bytes + alignment - 1) / alignment * alignment; - } - - /** - * @brief Calculates the buffer space needed for the semaphore 
- */ - CK_TILE_HOST_DEVICE uint32_t GetWorkSpaceSizeForSemaphore() const noexcept - { - return GetSkTiles() * sizeof(uint32_t); - } - - /** - * @brief Calculates the total buffer space needed for accumulation and the semaphore - */ - CK_TILE_HOST_DEVICE uint32_t GetWorkSpaceSize(uint32_t acc_element_bytes) const noexcept - { - return GetWorkSpaceSizeForAcc(acc_element_bytes) + GetWorkSpaceSizeForSemaphore(); - } - - /** - * @brief Get location of intersection of tiles for reduction - */ - CK_TILE_HOST_DEVICE uint32_t GetTileIntersections(uint32_t tiles_, - const mdiv& equiv_tiles_) const noexcept - { - uint32_t tile_idx_ = tiles_ == 0 ? 0 : (tiles_ - 1); - uint32_t max_equiv_tiles_ = equiv_tiles_.get() - 1; - uint32_t quo_, rem_; - equiv_tiles_.divmod(tile_idx_, quo_, rem_); - return quo_ * max_equiv_tiles_ + rem_; - } - - /** - * @brief Calculate the number of tiles needed for the number of sk blocks - */ - CK_TILE_HOST_DEVICE uint32_t GetTilesCoverSkBlock(uint32_t num_sk_blocks_, - uint32_t iters_per_sk_block_) const noexcept - { - return k_iters_per_tile.div(num_sk_blocks_ * iters_per_sk_block_ + k_iters_per_tile.get() - - 1); - } - - /** - * @brief Calculate the amount of total accumulation buffers required for stream-k - */ - CK_TILE_HOST_DEVICE uint32_t GetTotalAccBuffers() const noexcept - { - uint32_t tiles_cover_big_blocks = - GetTilesCoverSkBlock(sk_num_big_blocks, k_iters_per_big_block); - uint32_t tiles_cover_little_blocks = - GetTilesCoverSkBlock(sk_num_blocks - sk_num_big_blocks, k_iters_per_big_block - 1); - - uint32_t total_intersec_big = GetTileIntersections(tiles_cover_big_blocks, equiv_tiles_big); - uint32_t total_intersec_little = - GetTileIntersections(tiles_cover_little_blocks, equiv_tiles_little); - - return sk_num_blocks + total_intersec_big + total_intersec_little; - } - - /** - * @brief Calculate offset based on tile index for big/little tiles - */ - CK_TILE_DEVICE uint32_t GetAccBufferOffsetFromTile(uint32_t tile_idx_) const noexcept - { 
- uint32_t tiles_cover_big_blocks = - GetTilesCoverSkBlock(sk_num_big_blocks, k_iters_per_big_block); - if(tile_idx_ < tiles_cover_big_blocks) - { - uint32_t touched_sk_blocks = - (tile_idx_ * k_iters_per_tile.get() + k_iters_per_big_block - 1) / - k_iters_per_big_block; - uint32_t current_intersec = GetTileIntersections(tile_idx_, equiv_tiles_big); - return touched_sk_blocks + current_intersec; - } - else - { - uint32_t iters_per_little_sk_block = k_iters_per_big_block - 1; - uint32_t tile_idx_little_reverse = GetSkTiles() - tile_idx_; - uint32_t touched_sk_blocks = - (tile_idx_little_reverse * k_iters_per_tile.get() + iters_per_little_sk_block - 1) / - iters_per_little_sk_block; - uint32_t current_intersec = - GetTileIntersections(tile_idx_little_reverse, equiv_tiles_little); - return GetTotalAccBuffers() - (touched_sk_blocks + current_intersec); - } - } - - /** - * @brief Calculate offset based on block_idx index for big/little streamk blocks - */ - CK_TILE_DEVICE uint32_t GetAccBufferOffsetFromBlock(uint32_t block_idx_) const noexcept - { - uint32_t iters_per_big_sk_block = k_iters_per_big_block; - uint32_t iters_per_little_sk_block = k_iters_per_big_block - 1; - if(block_idx_ < sk_num_big_blocks) - { - uint32_t touched_tiles = k_iters_per_tile.div(block_idx_ * iters_per_big_sk_block + - k_iters_per_tile.get() - 1); - uint32_t current_intersec = GetTileIntersections(touched_tiles, equiv_tiles_big); - return block_idx_ + current_intersec; - } - else - { - uint32_t block_idx_little_reverse = sk_num_blocks - block_idx_; - uint32_t touched_tiles = k_iters_per_tile.div( - block_idx_little_reverse * iters_per_little_sk_block + k_iters_per_tile.get() - 1); - uint32_t current_intersec = GetTileIntersections(touched_tiles, equiv_tiles_little); - return GetTotalAccBuffers() - (block_idx_little_reverse + current_intersec); - } - } - - // Getters for problem dimensions - CK_TILE_HOST_DEVICE uint32_t GetNumTileM() const noexcept { return num_tile_m_; } - CK_TILE_HOST_DEVICE 
uint32_t GetNumTileN() const noexcept { return num_tile_n_; } - CK_TILE_HOST_DEVICE uint32_t GetNumTileK() const noexcept { return num_tile_k_; } - - uint32_t sk_num_blocks; - uint32_t sk_num_big_blocks; - uint32_t dp_start_block_idx; - uint32_t reduction_start_block_idx; - uint32_t k_iters_per_big_block; - mdiv2 n_tiles; - mdiv k_iters_per_tile; - mdiv equiv_tiles_big; // for reduction - mdiv equiv_tiles_little; // for reduction - - private: - uint32_t M_, N_, K_; - uint32_t num_tile_m_, num_tile_n_, num_tile_k_; -}; } // namespace ck_tile diff --git a/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp index a32e2faf5d..9dfed16bc9 100644 --- a/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp @@ -8,15 +8,16 @@ #include "ck_tile/host/concat.hpp" namespace ck_tile { -namespace reboot { -/// @brief The Stream K GEMM kernel host arguments. -/// -/// @par Overview -/// This structure is passed to @ref StreamKKernel "StreamKKernel" when creating the kernel -/// arguments object. It contains all necessary information required to build proper kernel -/// arguments and launch the kernel on GPU. This structure defines the GEMM problem -/// configuration by stating all required information like M,N,K sizes and respective strides. +/** + * @brief The Stream K GEMM kernel host arguments. + * + * @par Overview + * This structure is passed to @ref StreamKKernel "StreamKKernel" when creating the kernel + * arguments object. It contains all necessary information required to build proper kernel + * arguments and launch the kernel on GPU. This structure defines the GEMM problem + * configuration by stating all required information like M,N,K sizes and respective strides. 
+ */ struct StreamKHostArgs : public ck_tile::UniversalGemmHostArgs<> { CK_TILE_HOST explicit StreamKHostArgs(const void* a_ptr_, @@ -48,22 +49,26 @@ struct StreamKHostArgs : public ck_tile::UniversalGemmHostArgs<> ck_tile::StreamKReductionStrategy reduction_strategy; }; -/// @brief The Stream K GEMM kernel class. -/// -/// @par Overview -/// This class is responsible for the Stream-K kernel, making use of UniversalGemm. -// The main kernel functions are the operator() functions. There is one for Persistent -// and one for Non-Persistent data parallel sections of the Stream-K algorithm. -// -// Both the Non-Persistent and Persistent kernels make use of `BaseGemm()` and -// `StreamKGemm()`. `BaseGemm()` computes offsets into the A,B,C tensors, then calls -// `RunGemm()` which runs the GEMM pipeline and epilogue. `StreamKGemm()` performs the -// main Stream-K algorithm. Each iteration of the Stream-K loop calls `BaseGemm()`. +/** + * @brief The Stream K GEMM kernel class. + * + * @par Overview + * This class is responsible for the Stream-K kernel, making use of UniversalGemm. + * The main kernel functions are the operator() functions. There is one for Persistent + * and one for Non-Persistent data parallel sections of the Stream-K algorithm. + * + * Both the Non-Persistent and Persistent kernels make use of `BaseGemm()` and + * `StreamKGemm()`. `BaseGemm()` computes offsets into the A,B,C tensors, then calls + * `RunGemm()` which runs the GEMM pipeline and epilogue. `StreamKGemm()` performs the + * main Stream-K algorithm. Each iteration of the Stream-K loop calls `BaseGemm()`. + */ template struct StreamKKernel { - /// @brief Inject the UniversalGemmKernel base class to support execution of all necessary - /// functions. + /** + * @brief Inject the UniversalGemmKernel base class to support execution of all necessary + * functions. 
+ */ using UniversalGemmKernel = UniversalGemmKernel; @@ -78,12 +83,16 @@ struct StreamKKernel TilePartitioner::PERSISTENT == PersistentDP, "Persistent flag from TilePartitioner must match Persistent flag from UniversalGemm."); - /// @brief Specify the layout configurations for A, B, and C + /** + * @brief Specify the layout configurations for A, B, and C + */ using ALayout = typename GemmPipeline::ALayout; using BLayout = typename GemmPipeline::BLayout; using CLayout = typename GemmPipeline::CLayout; - /// @brief Specify the data type configurations for A, B, and C + /** + * @brief Specify the data type configurations for A, B, and C + */ using ADataType = typename GemmPipeline::ADataType; using BDataType = typename GemmPipeline::BDataType; using CDataType = typename EpiloguePipeline::ODataType; @@ -91,16 +100,21 @@ struct StreamKKernel template static constexpr bool is_tuple_v = is_detected::value; - - /// @brief ALayout and ADataType are expected to be scalars, not a tuple. + /** + * @brief ALayout and ADataType are expected to be scalars, not a tuple. + */ static_assert(!is_tuple_v && !is_tuple_v, "ALayout and ADataType must be scalars."); - /// @brief BLayout and BDataType are expected to be scalars, not a tuple. + /** + * @brief BLayout and BDataType are expected to be scalars, not a tuple. + */ static_assert(!is_tuple_v && !is_tuple_v, "BLayout and BDataType must be scalars."); - /// @brief CLayout and CDataType are expected to be scalars, not a tuple. + /** + * @brief CLayout and CDataType are expected to be scalars, not a tuple. + */ static_assert(!is_tuple_v && !is_tuple_v, "CLayout and CDataType must be scalars."); @@ -127,14 +141,19 @@ struct StreamKKernel { } - - /// @brief The strategy used by work groups to compute final results in C tensor. + /** + * @brief The strategy used by work groups to compute final results in C tensor. 
+ */ StreamKReductionStrategy reduction_strategy; - /// @brief A pointer to a buffer in device memory for accumulating partial via reduction - /// strategy. + /** + * @brief A pointer to a buffer in device memory for accumulating partial via reduction + * strategy. + */ void* workspace_ptr; - /// @brief An instance of the TilePartioner class for assisting with mapping workgroups to - /// the C tensor. + /** + * @brief An instance of the TilePartitioner class for assisting with mapping workgroups to + * the C tensor. + */ TilePartitioner tile_partitioner; }; @@ -155,17 +174,21 @@ struct StreamKKernel // clang-format on } - /// @brief Compute the grid size for the Stream K kernel using the tile_partitioner. - /// @return The grid size. + /** + * @brief Compute the grid size for the Stream K kernel using the tile_partitioner. + * @return The grid size. + */ CK_TILE_HOST static auto GridSize(const TilePartitioner& tile_partitioner) -> dim3 { return tile_partitioner.grid_size(); } - /// @brief Get the maximum occupancy grid size for the persistent kernel on the current device. - /// @return The maximum occupancy grid size. - /// @note This function queries the maximum occupancy of the kernel using - /// `hipOccupancyMaxActiveBlocksPerMultiprocessor`. + /** + * @brief Get the maximum occupancy grid size for the persistent kernel on the current device. + * @return The maximum occupancy grid size. + * @note This function queries the maximum occupancy of the kernel using + * `hipOccupancyMaxActiveBlocksPerMultiprocessor`. + */ CK_TILE_HOST static auto MaxOccupancyGridSize(const stream_config& s) -> dim3 { return UniversalGemmKernel::MaxOccupancyGridSize(s); @@ -176,13 +199,15 @@ struct StreamKKernel return UniversalGemmKernel::BlockSize(); } - /// @brief Constructs kernel arguments for the Stream-K kernel. - /// @param host_args Stream-K host arguments. - /// @param num_cu Number of compute units (CUs). The default is the number of CUs on the device. 
- /// The caller may select their own to assist with test reproducibility, etc. - /// @param occupancy The maximum number of active blocks per CU for this kernel. The caller may - /// select their own to assist with test reproducibility, etc. - /// @return The kernel arguments for Stream-K. + /** + * @brief Constructs kernel arguments for the Stream-K kernel. + * @param host_args Stream-K host arguments. + * @param num_cu Number of compute units (CUs). The default is the number of CUs on the device. + * The caller may select their own to assist with test reproducibility, etc. + * @param occupancy The maximum number of active blocks per CU for this kernel. The caller may + * select their own to assist with test reproducibility, etc. + * @return The kernel arguments for Stream-K. + */ CK_TILE_HOST static StreamKKernelArgs MakeKernelArgs(const StreamKHostArgs& host_args, int num_cu = NumCU(), int occupancy = Occupancy()) @@ -247,30 +272,35 @@ struct StreamKKernel return UniversalGemmKernel::IsSupportedArgument(kargs); } - /// @brief Computes the buffer size needed to store accumulation results for Stream K. - /// @return The buffer size needed. + /** + * @brief Computes the buffer size needed to store accumulation results for Stream K. + * @return The buffer size needed. + */ CK_TILE_HOST static uint32_t GetWorkSpaceSize(const StreamKKernelArgs& kargs) { return kargs.tile_partitioner.get_workspace_size(sizeof(AccDataType)); } - - /// @brief Sets the kargs' current workspace_ptr to the given workspace_ptr. - /// @note Assumes that the given workspace_ptr points to allocated device memory. + /** + * @brief Sets the kargs' current workspace_ptr to the given workspace_ptr. + * @note Assumes that the given workspace_ptr points to allocated device memory. 
+ */ CK_TILE_HOST static void SetWorkSpacePointer(StreamKKernelArgs& kargs, void* workspace_ptr) { kargs.workspace_ptr = workspace_ptr; } - /// @brief Computes offsets into A, B, and C tensors then runs the GEMM pipeline and epilogue. - /// @param kargs Stream-K kernel arguments. - /// @param tile_idx The 1D tile index in the C tensor for this workgroup. - /// @param num_loop The number of iterations (at the macro tile level) in the K dimension this - /// workgroup will perform in the C tile. - /// @param i_k_a The K offset in the A tensor. - /// @param i_k_b The K offset in the B tensor. - /// @param k_size The portion of the K dimension this workgroup processes in the assigned - /// `tile_idx`. - /// @param smem_ptr_0 Pointer to LDS. + /** + * @brief Computes offsets into A, B, and C tensors then runs the GEMM pipeline and epilogue. + * @param kargs Stream-K kernel arguments. + * @param tile_idx The 1D tile index in the C tensor for this workgroup. + * @param num_loop The number of iterations (at the macro tile level) in the K dimension this + * workgroup will perform in the C tile. + * @param i_k_a The K offset in the A tensor. + * @param i_k_b The K offset in the B tensor. + * @param k_size The portion of the K dimension this workgroup processes in the assigned + * `tile_idx`. + * @param smem_ptr_0 Pointer to LDS. + */ CK_TILE_DEVICE void BaseGemm(StreamKKernelArgs& kargs, index_t tile_idx, index_t num_loop, @@ -292,12 +322,14 @@ struct StreamKKernel {a_ptr}, {b_ptr}, {/*ds_ptr*/}, c_ptr, smem_ptr_0, kargs, num_loop, i_m, i_n, k_size); } - /// @brief Signals that the current thread block (CTA) has completed storing its partial - /// results. - /// @param kargs Kernel arguments, including the workspace pointer. - /// @param cta_idx The index of the current thread block (CTA). - /// @note This function utilizes a workgroup barrier to set a synchronization flag for the given - /// CTA index. 
+ /** + * @brief Signals that the current thread block (CTA) has completed storing its partial + * results. + * @param kargs Kernel arguments, including the workspace pointer. + * @param cta_idx The index of the current thread block (CTA). + * @note This function utilizes a workgroup barrier to set a synchronization flag for the given + * CTA index. + */ CK_TILE_DEVICE void SignalStorePartialDone(const StreamKKernelArgs& kargs, index_t cta_idx) const { @@ -306,11 +338,13 @@ struct StreamKKernel sk_flags.wait_set(0, 1, cta_idx); } - /// @brief Waits for the thread block (cta_idx) to complete storing its partial results. - /// @param kargs Kernel arguments, including the workspace pointer. - /// @param cta_idx The index of the thread block (CTA). - /// @note This function utilizes a workgroup barrier to wait for the synchronization flag to be - /// set by the given CTA index. + /** + * @brief Waits for the thread block (cta_idx) to complete storing its partial results. + * @param kargs Kernel arguments, including the workspace pointer. + * @param cta_idx The index of the thread block (CTA). + * @note This function utilizes a workgroup barrier to wait for the synchronization flag to be + * set by the given CTA index. + */ CK_TILE_DEVICE void WaitStorePartialDone(const StreamKKernelArgs& kargs, index_t cta_idx) const { auto sk_flags_ptr = static_cast(kargs.workspace_ptr); @@ -318,11 +352,13 @@ struct StreamKKernel sk_flags.wait_eq(1, cta_idx); } - /// @brief Adds the values of a block tile to an output block tile. - /// @param in_out_block_tile The output block tile to which values are added. - /// @param in_block_tile The input block tile whose values are added. - /// @note This function iterates over the distributed spans of the block tiles and updates the - /// output block tile with accumulated values. + /** + * @brief Adds the values of a block tile to an output block tile. + * @param in_out_block_tile The output block tile to which values are added. 
+ * @param in_block_tile The input block tile whose values are added. + * @note This function iterates over the distributed spans of the block tiles and updates the + * output block tile with accumulated values. + */ template CK_TILE_DEVICE void AddBlockTile(OAccTile& in_out_block_tile, const OAccTile& in_block_tile) const @@ -337,13 +373,15 @@ struct StreamKKernel }); } - /// @brief Loads a partial block tile from the workspace buffer. - /// @param kargs Kernel arguments, including the workspace pointer. - /// @param cta_idx The index of the thread block (CTA). - /// @param c_block_tile_dist The tile distribution for the block. - /// @return The loaded partial block tile. - /// @note This function calculates the buffer pointer and uses the tile distribution for loading - /// the partial block tile. + /** + * @brief Loads a partial block tile from the workspace buffer. + * @param kargs Kernel arguments, including the workspace pointer. + * @param cta_idx The index of the thread block (CTA). + * @param c_block_tile_dist The tile distribution for the block. + * @return The loaded partial block tile. + * @note This function calculates the buffer pointer and uses the tile distribution for loading + * the partial block tile. + */ template CK_TILE_DEVICE auto LoadPartial(const StreamKKernelArgs& kargs, index_t cta_idx, @@ -371,12 +409,14 @@ struct StreamKKernel return load_tile(partial_tile_window); } - /// @brief Stores a partial block tile to the workspace buffer. - /// @param kargs Kernel arguments, including the workspace pointer. - /// @param cta_idx The index of the thread block (CTA). - /// @param c_block_tile The block tile to be stored. - /// @note This function calculates the buffer pointer and uses the tile window for storing the - /// partial block tile. + /** + * @brief Stores a partial block tile to the workspace buffer. + * @param kargs Kernel arguments, including the workspace pointer. + * @param cta_idx The index of the thread block (CTA). 
+ * @param c_block_tile The block tile to be stored. + * @note This function calculates the buffer pointer and uses the tile window for storing the + * partial block tile. + */ template CK_TILE_DEVICE void StorePartial(const StreamKKernelArgs& kargs, index_t cta_idx, @@ -404,15 +444,17 @@ struct StreamKKernel store_tile(partial_tile_window, c_block_tile); } - /// @brief Runs the main Stream-K algorithm. - /// @param kargs Stream-K kernel arguments. - /// @param cta_idx The current Stream-K workgroup's index. - /// @param smem_ptr_0 Pointer to LDS. - /// @note It is assumed that the first Stream-K workgroup has a `cta_idx` of zero. If a - /// non-persistent data-parallel (DP) section is used, then a Stream-K workgroup's `cta_idx` - /// should be something like `blockIdx.x` minus number of DP workgroups. - CK_TILE_DEVICE void - StreamKGemm(StreamKKernelArgs& kargs, index_t cta_idx, void* smem_ptr_0) const + /** + * @brief Runs the main Stream-K algorithm. + * @param kargs Stream-K kernel arguments. + * @param cta_idx The current Stream-K workgroup's index. + * @param smem_ptr_0 Pointer to LDS. + * @note It is assumed that the first Stream-K workgroup has a `cta_idx` of zero. If a + * non-persistent data-parallel (DP) section is used, then a Stream-K workgroup's `cta_idx` + * should be something like `blockIdx.x` minus number of DP workgroups. + */ + CK_TILE_DEVICE + void StreamKGemm(StreamKKernelArgs& kargs, index_t cta_idx, void* smem_ptr_0) const { index_t iter_start, iter_end; kargs.tile_partitioner.get_iter_boundaries(iter_start, iter_end, cta_idx); @@ -542,13 +584,15 @@ struct StreamKKernel } } - /// @brief Entry point for the Stream-K Kernel with non-persistent DP. - /// - /// @par Overview - /// For the Non-Persistent kernel, each data parallel workgroup will - /// compute the results for their assigned macro-tile by calling `BaseGemm()`.
- /// The Stream-K workgroups will do their assigned work by calling - /// `StreamKGemm()`, which calls `BaseGemm()` in the Stream-K loop. + /** + * @brief Entry point for the Stream-K Kernel with non-persistent DP. + * + * @par Overview + * For the Non-Persistent kernel, each data parallel workgroup will + * compute the results for their assigned macro-tile by calling `BaseGemm()`. + * The Stream-K workgroups will do their assigned work by calling + * `StreamKGemm()`, which calls `BaseGemm()` in the Stream-K loop. + */ template CK_TILE_DEVICE typename std::enable_if_t operator()(StreamKKernelArgs kargs) const { @@ -572,14 +616,16 @@ struct StreamKKernel } } - /// @brief Entry point for the Stream-K Kernel with persistent DP. - /// - /// @par Overview - /// For the Persistent kernel, each workgroup will first compute their - /// assigned data-parallel tiles. Each data parallel tile will be computed - /// by calling `BaseGemm()`. Then the workgroups will proceed with the - /// Stream-K portion by calling `StreamKGemm()`, which calls `BaseGemm()` - /// in the Stream-K loop. + /** + * @brief Entry point for the Stream-K Kernel with persistent DP. + * + * @par Overview + * For the Persistent kernel, each workgroup will first compute their + * assigned data-parallel tiles. Each data parallel tile will be computed + * by calling `BaseGemm()`. Then the workgroups will proceed with the + * Stream-K portion by calling `StreamKGemm()`, which calls `BaseGemm()` + * in the Stream-K loop. + */ template CK_TILE_DEVICE typename std::enable_if_t operator()(StreamKKernelArgs kargs) const { @@ -601,12 +647,14 @@ struct StreamKKernel } private: - /// @brief Computes the K offsets in the A and B tensors given iter_offset, where iter_offset is - /// the starting macro tile index in the K dimension for the workgroup. - /// @return A tuple containing the offsets into the A and B tensors accounting for the layouts - /// of A and B. 
- /// @note The default case is that A is assumed to be row major and B is assumed to be column - /// major. + /** + * @brief Computes the K offsets in the A and B tensors given iter_offset, where iter_offset is + * the starting macro tile index in the K dimension for the workgroup. + * @return A tuple containing the offsets into the A and B tensors accounting for the layouts + * of A and B. + * @note The default case is that A is assumed to be row major and B is assumed to be column + * major. + */ template CK_TILE_DEVICE static tuple GetKOffsets(index_t iter_offset, index_t stride_a, index_t stride_b) @@ -647,10 +695,12 @@ struct StreamKKernel return num_cu; } - /// @brief Computes the occupancy (i.e. maximum number of active blocks per CU) for the kernel - /// @return The occupancy - /// @note This function queries the maximum occupancy of the kernel using - /// `hipOccupancyMaxActiveBlocksPerMultiprocessor`. + /** + * @brief Computes the occupancy (i.e. maximum number of active blocks per CU) for the kernel + * @return The occupancy + * @note This function queries the maximum occupancy of the kernel using + * `hipOccupancyMaxActiveBlocksPerMultiprocessor`. + */ CK_TILE_HOST static int Occupancy() { int occupancy; @@ -665,402 +715,4 @@ struct StreamKKernel return max(occupancy, 1); } }; -} // namespace reboot - -/// @brief The Stream K GEMM kernel host arguments. -/// -/// @par Overview -/// This structure is passed to @ref StreamKKernel "StreamKKernel" when creating the kernel -/// arguments object. It contains all necessary information required to build proper kernel -/// arguments and launch the kernel on GPU. This structure defines the GEMM problem -/// configuration by stating all required information like M,N,K sizes and respective strides. 
-struct StreamKHostArgs : public ck_tile::UniversalGemmHostArgs<> -{ - CK_TILE_HOST explicit StreamKHostArgs(const void* a_ptr_, - const void* b_ptr_, - void* c_ptr_, - index_t M_, - index_t N_, - index_t K_, - index_t stride_A_, - index_t stride_B_, - index_t stride_C_, - StreamKReductionStrategy reduction_strategy_, - uint32_t num_sk_blocks_ = 0xffffffff) - : UniversalGemmHostArgs<>({a_ptr_}, - {b_ptr_}, - {/*ds_ptr*/}, - c_ptr_, - /*k_batch_ =*/1, - M_, - N_, - K_, - {stride_A_}, - {stride_B_}, - {/*stride_Ds_*/}, - stride_C_), - reduction_strategy{reduction_strategy_}, - num_sk_blocks{num_sk_blocks_} - { - } - - ck_tile::StreamKReductionStrategy reduction_strategy; - uint32_t num_sk_blocks; -}; - -template -struct StreamKKernel -{ - /// @brief Inject the UniversalGemmKernel base class to support execution of all necessary - /// functions. - using UniversalGemmKernel = - UniversalGemmKernel; - - static constexpr index_t kBlockSize = UniversalGemmKernel::kBlockSize; - - using TilePartitioner = remove_cvref_t; - using GemmPipeline = remove_cvref_t; - using EpiloguePipeline = remove_cvref_t; - - /// @brief Specify the layout configurations for A, B, and C - using ALayout = remove_cvref_t; - using BLayout = remove_cvref_t; - using CLayout = remove_cvref_t; - - /// @brief Specify the data type configurations for A, B, and C - using ADataType = remove_cvref_t; - using BDataType = remove_cvref_t; - using CDataType = remove_cvref_t; - - /// @brief ALayout and ADataType are expected to be scalars, not a tuple. - static_assert(!is_detected::value && - !is_detected::value, - "ALayout and ADataType must be scalars."); - - /// @brief BLayout and BDataType are expected to be scalars, not a tuple. - static_assert(!is_detected::value && - !is_detected::value, - "BLayout and BDataType must be scalars."); - - /// @brief CLayout and CDataType are expected to be scalars, not a tuple. 
- static_assert(!is_detected::value && - !is_detected::value, - "CLayout and CDataType must be scalars."); - - struct StreamKKernelArgs : ck_tile::UniversalGemmKernelArgs<> - { - /// @brief The strategy used by work groups to compute final results in C tensor. - StreamKReductionStrategy reduction_strategy; - /// @brief The number of stream k blocks. - uint32_t num_sk_blocks; - /// @brief A pointer to a buffer in device memory for accumulating partial via reduction - /// strategy. - void* workspace_ptr; - /// @brief An instance of the TilePartioner class for assisting with mapping workgroups to - /// the C tensor. - TilePartitioner tile_partitioner; - }; - - using KernelArgs = StreamKKernelArgs; - using Kernel = StreamKKernel; - - [[nodiscard]] CK_TILE_HOST static const std::string GetName() - { - // clang-format off - using P_ = GemmPipeline; - using WarpTile = typename P_::BlockGemmShape::WarpTile; - - return concat('_', "streamk", gemm_prec_str(), - concat('x', P_::MPerBlock, P_::NPerBlock, P_::KPerBlock), - concat('x', WarpTile::at(number<0>{}), WarpTile::at(number<1>{}), WarpTile::at(number<2>{})), - concat('x', P_::GetVectorSizeA(), P_::GetVectorSizeB(), P_::GetVectorSizeC()), - concat('x', P_::kPadM, P_::kPadN, P_::kPadK)); - // clang-format on - } - - /// @brief Compute the grid size for the Stream K kernel using the tile_partitioner. - /// @return The grid size. - CK_TILE_HOST static auto GridSize(const TilePartitioner& tile_partitioner) -> dim3 - { - return tile_partitioner.GridSize(); - } - - /// @brief Get the maximum occupancy grid size for the persistent kernel on the current device. - /// @return The maximum occupancy grid size. - /// @note This function queries the maximum occupancy of the kernel using - /// `hipOccupancyMaxActiveBlocksPerMultiprocessor`. 
- CK_TILE_HOST static auto MaxOccupancyGridSize(const stream_config& s) -> dim3 - { - return UniversalGemmKernel::MaxOccupancyGridSize(s); - } - - CK_TILE_HOST static constexpr auto BlockSize() -> dim3 - { - return UniversalGemmKernel::BlockSize(); - } - - /// @brief Constructs kernel arguments for the Stream-K kernel. - /// @param host_args Stream-K host arguments. - /// @param num_cu Number of compute units (CUs). The default is the number of CUs on the device. - /// The caller may select their own to assist with test reproducibility, etc. - /// @param occupancy The maximum number of active blocks per CU for this kernel. The caller may - /// select their own to assist with test reproducibility, etc. - /// @return The kernel arguments for Stream-K. - CK_TILE_HOST static StreamKKernelArgs MakeKernelArgs(const StreamKHostArgs& host_args, - int num_cu = NumCU(), - int occupancy = Occupancy()) - { - return StreamKKernelArgs{{host_args.as_ptr, - host_args.bs_ptr, - host_args.ds_ptr, - host_args.e_ptr, - host_args.M, - host_args.N, - host_args.K, - host_args.stride_As, - host_args.stride_Bs, - host_args.stride_Ds, - host_args.stride_E, - host_args.k_batch}, - host_args.reduction_strategy, - host_args.num_sk_blocks, - // The workspace pointer is set to nullptr because we must first - // instantiate the TilePartitioner to get the necessary size - /*workspace_ptr =*/nullptr, - TilePartitioner{static_cast(host_args.M), - static_cast(host_args.N), - static_cast(host_args.K), - static_cast(num_cu), - static_cast(occupancy), - host_args.num_sk_blocks}}; - } - - template - CK_TILE_DEVICE static void - RunGemm(const std::array& as_ptr, - const std::array& bs_ptr, - const std::array& ds_ptr, - CDataType* c_ptr, - void* smem_ptr_0, - const typename UniversalGemmKernel::KernelArgs& kargs, - const index_t num_loop, - const index_t block_idx_m, - const index_t block_idx_n, - const index_t k_size) - { - // Create Gemm tensor views, pad views and tile windows - const auto& 
gemm_tensor_views_tuple = - UniversalGemmKernel::template MakeGemmTensorViews( - as_ptr, bs_ptr, ds_ptr, c_ptr, kargs, k_size); - - const auto& gemm_pad_views = UniversalGemmKernel::MakeGemmPadViews(gemm_tensor_views_tuple); - auto gemm_tile_windows = - UniversalGemmKernel::MakeGemmTileWindows(gemm_pad_views, block_idx_m, block_idx_n); - - // Run GEMM cooperatively by whole workgroup. - const auto& as_block_window = gemm_tile_windows.at(UniversalGemmKernel::I0); - const auto& bs_block_window = gemm_tile_windows.at(UniversalGemmKernel::I1); - const auto& ds_block_window = gemm_tile_windows.at(UniversalGemmKernel::I2); - - // Since num_loop can vary per WG and per iteration of the Stream-K while loop, we compute - // has_hot_loop and tail_num here. This is a similar pattern used by grouped GEMM. In this - // case, we call the GemmPipeline's operator() function that takes both has_hot_loop and - // tail_num. - const bool has_hot_loop = GemmPipeline::BlockHasHotloop(num_loop); - const TailNumber tail_num = GemmPipeline::GetBlockLoopTailNum(num_loop); - - const auto& c_block_tile = GemmPipeline{}(as_block_window[UniversalGemmKernel::I0], - bs_block_window[UniversalGemmKernel::I0], - num_loop, - has_hot_loop, - tail_num, - smem_ptr_0); - - if(UseDefaultScheduler || (get_warp_id() == 0)) - { - // Run Epilogue Pipeline - auto& c_block_window = gemm_tile_windows.at(UniversalGemmKernel::I3); - - EpiloguePipeline{}(c_block_window, c_block_tile, ds_block_window, smem_ptr_0); - } - } - - CK_TILE_HOST static bool IsSupportedArgument(const StreamKKernelArgs& kargs) - { - if(kargs.reduction_strategy == StreamKReductionStrategy::Reduction) - { - if(ck_tile::EnvIsEnabled(CK_TILE_ENV(CK_TILE_LOGGING))) - { - CK_TILE_ERROR("CK Tile Stream-K only supports the atomic reduction strategy."); - } - return false; - } - return UniversalGemmKernel::IsSupportedArgument(kargs); - } - - /// @brief Computes the buffer size needed to store accumulation results for Stream K. 
- /// @return The buffer size needed. - CK_TILE_HOST static uint32_t GetWorkSpaceSize(const StreamKKernelArgs& kargs) - { - // For reduction, we need to determine the amount of device space for acculumation - // results and semaphores. - if(kargs.reduction_strategy == ck_tile::StreamKReductionStrategy::Reduction) - { - return kargs.tile_partitioner.GetWorkSpaceSize(sizeof(CDataType)); - } - - // Otherwise, no additional space is needed since blocks atomically store their results. - return 0; - } - - /// @brief Sets the kargs' current workspace_ptr to the given workspace_ptr. - /// @note Assumes that the given workspace_ptr points to allocated device memory. - CK_TILE_HOST static void SetWorkSpacePointer(StreamKKernelArgs& kargs, void* workspace_ptr) - { - kargs.workspace_ptr = workspace_ptr; - } - - /// @brief Entry point for the Stream-K Kernel, performing the main Stream-K loop. - CK_TILE_DEVICE void operator()(StreamKKernelArgs kargs) const - { - // Allocate LDS - __shared__ char smem_ptr_0[UniversalGemmKernel::GetSmemSize()]; - - uint32_t block_idx = ck_tile::get_block_1d_id(); - - bool is_padding_block = - amd_wave_read_first_lane(block_idx >= kargs.tile_partitioner.sk_num_blocks && - block_idx < kargs.tile_partitioner.dp_start_block_idx); - - // Padding blocks make it such that the DP blocks are aligned with the number of CUs; they - // should not partake in the GEMM - if(is_padding_block) - return; - - // Determine the K offset of the first and final macro tile in the A and B tensors along the - // K dimension. - uint32_t iter_start, iter_end; - kargs.tile_partitioner.GetBlockItr(block_idx, iter_start, iter_end); - - // Main Stream-K loop - while(true) - { - // Determine the number of macro tiles in A and B this WG is resposible for in the - // current C macro tile. - uint32_t current_iter_length = amd_wave_read_first_lane( - kargs.tile_partitioner.GetCurrentIterLength(iter_start, iter_end)); - - // Determine the 1D tile_idx and the iter_offset for this WG. 
- // The tile_idx is the 1D macro tile index in the C tensor. - // The iter_offset is the starting macro tile index in the K dimension for the WG in the - // current iteration of the while loop. - uint32_t tile_idx, iter_offset; - kargs.tile_partitioner.GetTileIdxWithOffset(iter_start, tile_idx, iter_offset); - - // Get the 2D tile index in the C tensor for this WG using the 1D index (i.e. tile_idx) - auto spatial_idx = kargs.tile_partitioner.GetOutputTileIndex(tile_idx); - - // Get the offsets in A, B, C tensors. - index_t i_m = static_cast(spatial_idx[UniversalGemmKernel::I0] * - TilePartitioner::MPerBlock); - index_t i_n = static_cast(spatial_idx[UniversalGemmKernel::I1] * - TilePartitioner::NPerBlock); - auto [i_k_a, i_k_b] = GetKOffsets( - static_cast(iter_offset), kargs.stride_As[0], kargs.stride_Bs[0]); - - // Determine the total size along the K dimension the WG is using in this iteration - // (used to construct tensor views). - index_t k_size = static_cast(current_iter_length * TilePartitioner::KPerBlock); - - // Update pointer offsets for A, B, and C. - const ADataType* a_ptr = static_cast(kargs.as_ptr[0]) + i_k_a; - const BDataType* b_ptr = static_cast(kargs.bs_ptr[0]) + i_k_b; - CDataType* c_ptr = static_cast(kargs.e_ptr); - - // Run the GEMM pipeline and Epilogue. - RunGemm({a_ptr}, - {b_ptr}, - {/*ds_ptr*/}, - c_ptr, - smem_ptr_0, - kargs, - current_iter_length, - i_m, - i_n, - k_size); - - // Prepare for next Stream-K loop iteration. - iter_start += current_iter_length; - if(iter_end <= iter_start) - break; - block_sync_lds(); - } - } - - private: - /// @brief Computes the K offsets in the A and B tensors given iter_offset, where iter_offset is - /// the starting macro tile index in the K dimension for the workgroup. - /// @return A tuple containing the offsets into the A and B tensors accounting for the layouts - /// of A and B. - /// @note The default case is that A is assumed to be row major and B is assumed to be column - /// major. 
- template - CK_TILE_DEVICE static tuple - GetKOffsets(index_t iter_offset, index_t stride_a, index_t stride_b) - { - index_t stride_offset_a; - index_t stride_offset_b; - if constexpr(std::is_same_v) - { - stride_offset_a = stride_a; - } - else - { - stride_offset_a = 1; - } - - if constexpr(std::is_same_v) - { - stride_offset_b = stride_b; - } - else - { - stride_offset_b = 1; - } - - index_t base_offset = iter_offset * TilePartitioner::KPerBlock; - - return make_tuple(base_offset * stride_offset_a, base_offset * stride_offset_b); - } - - CK_TILE_HOST static int NumCU() - { - hipDeviceProp_t dev_prop; - hipDevice_t dev; - hip_check_error(hipGetDevice(&dev)); - hip_check_error(hipGetDeviceProperties(&dev_prop, dev)); - int num_cu = dev_prop.multiProcessorCount; - - return num_cu; - } - - /// @brief Computes the occupancy (i.e. maximum number of active blocks per CU) for the kernel - /// @return The occupancy - /// @note This function queries the maximum occupancy of the kernel using - /// `hipOccupancyMaxActiveBlocksPerMultiprocessor`. - CK_TILE_HOST static int Occupancy() - { - int occupancy; - - // Since occupancy of 1 is valid for stream k, we set min_num_block_per_cu to 1 - constexpr int min_block_per_cu = 1; - const auto kernel = kentry; - - hip_check_error( - hipOccupancyMaxActiveBlocksPerMultiprocessor(&occupancy, kernel, kBlockSize, 0)); - - return occupancy; - } -}; - } // namespace ck_tile diff --git a/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner.hpp b/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner.hpp index f32f8b681b..8ee1ebc51a 100644 --- a/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner.hpp +++ b/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner.hpp @@ -226,7 +226,7 @@ struct StreamKTilePartitionerBase template -struct StreamKTilePartitioner_v2; +struct StreamKTilePartitioner; /** * @brief Persistent Stream-K tile partitioner derived struct. 
@@ -240,13 +240,13 @@ struct StreamKTilePartitioner_v2; * the C Tensor. */ template -struct StreamKTilePartitioner_v2 +struct StreamKTilePartitioner : StreamKTilePartitionerBase { - StreamKTilePartitioner_v2(ck_tile::index_t m, - ck_tile::index_t n, - ck_tile::index_t k, - ck_tile::index_t grid); + StreamKTilePartitioner(ck_tile::index_t m, + ck_tile::index_t n, + ck_tile::index_t k, + ck_tile::index_t grid); public: static constexpr bool PERSISTENT = true; @@ -287,13 +287,13 @@ struct StreamKTilePartitioner_v2 -struct StreamKTilePartitioner_v2 +struct StreamKTilePartitioner : StreamKTilePartitionerBase { - StreamKTilePartitioner_v2(ck_tile::index_t m, - ck_tile::index_t n, - ck_tile::index_t k, - ck_tile::index_t grid); + StreamKTilePartitioner(ck_tile::index_t m, + ck_tile::index_t n, + ck_tile::index_t k, + ck_tile::index_t grid); public: static constexpr bool PERSISTENT = false; diff --git a/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner_impl.hpp b/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner_impl.hpp index b3217624d1..9116e0448c 100644 --- a/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner_impl.hpp +++ b/include/ck_tile/ops/gemm/kernel/streamk_gemm_tile_partitioner_impl.hpp @@ -238,15 +238,12 @@ StreamKTilePartitionerBase::estimate_ template -struct StreamKTilePartitioner_v2; +struct StreamKTilePartitioner; // child class for Persistent Tile Partitioner template -StreamKTilePartitioner_v2:: - StreamKTilePartitioner_v2(ck_tile::index_t m, - ck_tile::index_t n, - ck_tile::index_t k, - ck_tile::index_t grid) +StreamKTilePartitioner::StreamKTilePartitioner( + ck_tile::index_t m, ck_tile::index_t n, ck_tile::index_t k, ck_tile::index_t grid) : StreamKTilePartitionerBase(m, n, k, grid) { // inherit from base constructor dp_tiles_per_cta_ = this->dp_tiles_ / this->grid_; @@ -255,8 +252,8 @@ StreamKTilePartitioner_v2:: template CK_TILE_HOST auto -StreamKTilePartitioner_v2::grid_size() - const noexcept -> dim3 
+StreamKTilePartitioner::grid_size() const noexcept + -> dim3 { if(extra_dp_tiles_ == 0) { @@ -270,7 +267,7 @@ StreamKTilePartitioner_v2::grid template CK_TILE_HOST_DEVICE index_t -StreamKTilePartitioner_v2::get_dp_tiles_per_cta() +StreamKTilePartitioner::get_dp_tiles_per_cta() const noexcept { return dp_tiles_per_cta_; @@ -278,7 +275,7 @@ StreamKTilePartitioner_v2::get_ template CK_TILE_HOST_DEVICE index_t -StreamKTilePartitioner_v2::get_extra_dp_tiles() +StreamKTilePartitioner::get_extra_dp_tiles() const noexcept { return extra_dp_tiles_; @@ -286,11 +283,8 @@ StreamKTilePartitioner_v2::get_ // child class for Non-Persistent Tile Partitioner template -StreamKTilePartitioner_v2:: - StreamKTilePartitioner_v2(ck_tile::index_t m, - ck_tile::index_t n, - ck_tile::index_t k, - ck_tile::index_t grid) +StreamKTilePartitioner::StreamKTilePartitioner( + ck_tile::index_t m, ck_tile::index_t n, ck_tile::index_t k, ck_tile::index_t grid) : StreamKTilePartitionerBase(m, n, k, grid) { // inherit from base constructor dp_ctas_ = this->dp_tiles_; @@ -300,15 +294,15 @@ StreamKTilePartitioner_v2:: template CK_TILE_HOST auto -StreamKTilePartitioner_v2::grid_size() - const noexcept -> dim3 +StreamKTilePartitioner::grid_size() const noexcept + -> dim3 { return dim3(dp_ctas_ + this->get_sk_ctas(), 1, 1); } template CK_TILE_HOST_DEVICE index_t -StreamKTilePartitioner_v2::get_dp_ctas() +StreamKTilePartitioner::get_dp_ctas() const noexcept { return dp_ctas_; @@ -316,16 +310,16 @@ StreamKTilePartitioner_v2::get template CK_TILE_HOST_DEVICE index_t -StreamKTilePartitioner_v2:: - get_dp_start_block_idx() const noexcept +StreamKTilePartitioner::get_dp_start_block_idx() + const noexcept { return dp_start_block_idx_; } template CK_TILE_HOST_DEVICE index_t -StreamKTilePartitioner_v2:: - get_sk_start_block_idx() const noexcept +StreamKTilePartitioner::get_sk_start_block_idx() + const noexcept { return sk_start_block_idx_; } diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 
3a667cb551..84c2ea090b 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -46,7 +46,7 @@ set(REGRESSION_TESTS test_ck_tile_fmha_fwd_bf16 test_ck_tile_fmha_fwd_fp16 test_ck_tile_fmha_fwd_fp8 - test_ck_tile_streamk_reboot_extended + test_ck_tile_streamk_extended ) function(add_test_executable TEST_NAME) diff --git a/test/ck_tile/gemm_streamk/CMakeLists.txt b/test/ck_tile/gemm_streamk/CMakeLists.txt index 3e3345dd0e..90aa7771fe 100644 --- a/test/ck_tile/gemm_streamk/CMakeLists.txt +++ b/test/ck_tile/gemm_streamk/CMakeLists.txt @@ -19,147 +19,29 @@ if(GPU_TARGETS MATCHES "gfx9") #TODO: support all arches #TODO: current c-shuffle only supports C layout as R - add_gtest_executable(test_ck_tile_streamk_smoke - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - ) - # TODO: enable extended tests after tolerances for atomic reductions are addressed. 
- # add_gtest_executable(test_ck_tile_streamk_extended - # # compv3 pipeline - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - - # 
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - - # # TODO: add compv4 pipeline - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # 
#${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # 
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - - - # # mem pipeline - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # 
#${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp - # ) - target_compile_options(test_ck_tile_streamk_smoke PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) -else() - message(DEBUG "Skipping test_ck_tile_streamk_smoke for current target") -endif() - -if(GPU_TARGETS MATCHES "gfx9|gfx11|gfx12") - include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) add_gtest_executable(test_ck_tile_streamk_tile_partitioner test_streamk_tile_partitioner.cpp) - add_gtest_executable(test_ck_tile_streamk_reboot_smoke - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_reboot_fp16_persistent.cpp - 
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_reboot_bf16_persistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp - test_gemm_streamk_reboot_util.cpp) - add_gtest_executable(test_ck_tile_streamk_reboot_extended - ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_reboot_fp16_persistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_reboot_bf16_persistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp - test_gemm_streamk_reboot_util.cpp) + add_gtest_executable(test_ck_tile_streamk_smoke + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf16_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp8_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf8_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_nonpersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf16_nonpersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp8_nonpersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf8_nonpersistent.cpp + test_gemm_streamk_util.cpp) + add_gtest_executable(test_ck_tile_streamk_extended + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp16_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf16_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp8_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf8_persistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp16_nonpersistent.cpp + 
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf16_nonpersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp8_nonpersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf8_nonpersistent.cpp + test_gemm_streamk_util.cpp) + target_compile_options(test_ck_tile_streamk_smoke PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) + target_compile_options(test_ck_tile_streamk_extended PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS}) else() message(DEBUG "Skipping test_ck_tile_streamk unit tests for current target") endif() diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 761f95af88..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 747243039f..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 0b44e55738..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 846424d3df..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index c7a0031dbf..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 2829d5c82b..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 263381cc2f..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 3ef025ec86..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 1905f70a7f..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 45a3f2ff36..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 19974273fb..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 36d2d6f7cb..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index dc4802ff68..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index e09992d362..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 21ec72f0f8..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 885054789b..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 99cc58d967..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index c5bab8d95d..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index bd505ed866..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index fb9908321a..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index ebe9bf7173..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 88612d536a..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 87e667eec9..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index a87749b10d..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 710dcc15dc..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 98f3e3fb05..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 6defea0fc3..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 3a51718502..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index f06cb269ee..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index dad028e597..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 05a27c5f42..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 72d8b57e18..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index e89a3942cd..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 6c815d3ae8..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index d413887a0e..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index c55a088285..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 9884a7f4be..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 0d0eebc7d2..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 4a9788fadb..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 79bd7b0aa1..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index b03de5360b..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index cb0c4af76a..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index d7bf73888e..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 2273c5d07f..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 12943334d7..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index c319ab0c5b..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index dd03f221c2..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index ed7e103584..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index e2b81c6d55..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index ab688e2bb4..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 4a76a78c13..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 19b9c06182..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index d40f7369c5..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 482ecabc5b..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index bb1da4e8c0..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 8f8137f07d..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 5e4f122f72..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 2ee71d62dc..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 632af43e0a..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index f56320092f..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index d61389c941..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index d276153efc..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index a31d8ed592..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index c68e67658a..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 851765f0aa..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 0f7a3f8ca8..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 0e7f1e1fad..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 8a85738652..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index ab0220f4ef..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 9e60ef1717..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 61cd772d51..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 66e0e80c46..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index a2f26b768e..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index d94547daa7..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 090b472d45..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 5535325436..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index a2f999a69d..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index f6d4b50c4a..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 7dd85dbf0d..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 58ba61ad38..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf16_nonpersistent.cpp new file mode 100644 index 0000000000..7c9c2c9657 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf16_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf16NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf16NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf16NonPersistent, KernelTypesStreamKBf16NonPersistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf16_persistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf16_persistent.cpp new file mode 100644 index 0000000000..dd4bbad61b --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf16_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf16Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf16Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf16Persistent, KernelTypesStreamKBf16Persistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf8_nonpersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf8_nonpersistent.cpp new file mode 100644 index 0000000000..9b3b0fccb9 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf8_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf8NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf8NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf8NonPersistent, KernelTypesStreamKBf8NonPersistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf8_persistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf8_persistent.cpp new file mode 100644 index 0000000000..5f1bdaca86 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_bf8_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf8Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf8Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf8Persistent, KernelTypesStreamKBf8Persistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp16_nonpersistent.cpp new file mode 100644 index 0000000000..f1a3bad142 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp16_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp16NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp16NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp16NonPersistent, KernelTypesStreamKFp16NonPersistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp16_persistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp16_persistent.cpp new file mode 100644 index 0000000000..33b474526c --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp16_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp16Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp16Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp16Persistent, KernelTypesStreamKFp16Persistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp8_nonpersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp8_nonpersistent.cpp new file mode 100644 index 0000000000..0cdb4091d1 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp8_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp8NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp8NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp8NonPersistent, KernelTypesStreamKFp8NonPersistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp8_persistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp8_persistent.cpp new file mode 100644 index 0000000000..d418c889cd --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_fp8_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp8Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp8Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp8Persistent, KernelTypesStreamKFp8Persistent); + +#include "test_gemm_streamk_extended_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp deleted file mode 100644 index eb4478f3d6..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootBf16NonPersistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootBf16NonPersistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootBf16NonPersistent, KernelTypesStreamKBf16NonPersistent); - -#include "test_gemm_streamk_reboot_extended_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_bf16_persistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_bf16_persistent.cpp deleted file mode 100644 index c42ada1a98..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_bf16_persistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootBf16Persistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootBf16Persistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootBf16Persistent, KernelTypesStreamKBf16Persistent); - -#include "test_gemm_streamk_reboot_extended_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp deleted file mode 100644 index 664c16a5e6..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootFp16NonPersistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootFp16NonPersistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootFp16NonPersistent, KernelTypesStreamKFp16NonPersistent); - -#include "test_gemm_streamk_reboot_extended_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_fp16_persistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_fp16_persistent.cpp deleted file mode 100644 index 39c79b4180..0000000000 --- a/test/ck_tile/gemm_streamk/extended_tests/test_gemm_streamk_reboot_fp16_persistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootFp16Persistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootFp16Persistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootFp16Persistent, KernelTypesStreamKFp16Persistent); - -#include "test_gemm_streamk_reboot_extended_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 9f43b7a0a7..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index cda7f2a72b..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 115c5449d4..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index b1cd42d599..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 944b6b1960..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 165f8349e9..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 9705060b18..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 5b3350534a..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index f2406d652d..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF8_CCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index e961d7c35b..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF8_CRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 93b7c57b3a..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF8_RCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 64fa12d226..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/bf8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS BF8_RRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index e51fb3b959..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 22d417bdde..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 3d45ac02ab..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index dac4308d66..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 52b11dd8a2..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 0cb8d1d338..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index ce9ec9244a..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 93f2f90048..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index cdf4e8306e..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f8_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F8_CCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 5edbec9c45..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f8_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F8_CRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index 4e3a9eaa25..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f8_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F8_RCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp deleted file mode 100644 index ab2eabb442..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/f8_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp +++ /dev/null @@ -1,11 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_common_includes.hpp" - -#define TEST_SUITE_PARAMS F8_RRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent -#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) - -DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); - -#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf16_nonpersistent.cpp new file mode 100644 index 0000000000..95117b6f0d --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf16_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf16NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf16NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf16NonPersistent, KernelTypesStreamKBf16NonPersistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf16_persistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf16_persistent.cpp new file mode 100644 index 0000000000..5e0705ab29 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf16_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf16Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf16Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf16Persistent, KernelTypesStreamKBf16Persistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf8_nonpersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf8_nonpersistent.cpp new file mode 100644 index 0000000000..21e447af29 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf8_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf8NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf8NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf8NonPersistent, KernelTypesStreamKBf8NonPersistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf8_persistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf8_persistent.cpp new file mode 100644 index 0000000000..62b7767a69 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_bf8_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKBf8Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKBf8Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKBf8Persistent, KernelTypesStreamKBf8Persistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp16_nonpersistent.cpp new file mode 100644 index 0000000000..fc18b9ebf7 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp16_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp16NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp16NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp16NonPersistent, KernelTypesStreamKFp16NonPersistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp16_persistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp16_persistent.cpp new file mode 100644 index 0000000000..8756da4ad8 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp16_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp16Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp16Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp16Persistent, KernelTypesStreamKFp16Persistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp8_nonpersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp8_nonpersistent.cpp new file mode 100644 index 0000000000..58dca5ca1d --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp8_nonpersistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp8NonPersistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp8NonPersistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp8NonPersistent, KernelTypesStreamKFp8NonPersistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp8_persistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp8_persistent.cpp new file mode 100644 index 0000000000..1d1e1e31ec --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_fp8_persistent.cpp @@ -0,0 +1,17 @@ +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +template +class TestCkTileStreamKFp8Persistent : public TestCkTileStreamK +{ +}; + +#define TEST_SUITE_NAME TestCkTileStreamKFp8Persistent + +TYPED_TEST_SUITE(TestCkTileStreamKFp8Persistent, KernelTypesStreamKFp8Persistent); + +#include "test_gemm_streamk_smoke_cases.inc" + +#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp deleted file mode 100644 index 0c1813fb65..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_bf16_nonpersistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootBf16NonPersistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootBf16NonPersistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootBf16NonPersistent, KernelTypesStreamKBf16NonPersistent); - -#include "test_gemm_streamk_reboot_smoke_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_bf16_persistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_bf16_persistent.cpp deleted file mode 100644 index e78092c4ba..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_bf16_persistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootBf16Persistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootBf16Persistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootBf16Persistent, KernelTypesStreamKBf16Persistent); - -#include "test_gemm_streamk_reboot_smoke_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp deleted file mode 100644 index 5e6118bd0c..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_fp16_nonpersistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootFp16NonPersistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootFp16NonPersistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootFp16NonPersistent, KernelTypesStreamKFp16NonPersistent); - -#include "test_gemm_streamk_reboot_smoke_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_fp16_persistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_fp16_persistent.cpp deleted file mode 100644 index 9f9c8f8234..0000000000 --- a/test/ck_tile/gemm_streamk/smoke_tests/test_gemm_streamk_reboot_fp16_persistent.cpp +++ /dev/null @@ -1,19 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include "test_gemm_streamk_reboot_types.hpp" -#include "test_gemm_streamk_reboot_util.hpp" -#include "gtest/gtest.h" - -template -class TestCkTileStreamKRebootFp16Persistent : public TestCkTileStreamKReboot -{ -}; - -#define TEST_SUITE_NAME TestCkTileStreamKRebootFp16Persistent - -TYPED_TEST_SUITE(TestCkTileStreamKRebootFp16Persistent, KernelTypesStreamKFp16Persistent); - -#include "test_gemm_streamk_reboot_smoke_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk.hpp deleted file mode 100644 index c341789435..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk.hpp +++ /dev/null @@ -1,282 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include -#include -#include -#include -#include - -#include "ck_tile/host.hpp" -#include "ck_tile/ops/common.hpp" -#include "ck_tile/ops/epilogue.hpp" -#include "ck_tile/ops/gemm.hpp" - -#include "test_gemm_streamk_util.hpp" - -template -class TestCkTileStreamK : public ::testing::Test -{ - protected: - using ALayout = std::tuple_element_t<0, Tuple>; - using BLayout = std::tuple_element_t<1, Tuple>; - using CLayout = std::tuple_element_t<2, Tuple>; - using ADataType = std::tuple_element_t<3, Tuple>; - using BDataType = std::tuple_element_t<4, Tuple>; - using AccDataType = std::tuple_element_t<5, Tuple>; - using CDataType = std::tuple_element_t<6, Tuple>; - using DsLayout = ck_tile::tuple<>; - using DsDataType = ck_tile::tuple<>; - - static constexpr ck_tile::index_t M_Tile = std::tuple_element_t<7, Tuple>::value; - static constexpr ck_tile::index_t N_Tile = std::tuple_element_t<8, Tuple>::value; - static constexpr ck_tile::index_t K_Tile = std::tuple_element_t<9, Tuple>::value; - - static constexpr ck_tile::index_t M_Warp = std::tuple_element_t<10, Tuple>::value; - static constexpr ck_tile::index_t N_Warp = 
std::tuple_element_t<11, Tuple>::value; - static constexpr ck_tile::index_t K_Warp = std::tuple_element_t<12, Tuple>::value; - - static constexpr ck_tile::index_t M_Warp_Tile = std::tuple_element_t<13, Tuple>::value; - static constexpr ck_tile::index_t N_Warp_Tile = std::tuple_element_t<14, Tuple>::value; - static constexpr ck_tile::index_t K_Warp_Tile = std::tuple_element_t<15, Tuple>::value; - - static constexpr GemmPipelineType PipelineType = std::tuple_element_t<16, Tuple>::value; - static constexpr bool Persistent = std::tuple_element_t<17, Tuple>::value; - - template - std::tuple invoke_streamk(const ck_tile::StreamKHostArgs& args, - const ck_tile::stream_config& s, - int num_cu, - int occupancy) - { - constexpr bool kPadM = PadM; - constexpr bool kPadN = PadN; - constexpr bool kPadK = PadK; - constexpr bool preshuffle = Preshuffle; - - constexpr bool DoubleSmemBuffer = false; - constexpr int kBlockPerCu = 1; - constexpr bool StructuredSparsity = false; - constexpr bool NumWaveGroup = 1; - - using GemmShape = - ck_tile::TileGemmShape, - ck_tile::sequence, - ck_tile::sequence>; - - using TilePartitioner = ck_tile::StreamKTilePartitioner; - - using GemmUniversalTraits = ck_tile::TileGemmUniversalTraits; - - const auto Run = [&](const auto memory_operation_) { - constexpr auto memory_operation = memory_operation_.value; - constexpr auto scheduler = ck_tile::GemmPipelineScheduler::Intrawave; - - // We create the GEMM pipeline without specifying has_hot_loop or tail_num. - // This is because num_loop can vary (a) per WG and (b) per iteration of the Stream-K - // while loop. Instead, has_hot_loop and tail_num are determined in the Stream-K - // Kernel's RunGemm function. This is a similar pattern used by grouped GEMM. - using UniversalGemmProblem = ck_tile::UniversalGemmPipelineProblem; - // For initial testing, we will just test with one pipeline. - // More extensive testing is coming later and will test other pipelines. 
- using GemmPipeline = - typename GemmPipelineTypeSelector::pipeline; - - using GemmEpilogue = ck_tile::CShuffleEpilogue< - ck_tile::CShuffleEpilogueProblem, - AccDataType, - CDataType, - ck_tile::tuple<>, - CLayout, - ck_tile::element_wise::PassThrough, - TilePartitioner::MPerBlock, - TilePartitioner::NPerBlock, - M_Warp, - N_Warp, - M_Warp_Tile, - N_Warp_Tile, - K_Warp_Tile, - UniversalGemmProblem::TransposeC, - memory_operation>>; - - using Kernel = ck_tile::StreamKKernel; - - auto kargs = Kernel::MakeKernelArgs(args, num_cu, occupancy); - - if(!Kernel::IsSupportedArgument(kargs)) - { - return std::tuple{false, -1}; - } - - dim3 grid_dims = Kernel::GridSize(kargs.tile_partitioner); - dim3 block_dims = Kernel::BlockSize(); - - ck_tile::launch_kernel( - s, ck_tile::make_kernel(Kernel{}, grid_dims, block_dims, 0, kargs)); - - ck_tile::index_t num_accumulations_per_tile = - ck_tile::estimate_num_wgs_per_tile( - kargs.tile_partitioner.sk_num_blocks, - // k_iters_per_big_block could be 1, which indicates that all blocks are - // big and each does one iteration. Thus, we ensure the value passed in is at - // least 1 to avoid division by zero errors. - ck_tile::max(kargs.tile_partitioner.k_iters_per_big_block - 1, 1u), - kargs.tile_partitioner.k_iters_per_tile.get()); - - return std::tuple{true, num_accumulations_per_tile}; - }; - - return Run(ck_tile::integral_constant{}); - } - - public: - // Since Stream-K is build on gfx9, the lower bound for CUs is 104. Thus, we default num_cu to - // 104 and occupancy to 1 to ensure tests are reproducible on different architectures. 
- void Run(ck_tile::index_t M, - ck_tile::index_t N, - ck_tile::index_t K, - uint32_t num_sk_blocks = 0xffffffff, - ck_tile::StreamKReductionStrategy reduction_strategy = - ck_tile::StreamKReductionStrategy::Atomic, - int occupancy = 1, - int num_cu = 104, - ck_tile::index_t stride_A = 0, - ck_tile::index_t stride_B = 0, - ck_tile::index_t stride_C = 0) - { - - using namespace ck_tile::literals; - - if(reduction_strategy == ck_tile::StreamKReductionStrategy::Reduction) - { - throw std::runtime_error("Reduction Strategy is current unsupported!\n"); - } - - auto f_host_tensor_descriptor = [](std::size_t row, - std::size_t col, - std::size_t stride, - auto layout) { - if constexpr(std::is_same_v) - { - return ck_tile::HostTensorDescriptor({row, col}, {stride, 1_uz}); - } - else - { - return ck_tile::HostTensorDescriptor({row, col}, {1_uz, stride}); - } - }; - - auto f_get_default_stride = - [](std::size_t row, std::size_t col, std::size_t stride, auto layout) { - if(stride == 0) - { - if constexpr(std::is_same_v) - { - return col; - } - else - { - return row; - } - } - else - return stride; - }; - - stride_A = f_get_default_stride(M, K, stride_A, ALayout{}); - stride_B = f_get_default_stride(K, N, stride_B, BLayout{}); - stride_C = f_get_default_stride(M, N, stride_C, CLayout{}); - - ck_tile::HostTensor a_m_k(f_host_tensor_descriptor(M, K, stride_A, ALayout{})); - ck_tile::HostTensor b_k_n(f_host_tensor_descriptor(K, N, stride_B, BLayout{})); - ck_tile::HostTensor c_m_n_dev_result( - f_host_tensor_descriptor(M, N, stride_C, CLayout{})); - - // TODO: Add randomized number generation ranges for different datatypes - ck_tile::FillUniformDistributionIntegerValue{-3, 3, /*seed*/ 11939}(a_m_k); - ck_tile::FillUniformDistributionIntegerValue{-3, 3, /*seed*/ 11940}(b_k_n); - - ck_tile::DeviceMem a_m_k_dev_buf(a_m_k.get_element_space_size_in_bytes()); - ck_tile::DeviceMem b_k_n_dev_buf(b_k_n.get_element_space_size_in_bytes()); - ck_tile::DeviceMem 
c_m_n_dev_buf(c_m_n_dev_result.get_element_space_size_in_bytes()); - - a_m_k_dev_buf.ToDevice(a_m_k.data()); - b_k_n_dev_buf.ToDevice(b_k_n.data()); - c_m_n_dev_buf.SetZero(); - c_m_n_dev_result.SetZero(); - - ck_tile::StreamKHostArgs args{a_m_k_dev_buf.GetDeviceBuffer(), - b_k_n_dev_buf.GetDeviceBuffer(), - c_m_n_dev_buf.GetDeviceBuffer(), - M, - N, - K, - stride_A, - stride_B, - stride_C, - reduction_strategy, - num_sk_blocks}; - - const auto [is_valid_instance, num_accumulations_per_tile] = - invoke_streamk( - args, ck_tile::stream_config{nullptr, false, 0, 0, 1}, num_cu, occupancy); - - if(!is_valid_instance) - { - GTEST_SKIP() << "Skipping this test: The kernel cannot solve the problem\n"; - } - - c_m_n_dev_buf.FromDevice(c_m_n_dev_result.data()); - - ck_tile::HostTensor c_m_n_host_ref( - f_host_tensor_descriptor(M, N, stride_C, CLayout{})); - c_m_n_host_ref.SetZero(); - - ck_tile::reference_gemm( - a_m_k, b_k_n, c_m_n_host_ref); - - const float max_accumulated_value = - *std::max_element(c_m_n_host_ref.mData.begin(), c_m_n_host_ref.mData.end()); - const auto rtol_atol = calculate_rtol_atol( - K, num_accumulations_per_tile, max_accumulated_value); - - bool pass = ck_tile::check_err(c_m_n_dev_result, - c_m_n_host_ref, - "Error: Incorrect results!", - rtol_atol.at(ck_tile::number<0>{}), - rtol_atol.at(ck_tile::number<1>{})); - - EXPECT_TRUE(pass); - }; -}; diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_cases.inc b/test/ck_tile/gemm_streamk/test_gemm_streamk_cases.inc deleted file mode 100644 index ff597d5015..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_cases.inc +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#pragma once - -// Ensure that we have the required macros defined before proceeding -#ifndef TEST_SUITE_NAME -#error "TEST_SUITE_NAME must be defined before including this file" -#endif -#ifndef TEST_SUITE_PARAMS -#error "TEST_SUITE_PARAMS must be defined before including this file" -#endif - -// Macros to help generate test names from the parameters given -// Concatenate is able to stitch the template parameters symbol together with the runtime args -// values -#define CONCATENATE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) \ - M##SIZE_M##_N##SIZE_N##_K##SIZE_K##_SKBlocks##NUM_SK_BLOCKS -// Helper macro to expand the arguments before passing them to CONCATENATE_TEST_NAME -#define MAKE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) \ - CONCATENATE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) - -// Macro to add a test TEST_NAME to the TEST_SUITE_NAME with the given parameters -#define STREAM_K_TEST_INTERNAL(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS, TEST_NAME) \ - TYPED_TEST(TEST_SUITE_NAME, TEST_NAME) \ - { \ - ck_tile::index_t M = SIZE_M; \ - ck_tile::index_t N = SIZE_N; \ - ck_tile::index_t K = SIZE_K; \ - uint32_t num_sk_blocks = NUM_SK_BLOCKS; \ - \ - this->Run(M, N, K, num_sk_blocks); \ - } - -// Macro that generates a test name from the TEST_SUITE_TPARAMS symbol and the given parameters, -// then adds that test to test suite TEST_SUITE_NAME -#define STREAM_K_TEST(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) \ - STREAM_K_TEST_INTERNAL(SIZE_M, \ - SIZE_N, \ - SIZE_K, \ - NUM_SK_BLOCKS, \ - MAKE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS)) - -STREAM_K_TEST(1, 1, 1, 0) -STREAM_K_TEST(1, 1, 1, 1) - -// TODO: fails for <= wave tile -// STREAM_K_TEST(16, 16, 16, 0) -// STREAM_K_TEST(16, 16, 16, 1) -// STREAM_K_TEST(32, 32, 16, 0) -// STREAM_K_TEST(32, 32, 16, 1) - -STREAM_K_TEST(32, 32, 32, 0) -STREAM_K_TEST(32, 32, 32, 1) -STREAM_K_TEST(32, 32, 32, 2) -STREAM_K_TEST(32, 32, 32, 3) - -/// Prime number odd offsets 
-STREAM_K_TEST(37, 32, 32, 0) -STREAM_K_TEST(37, 32, 32, 1) -STREAM_K_TEST(37, 32, 32, 2) -STREAM_K_TEST(37, 32, 32, 3) - -STREAM_K_TEST(32, 37, 32, 0) -STREAM_K_TEST(32, 37, 32, 1) -STREAM_K_TEST(32, 37, 32, 2) -STREAM_K_TEST(32, 37, 32, 3) - -// TODO: Fails -// STREAM_K_TEST(32, 32, 37, 0) -// STREAM_K_TEST(32, 32, 37, 1) -// STREAM_K_TEST(32, 32, 37, 2) -// STREAM_K_TEST(32, 32, 37, 3) - -// TODO: Fails -STREAM_K_TEST(37, 32, 37, 0) -STREAM_K_TEST(37, 32, 37, 1) -STREAM_K_TEST(37, 32, 37, 2) -STREAM_K_TEST(37, 32, 37, 3) - -STREAM_K_TEST(37, 37, 37, 0) -STREAM_K_TEST(37, 37, 37, 1) -STREAM_K_TEST(37, 37, 37, 2) -STREAM_K_TEST(37, 37, 37, 3) - -/// Cubed sizes -STREAM_K_TEST(256, 256, 256, 0) -STREAM_K_TEST(256, 256, 256, 4) -STREAM_K_TEST(256, 256, 256, 8) - -// TODO: Fails -// STREAM_K_TEST(272, 272, 272, 0) -// STREAM_K_TEST(272, 272, 272, 8) -// STREAM_K_TEST(272, 272, 272, 16) - -STREAM_K_TEST(288, 288, 288, 0) -STREAM_K_TEST(288, 288, 288, 4) -STREAM_K_TEST(288, 288, 288, 8) - -STREAM_K_TEST(512, 512, 512, 0) -STREAM_K_TEST(512, 512, 512, 8) -STREAM_K_TEST(512, 512, 512, 16) - -// TODO: Fails -// STREAM_K_TEST(528, 528, 528, 0) -// STREAM_K_TEST(528, 528, 528, 8) -// STREAM_K_TEST(528, 528, 528, 16) - -STREAM_K_TEST(544, 544, 544, 0) -STREAM_K_TEST(544, 544, 544, 8) -STREAM_K_TEST(544, 544, 544, 16) - -/// Long M skinny N and K -STREAM_K_TEST(512, 1, 1, 0) -STREAM_K_TEST(512, 1, 1, 8) -STREAM_K_TEST(512, 1, 1, 16) - -STREAM_K_TEST(512, 32, 32, 0) -STREAM_K_TEST(512, 32, 32, 8) -STREAM_K_TEST(512, 32, 32, 16) - -/// Long M and N and skinny K -// TODO: Fails with core dump -// STREAM_K_TEST(512, 512, 1, 0) -// STREAM_K_TEST(512, 512, 1, 8) -// STREAM_K_TEST(512, 512, 1, 16) - -STREAM_K_TEST(512, 512, 32, 0) -STREAM_K_TEST(512, 512, 32, 8) -STREAM_K_TEST(512, 512, 32, 16) - -/// Long M and K and skinny N -STREAM_K_TEST(512, 1, 512, 0) -STREAM_K_TEST(512, 1, 512, 8) -STREAM_K_TEST(512, 1, 512, 16) - -STREAM_K_TEST(512, 32, 512, 0) -STREAM_K_TEST(512, 32, 512, 
8) -STREAM_K_TEST(512, 32, 512, 16) - -/// Long K and skinny M and N -STREAM_K_TEST(1, 1, 512, 0) -STREAM_K_TEST(1, 1, 512, 8) -STREAM_K_TEST(1, 1, 512, 16) - -STREAM_K_TEST(32, 32, 512, 0) -STREAM_K_TEST(32, 32, 512, 8) -STREAM_K_TEST(32, 32, 512, 16) - -// TODO: Renable this test once reduction is implemented -TYPED_TEST(TEST_SUITE_NAME, StreamK_M256_N256_K256_SKBlocks12) -{ - GTEST_SKIP() << "Skipping this test: There are precision issues with atomics due to >=3 WGs " - "contributing to each macro tile in C"; - - ck_tile::index_t M = 256; - ck_tile::index_t N = 256; - ck_tile::index_t K = 256; - uint32_t num_sk_blocks = 12; - - this->Run(M, N, K, num_sk_blocks); -} - -TYPED_TEST(TEST_SUITE_NAME, StreamK_Unsupported_Reduction) -{ - - ck_tile::index_t M = 3840; - ck_tile::index_t N = 4096; - ck_tile::index_t K = 4096; - uint32_t num_sk_blocks = 64; - - EXPECT_THROW(this->Run(M, N, K, num_sk_blocks, ck_tile::StreamKReductionStrategy::Reduction), - std::runtime_error); -} diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_common_includes.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_common_includes.hpp index b1faf3848b..a268fa917a 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_common_includes.hpp +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_common_includes.hpp @@ -1,8 +1,7 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT #pragma once -#include "test_gemm_streamk.hpp" #include "test_gemm_streamk_types.hpp" #include "test_gemm_streamk_util.hpp" #include "gtest/gtest.h" diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_extended_cases.inc b/test/ck_tile/gemm_streamk/test_gemm_streamk_extended_cases.inc similarity index 91% rename from test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_extended_cases.inc rename to test/ck_tile/gemm_streamk/test_gemm_streamk_extended_cases.inc index 8b6522bd75..b0c34da2c1 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_extended_cases.inc +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_extended_cases.inc @@ -1,5 +1,5 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT #pragma once diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_types.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_types.hpp deleted file mode 100644 index f01f7e142f..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_types.hpp +++ /dev/null @@ -1,56 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - -#include -#include - -#include "gtest/gtest.h" - -#include "ck_tile/host.hpp" - -using F16 = ck_tile::half_t; -using F32 = float; -using BF16 = ck_tile::bf16_t; - -using Row = ck_tile::tensor_layout::gemm::RowMajor; -using Col = ck_tile::tensor_layout::gemm::ColumnMajor; - -using Persistent = std::true_type; -using NonPersistent = std::false_type; - -using I32 = ck_tile::number<32>; -using I128 = ck_tile::number<128>; - -// clang-format off -using KernelTypesStreamKFp16Persistent = ::testing::Types< -// ALayout BLayout CLayout ADataType BDataType AccDataType CDataType M_MacroTile N_MacroTile K_MacroTile Persistent - - std::tuple< Row, Row, Row, F16, F16, F32, F16, I128, I128, I32, Persistent>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I128, I128, I32, Persistent>, - std::tuple< Col, Col, Row, F16, F16, F32, F16, I128, I128, I32, Persistent>, - std::tuple< Col, Row, Row, F16, F16, F32, F16, I128, I128, I32, Persistent> ->; - -using KernelTypesStreamKBf16Persistent = ::testing::Types< - std::tuple< Row, Row, Row, BF16, BF16, F32, BF16, I128, I128, I32, Persistent>, - std::tuple< Row, Col, Row, BF16, BF16, F32, BF16, I128, I128, I32, Persistent>, - std::tuple< Col, Col, Row, BF16, BF16, F32, BF16, I128, I128, I32, Persistent>, - std::tuple< Col, Row, Row, BF16, BF16, F32, BF16, I128, I128, I32, Persistent> ->; - -using KernelTypesStreamKFp16NonPersistent = ::testing::Types< -// ALayout BLayout CLayout ADataType BDataType AccDataType CDataType M_MacroTile N_MacroTile K_MacroTile Persistent - - std::tuple< Row, Row, Row, F16, F16, F32, F16, I128, I128, I32, NonPersistent>, - std::tuple< Row, Col, Row, F16, F16, F32, F16, I128, I128, I32, NonPersistent>, - std::tuple< Col, Col, Row, F16, F16, F32, F16, I128, I128, I32, NonPersistent>, - std::tuple< Col, Row, Row, F16, F16, F32, F16, I128, I128, I32, NonPersistent> ->; - -using KernelTypesStreamKBf16NonPersistent = ::testing::Types< - std::tuple< Row, Row, Row, BF16, BF16, F32, 
BF16, I128, I128, I32, NonPersistent>, - std::tuple< Row, Col, Row, BF16, BF16, F32, BF16, I128, I128, I32, NonPersistent>, - std::tuple< Col, Col, Row, BF16, BF16, F32, BF16, I128, I128, I32, NonPersistent>, - std::tuple< Col, Row, Row, BF16, BF16, F32, BF16, I128, I128, I32, NonPersistent> ->; -// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_util.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_util.hpp deleted file mode 100644 index c3605cbcda..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_util.hpp +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#include -#include -#include -#include -#include - -#include "ck_tile/host.hpp" -#include "ck_tile/ops/epilogue.hpp" -#include "ck_tile/ops/gemm.hpp" - -template -auto calculate_rtol_atol(const ck_tile::index_t K, - const ck_tile::index_t kbatch, - const float max_accumulated_value) -{ - using ComputeType = - std::conditional_t; - // Calculate thresholds - const auto rtol = ck_tile::get_relative_threshold( - ck_tile::integer_divide_ceil(K, kbatch)); - const auto atol = ck_tile::get_absolute_threshold( - max_accumulated_value / kbatch, ck_tile::integer_divide_ceil(K, kbatch)); - - // The logic below may need to become more advanced once bugs in Stream-K Tile Partitioner are - // resolved. Because the number of WGs contributing to a macro tile in C may not be the same for - // all macro tiles in C. 
- - // Calculate error due to more than 1 WG contributing to the same macro tile in C - const auto rtol_split_k = - ck_tile::get_relative_threshold(kbatch); - const auto atol_split_k = ck_tile::get_absolute_threshold( - max_accumulated_value, kbatch); - // Use higher threshold - return ck_tile::make_tuple(std::max(rtol, rtol_split_k), std::max(atol, atol_split_k)); -} - -ck_tile::index_t get_cu_count(); - -template -class TestCkTileStreamKReboot : public ::testing::Test -{ - protected: - using ALayout = std::tuple_element_t<0, Tuple>; - using BLayout = std::tuple_element_t<1, Tuple>; - using CLayout = std::tuple_element_t<2, Tuple>; - using ADataType = std::tuple_element_t<3, Tuple>; - using BDataType = std::tuple_element_t<4, Tuple>; - using AccDataType = std::tuple_element_t<5, Tuple>; - using CDataType = std::tuple_element_t<6, Tuple>; - using DsLayout = ck_tile::tuple<>; - using DsDataType = ck_tile::tuple<>; - static constexpr ck_tile::index_t M_Tile = std::tuple_element_t<7, Tuple>::value; - static constexpr ck_tile::index_t N_Tile = std::tuple_element_t<8, Tuple>::value; - static constexpr ck_tile::index_t K_Tile = std::tuple_element_t<9, Tuple>::value; - static constexpr bool Persistent = std::tuple_element_t<10, Tuple>::value; - - template - ck_tile::index_t invoke_streamk(const ck_tile::reboot::StreamKHostArgs& args, - const ck_tile::stream_config& s) - { - constexpr ck_tile::index_t M_Warp = 2; - constexpr ck_tile::index_t N_Warp = 2; - constexpr ck_tile::index_t K_Warp = 1; -#if CK_TILE_USE_WMMA - constexpr ck_tile::index_t M_Warp_Tile = 16; - constexpr ck_tile::index_t N_Warp_Tile = 16; - constexpr ck_tile::index_t K_Warp_Tile = 16; -#else - constexpr ck_tile::index_t M_Warp_Tile = 32; - constexpr ck_tile::index_t N_Warp_Tile = 32; - constexpr ck_tile::index_t K_Warp_Tile = 16; -#endif - constexpr bool kPadM = PadM; - constexpr bool kPadN = PadN; - constexpr bool kPadK = PadK; - constexpr bool preshuffle = Preshuffle; - - constexpr bool 
DoubleSmemBuffer = false; - constexpr int kBlockPerCu = 1; - constexpr bool StructuredSparsity = false; - constexpr bool NumWaveGroup = 1; - - using GemmShape = - ck_tile::TileGemmShape, - ck_tile::sequence, - ck_tile::sequence>; - - using TilePartitioner = - ck_tile::StreamKTilePartitioner_v2; - - using GemmUniversalTraits = ck_tile::TileGemmUniversalTraits; - - const auto Run = [&](const auto memory_operation_) { - constexpr auto memory_operation = memory_operation_.value; - constexpr auto scheduler = ck_tile::GemmPipelineScheduler::Intrawave; - - // We create the GEMM pipeline without specifying has_hot_loop or tail_num. - // This is because num_loop can vary (a) per WG and (b) per iteration of the Stream-K - // while loop. Instead, has_hot_loop and tail_num are determined in the Stream-K - // Kernel's RunGemm function. This is a similar pattern used by grouped GEMM. - using UniversalGemmProblem = ck_tile::UniversalGemmPipelineProblem; - // For initial testing, we will just test with one pipeline. - // More extensive testing is coming later and will test other pipelines. 
- using GemmPipeline = ck_tile::GemmPipelineAgBgCrMem; - - using GemmEpilogue = ck_tile::CShuffleEpilogue< - ck_tile::CShuffleEpilogueProblem, - AccDataType, - CDataType, - ck_tile::tuple<>, - CLayout, - ck_tile::element_wise::PassThrough, - TilePartitioner::MPerBlock, - TilePartitioner::NPerBlock, - M_Warp, - N_Warp, - M_Warp_Tile, - N_Warp_Tile, - K_Warp_Tile, - UniversalGemmProblem::TransposeC, - memory_operation>>; - - using Kernel = - ck_tile::reboot::StreamKKernel; - - auto kargs = Kernel::MakeKernelArgs(args); - - if(!Kernel::IsSupportedArgument(kargs)) - { - EXPECT_TRUE(false); - } - - dim3 grid_dims = Kernel::GridSize(kargs.tile_partitioner); - dim3 block_dims = Kernel::BlockSize(); - - ck_tile::launch_kernel( - s, ck_tile::make_kernel(Kernel{}, grid_dims, block_dims, 0, kargs)); - - return kargs.tile_partitioner.estimate_num_wgs_per_tile(); - }; - - return Run(ck_tile::integral_constant{}); - } - - public: - void Run(ck_tile::index_t M, - ck_tile::index_t N, - ck_tile::index_t K, - ck_tile::StreamKReductionStrategy reduction_strategy = - ck_tile::StreamKReductionStrategy::Atomic, - ck_tile::index_t stride_A = 0, - ck_tile::index_t stride_B = 0, - ck_tile::index_t stride_C = 0) - { - // Since M, N, and K will vary depending on the number of CUs, we print it here to - // facilitate test output readability. 
- std::cout << "M: " << M << ", N: " << N << ", K: " << K << std::endl; - - using namespace ck_tile::literals; - - if(reduction_strategy == ck_tile::StreamKReductionStrategy::Reduction) - { - throw std::runtime_error("Reduction Strategy is current unsupported!\n"); - } - - auto f_host_tensor_descriptor = [](std::size_t row, - std::size_t col, - std::size_t stride, - auto layout) { - if constexpr(std::is_same_v) - { - return ck_tile::HostTensorDescriptor({row, col}, {stride, 1_uz}); - } - else - { - return ck_tile::HostTensorDescriptor({row, col}, {1_uz, stride}); - } - }; - - auto f_get_default_stride = - [](std::size_t row, std::size_t col, std::size_t stride, auto layout) { - if(stride == 0) - { - if constexpr(std::is_same_v) - { - return col; - } - else - { - return row; - } - } - else - return stride; - }; - - stride_A = f_get_default_stride(M, K, stride_A, ALayout{}); - stride_B = f_get_default_stride(K, N, stride_B, BLayout{}); - stride_C = f_get_default_stride(M, N, stride_C, CLayout{}); - - ck_tile::HostTensor a_m_k(f_host_tensor_descriptor(M, K, stride_A, ALayout{})); - ck_tile::HostTensor b_k_n(f_host_tensor_descriptor(K, N, stride_B, BLayout{})); - ck_tile::HostTensor c_m_n_dev_result( - f_host_tensor_descriptor(M, N, stride_C, CLayout{})); - - ck_tile::FillUniformDistributionIntegerValue{-5, 5, /*seed*/ 11939}(a_m_k); - ck_tile::FillUniformDistributionIntegerValue{-5, 5, /*seed*/ 11940}(b_k_n); - - ck_tile::DeviceMem a_m_k_dev_buf(a_m_k.get_element_space_size_in_bytes()); - ck_tile::DeviceMem b_k_n_dev_buf(b_k_n.get_element_space_size_in_bytes()); - ck_tile::DeviceMem c_m_n_dev_buf(c_m_n_dev_result.get_element_space_size_in_bytes()); - - a_m_k_dev_buf.ToDevice(a_m_k.data()); - b_k_n_dev_buf.ToDevice(b_k_n.data()); - c_m_n_dev_buf.SetZero(); - c_m_n_dev_result.SetZero(); - - ck_tile::reboot::StreamKHostArgs args{a_m_k_dev_buf.GetDeviceBuffer(), - b_k_n_dev_buf.GetDeviceBuffer(), - c_m_n_dev_buf.GetDeviceBuffer(), - M, - N, - K, - stride_A, - stride_B, - 
stride_C, - reduction_strategy}; - - ck_tile::index_t num_accumulations_per_tile = - invoke_streamk( - args, ck_tile::stream_config{nullptr, false, 0, 0, 1}); - - c_m_n_dev_buf.FromDevice(c_m_n_dev_result.data()); - - ck_tile::HostTensor c_m_n_host_ref( - f_host_tensor_descriptor(M, N, stride_C, CLayout{})); - c_m_n_host_ref.SetZero(); - - ck_tile::reference_gemm( - a_m_k, b_k_n, c_m_n_host_ref); - - const float max_accumulated_value = - *std::max_element(c_m_n_host_ref.mData.begin(), c_m_n_host_ref.mData.end()); - const auto rtol_atol = calculate_rtol_atol( - K, num_accumulations_per_tile, max_accumulated_value); - - bool pass = ck_tile::check_err(c_m_n_dev_result, - c_m_n_host_ref, - "Error: Incorrect results!", - rtol_atol.at(ck_tile::number<0>{}), - rtol_atol.at(ck_tile::number<1>{})); - - EXPECT_TRUE(pass); - }; -}; diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_smoke_cases.inc b/test/ck_tile/gemm_streamk/test_gemm_streamk_smoke_cases.inc similarity index 94% rename from test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_smoke_cases.inc rename to test/ck_tile/gemm_streamk/test_gemm_streamk_smoke_cases.inc index d714b3446c..4bd6e9d973 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_smoke_cases.inc +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_smoke_cases.inc @@ -1,5 +1,5 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT #pragma once diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp index 73e44d5cfd..efb7416580 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp @@ -1,127 +1,87 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. 
-// SPDX-License-Identifier: MIT - +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT #pragma once #include #include - #include "gtest/gtest.h" - #include "ck_tile/host.hpp" -#include "test_gemm_streamk_util.hpp" -using F16 = ck_tile::half_t; -using F32 = float; -using BF16 = ck_tile::bf16_t; using F8 = ck_tile::fp8_t; +using F16 = ck_tile::half_t; +using BF16 = ck_tile::bf16_t; using BF8 = ck_tile::bf8_t; +using F32 = float; using Row = ck_tile::tensor_layout::gemm::RowMajor; using Col = ck_tile::tensor_layout::gemm::ColumnMajor; -using Mem = ck_tile::integral_constant; -using CompV3 = ck_tile::integral_constant; -using CompV4 = ck_tile::integral_constant; - using Persistent = std::true_type; using NonPersistent = std::false_type; -using I1 = ck_tile::number<1>; -using I2 = ck_tile::number<2>; -using I4 = ck_tile::number<4>; -using I8 = ck_tile::number<8>; -using I16 = ck_tile::number<16>; using I32 = ck_tile::number<32>; -using I64 = ck_tile::number<64>; using I128 = ck_tile::number<128>; using I256 = ck_tile::number<256>; -template -struct Layouts -{ - // clang-format off - // Create all combinations of A, B, Acc, C layouts - // ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType, M_MacroTile, N_MacroTile, K_MacroTile, M_Warps, N_Warps, K_Warps, M_MmaTile, N_MmaTile, K_MmaTile, PipelineType, Persistent - using RRR = ::testing::Types>; - using RRC = ::testing::Types>; - using RCR = ::testing::Types>; - using RCC = ::testing::Types>; - using CRR = ::testing::Types>; - using CRC = ::testing::Types>; - using CCR = ::testing::Types>; - using CCC = ::testing::Types>; - // clang-format on -}; - // clang-format off -// Here we use macros to generate a large number of parameter sets for different test configurations. -// One parameter set is intended to be be implemented per .cpp file to keep the compile time down. 
-// The naming convention is as follows: -// __________________________________________________ ____________________________________________________________________________________ -// | Parameter Name | | Parameter Value Type | -// using F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = F16Layouts::RRR; -// / | \ \ \ \ \ | | | | \ \ \ \ \ \ \ \ \ -// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT LAYOUT MACRO MACRO MACRO WARPS WARPS WARPS MMA MMA MMA PIPELINE PERSISTENT LAYOUT -// TYPE TYPE TILE MxNxK TILE TYPE CLASS TILE TILE TILE M N K TILE TILE TILE TYPE TYPE -// MxNxK MxNxK M N K M N K -// -// The example options for each field are: -// - DATA_TYPE: F16, BF16, F8, BF8 -// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC -// - PIPELINE_TYPE: Mem, CompV3, CompV4 -// - M_MACRO_TILE: 128, 256, etc -// - N_MACRO_TILE: 128, 256, etc -// - K_MACRO_TILE: 32, 64, 128, etc -// - M_WARPS: 2, 4, 1 -// - N_WARPS: 2, 1, 4 -// - K_WARPS: 1 -// - M_MMA_TILE: 32, 16 -// - N_MMA_TILE: 32, 16 -// - K_MMA_TILE: 16 -// - PERSISTENT_TYPE: NonPersistent, Persistent +using KernelTypesStreamKFp16Persistent = ::testing::Types< +// ALayout BLayout CLayout ADataType BDataType AccDataType CDataType M_MacroTile N_MacroTile K_MacroTile Persistent -// Macro to concatenate the parameter name -// E.g. 
F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent -#define CONCATENATE_PARAM_NAME(DATA_TYPE, LAYOUT, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DATA_TYPE##_##LAYOUT##_##PIPELINE_TYPE##_##M_MACRO_TILE##x##N_MACRO_TILE##x##K_MACRO_TILE##_##M_WARPS##x##N_WARPS##x##K_WARPS##_##M_MMA_TILE##x##N_MMA_TILE##x##K_MMA_TILE##_##PERSISTENT + std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I32, Persistent>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I32, Persistent>, + std::tuple< Col, Col, Row, F16, F16, F32, F16, I256, I256, I32, Persistent>, + std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I32, Persistent> +>; -// Macro to get the parameter value type -// E.g. F16Layouts::RRR -#define CONCATENATE_PARAM_VALUE(LAYOUTS_CLASS, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PIPELINE_TYPE, PERSISTENT, LAYOUT) \ - LAYOUTS_CLASS::LAYOUT +using KernelTypesStreamKBf16Persistent = ::testing::Types< + std::tuple< Row, Row, Row, BF16, BF16, F32, BF16, I256, I256, I32, Persistent>, + std::tuple< Row, Col, Row, BF16, BF16, F32, BF16, I256, I256, I32, Persistent>, + std::tuple< Col, Col, Row, BF16, BF16, F32, BF16, I256, I256, I32, Persistent>, + std::tuple< Col, Row, Row, BF16, BF16, F32, BF16, I256, I256, I32, Persistent> +>; -// Macro to declare a single parameter set, consisting of a parameter name and value type -#define DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, LAYOUT, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - using CONCATENATE_PARAM_NAME(DATA_TYPE, LAYOUT, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) = \ - CONCATENATE_PARAM_VALUE(LAYOUTS_CLASS, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, 
M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PIPELINE_TYPE, PERSISTENT, LAYOUT); +using KernelTypesStreamKBf8Persistent = ::testing::Types< + std::tuple< Row, Row, Row, BF8, BF8, F32, BF16, I128, I128, I32, Persistent>, + std::tuple< Row, Col, Row, BF8, BF8, F32, BF16, I128, I128, I32, Persistent>, + std::tuple< Col, Col, Row, BF8, BF8, F32, BF16, I128, I128, I32, Persistent>, + std::tuple< Col, Row, Row, BF8, BF8, F32, BF16, I128, I128, I32, Persistent> +>; -// Macro to declare all layout combinations for a given set of parameters -#define DECLARE_PARAMS_ALL_LAYOUTS(LAYOUTS_CLASS, DATA_TYPE, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RRR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RRC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RCR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RCC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CRR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CRC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CCR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - 
DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CCC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) +using KernelTypesStreamKFp8Persistent = ::testing::Types< + std::tuple< Row, Row, Row, F8, F8, F32, F16, I128, I128, I32, Persistent>, + std::tuple< Row, Col, Row, F8, F8, F32, F16, I128, I128, I32, Persistent>, + std::tuple< Col, Col, Row, F8, F8, F32, F16, I128, I128, I32, Persistent>, + std::tuple< Col, Row, Row, F8, F8, F32, F16, I128, I128, I32, Persistent> +>; -#include "test_gemm_streamk_types_fp16.hpp" -#include "test_gemm_streamk_types_bf16.hpp" -#include "test_gemm_streamk_types_fp8.hpp" -#include "test_gemm_streamk_types_bf8.hpp" +using KernelTypesStreamKFp16NonPersistent = ::testing::Types< +// ALayout BLayout CLayout ADataType BDataType AccDataType CDataType M_MacroTile N_MacroTile K_MacroTile Persistent + + std::tuple< Row, Row, Row, F16, F16, F32, F16, I256, I256, I32, NonPersistent>, + std::tuple< Row, Col, Row, F16, F16, F32, F16, I256, I256, I32, NonPersistent>, + std::tuple< Col, Col, Row, F16, F16, F32, F16, I256, I256, I32, NonPersistent>, + std::tuple< Col, Row, Row, F16, F16, F32, F16, I256, I256, I32, NonPersistent> +>; + +using KernelTypesStreamKBf16NonPersistent = ::testing::Types< + std::tuple< Row, Row, Row, BF16, BF16, F32, BF16, I256, I256, I32, NonPersistent>, + std::tuple< Row, Col, Row, BF16, BF16, F32, BF16, I256, I256, I32, NonPersistent>, + std::tuple< Col, Col, Row, BF16, BF16, F32, BF16, I256, I256, I32, NonPersistent>, + std::tuple< Col, Row, Row, BF16, BF16, F32, BF16, I256, I256, I32, NonPersistent> +>; + +using KernelTypesStreamKBf8NonPersistent = ::testing::Types< + std::tuple< Row, Row, Row, BF8, BF8, F32, BF16, I128, I128, I32, NonPersistent>, + std::tuple< Row, Col, Row, BF8, BF8, F32, BF16, I128, I128, I32, NonPersistent>, + std::tuple< Col, Col, Row, BF8, BF8, F32, BF16, I128, I128, I32, NonPersistent>, + std::tuple< Col, Row, Row, BF8, BF8, 
F32, BF16, I128, I128, I32, NonPersistent> +>; + +using KernelTypesStreamKFp8NonPersistent = ::testing::Types< + std::tuple< Row, Row, Row, F8, F8, F32, F16, I128, I128, I32, NonPersistent>, + std::tuple< Row, Col, Row, F8, F8, F32, F16, I128, I128, I32, NonPersistent>, + std::tuple< Col, Col, Row, F8, F8, F32, F16, I128, I128, I32, NonPersistent>, + std::tuple< Col, Row, Row, F8, F8, F32, F16, I128, I128, I32, NonPersistent> +>; + +// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf16.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf16.hpp deleted file mode 100644 index 07aa1e0f04..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf16.hpp +++ /dev/null @@ -1,76 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include "test_gemm_streamk_types.hpp" - -template -struct BF16Layouts -{ - // clang-format off - // For CDNA, we support [A, B, Acc, C] = [bf16, bf16, f32, bf16] and [bf16, bf16, f32, f32]: - using BF16_BF16_F32_BF16 = Layouts; - using BF16_BF16_F32_F32 = Layouts; - using RRR = detail::combine_t; - using RRC = detail::combine_t; - using RCR = detail::combine_t; - using RCC = detail::combine_t; - using CRR = detail::combine_t; - using CRC = detail::combine_t; - using CCR = detail::combine_t; - using CCC = detail::combine_t; - // clang-format on -}; -// clang-format off - -// Macro to declare all layout combinations for BF16 data type -#define DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAMS_ALL_LAYOUTS(BF16Layouts, BF16, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) - -// Macro to declare all layout combinations for BF16 data type and a variety of sizes -#define 
DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(PIPELINE_TYPE, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 128, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) - -// Declare all BF16 parameter sets for different pipeline types and persistence options -DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(Mem, NonPersistent) -DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV3, NonPersistent) -DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV4, NonPersistent) - -// Here, we have a combination of parameter set symbols that we can use to compile into test cases -// __________________________________________________ -// | Parameter Name | -// using BF16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = ... 
-// / | \ \ \ \ \ -// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT -// TYPE TYPE TILE MxNxK TILE TYPE -// MxNxK MxNxK -// -// The options for each field are: -// - DATA TYPE: BF16 -// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC -// - PIPELINE_TYPE: Mem, CompV3, CompV4 -// - Macro Tile: 128x128x32, 128x128x64, 128x128x128, 256x128x32, 256x128x64, 128x256x32, 128x256x64, 256x256x32, 256x256x64 -// - Warps: 2x2x1 -// - MMA Tile: 32x32x16 -// - PERSISTENT_TYPE: NonPersistent - -// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf8.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf8.hpp deleted file mode 100644 index 47f64e35ad..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf8.hpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include "test_gemm_streamk_types.hpp" - -template -struct BF8Layouts -{ - // clang-format off - // For CDNA, we support [A, B, Acc, C] = [bf8, bf8, f32, f16] and [bf8, bf8, f32, f32]: - using BF8_BF8_F32_F16 = Layouts; - using BF8_BF8_F32_F32 = Layouts; - using RRR = detail::combine_t; - using RRC = detail::combine_t; - using RCR = detail::combine_t; - using RCC = detail::combine_t; - using CRR = detail::combine_t; - using CRC = detail::combine_t; - using CCR = detail::combine_t; - using CCC = detail::combine_t; - // clang-format on -}; - -// clang-format off - -// Macro to declare all layout combinations for BF8 data type -#define DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAMS_ALL_LAYOUTS(BF8Layouts, BF8, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) - -// Macro to declare all layout combinations for BF8 data type and a variety of sizes -#define 
DECLARE_BF8_PARAMS_ALL_LAYOUTS_ALL_SIZES(PIPELINE_TYPE, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 128, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_BF8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) - -// Declare all BF8 parameter sets for different pipeline types and persistence options -DECLARE_BF8_PARAMS_ALL_LAYOUTS_ALL_SIZES(Mem, NonPersistent) -DECLARE_BF8_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV3, NonPersistent) -DECLARE_BF8_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV4, NonPersistent) - -// Here, we have a combination of parameter set symbols that we can use to compile into test cases -// __________________________________________________ -// | Parameter Name | -// using BF8_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = ... 
-// / | \ \ \ \ \ -// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT -// TYPE TYPE TILE MxNxK TILE TYPE -// MxNxK MxNxK -// -// The options for each field are: -// - DATA TYPE: BF8 -// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC -// - PIPELINE_TYPE: Mem, CompV3, CompV4 -// - Macro Tile: 128x128x32, 128x128x64, 128x128x128, 256x128x32, 256x128x64, 128x256x32, 128x256x64, 256x256x32, 256x256x64 -// - Warps: 2x2x1 -// - MMA Tile: 32x32x16 -// - PERSISTENT_TYPE: NonPersistent - -// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp16.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp16.hpp deleted file mode 100644 index 80dfdf99b3..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp16.hpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include "test_gemm_streamk_types.hpp" - -template -struct F16Layouts -{ - // clang-format off - // For CDNA, we support [A, B, Acc, C] = [f16, f16, f32, f16] and [f16, f16, f32, f32]: - using F16_F16_F32_F16 = Layouts; - using F16_F16_F32_F32 = Layouts; - using RRR = detail::combine_t; - using RRC = detail::combine_t; - using RCR = detail::combine_t; - using RCC = detail::combine_t; - using CRR = detail::combine_t; - using CRC = detail::combine_t; - using CCR = detail::combine_t; - using CCC = detail::combine_t; - // clang-format on -}; - -// clang-format off - -// Macro to declare all layout combinations for FP16 data type -#define DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAMS_ALL_LAYOUTS(F16Layouts, F16, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) - -// Macro to declare all layout combinations for FP16 data type and a variety of sizes -#define 
DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(PIPELINE_TYPE, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 128, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) - -// Declare all FP16 parameter sets for different pipeline types and persistence options -DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(Mem, NonPersistent) -DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV3, NonPersistent) -DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV4, NonPersistent) - -// Here, we have a combination of parameter set symbols that we can use to compile into test cases -// __________________________________________________ -// | Parameter Name | -// using F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = ... 
-// / | \ \ \ \ \ -// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT -// TYPE TYPE TILE MxNxK TILE TYPE -// MxNxK MxNxK -// -// The options for each field are: -// - DATA TYPE: F16 -// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC -// - PIPELINE_TYPE: Mem, CompV3, CompV4 -// - Macro Tile: 128x128x32, 128x128x64, 128x128x128, 256x128x32, 256x128x64, 128x256x32, 128x256x64, 256x256x32, 256x256x64 -// - Warps: 2x2x1 -// - MMA Tile: 32x32x16 -// - PERSISTENT_TYPE: NonPersistent - -// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp8.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp8.hpp deleted file mode 100644 index 30132e6b6d..0000000000 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp8.hpp +++ /dev/null @@ -1,77 +0,0 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - -#pragma once - -#include "test_gemm_streamk_types.hpp" - -template -struct F8Layouts -{ - // clang-format off - // For CDNA, we support [A, B, Acc, C] = [f8, f8, f32, f16] and [f8, f8, f32, f32]: - using F8_F8_F32_F16 = Layouts; - using F8_F8_F32_F32 = Layouts; - using RRR = detail::combine_t; - using RRC = detail::combine_t; - using RCR = detail::combine_t; - using RCC = detail::combine_t; - using CRR = detail::combine_t; - using CRC = detail::combine_t; - using CCR = detail::combine_t; - using CCC = detail::combine_t; - // clang-format on -}; - -// clang-format off - -// Macro to declare all layout combinations for FP8 data type -#define DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ - DECLARE_PARAMS_ALL_LAYOUTS(F8Layouts, F8, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) - -// Macro to declare all layout combinations for FP8 data type and a variety of sizes -#define 
DECLARE_F8_PARAMS_ALL_LAYOUTS_ALL_SIZES(PIPELINE_TYPE, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 128, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ - DECLARE_F8_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) - -// Declare all FP8 parameter sets for different pipeline types and persistence options -DECLARE_F8_PARAMS_ALL_LAYOUTS_ALL_SIZES(Mem, NonPersistent) -DECLARE_F8_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV3, NonPersistent) -DECLARE_F8_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV4, NonPersistent) - -// Here, we have a combination of parameter set symbols that we can use to compile into test cases -// __________________________________________________ -// | Parameter Name | -// using F8_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = ... 
-// / | \ \ \ \ \ -// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT -// TYPE TYPE TILE MxNxK TILE TYPE -// MxNxK MxNxK -// -// The options for each field are: -// - DATA TYPE: F8 -// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC -// - PIPELINE_TYPE: Mem, CompV3, CompV4 -// - Macro Tile: 128x128x32, 128x128x64, 128x128x128, 256x128x32, 256x128x64, 128x256x32, 128x256x64, 256x256x32, 256x256x64 -// - Warps: 2x2x1 -// - MMA Tile: 32x32x16 -// - PERSISTENT_TYPE: NonPersistent - -// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_util.cpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_util.cpp similarity index 84% rename from test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_util.cpp rename to test/ck_tile/gemm_streamk/test_gemm_streamk_util.cpp index 39a92d622d..ac001e15e7 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_reboot_util.cpp +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_util.cpp @@ -1,4 +1,4 @@ -#include "test_gemm_streamk_reboot_util.hpp" +#include "test_gemm_streamk_util.hpp" ck_tile::index_t get_cu_count() { diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp index 1384bfc35b..72b4c52831 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp @@ -1,8 +1,6 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT - +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT #pragma once - #include #include #include @@ -39,75 +37,246 @@ auto calculate_rtol_atol(const ck_tile::index_t K, return ck_tile::make_tuple(std::max(rtol, rtol_split_k), std::max(atol, atol_split_k)); } -enum struct GemmPipelineType +ck_tile::index_t get_cu_count(); + +template +class TestCkTileStreamK : public ::testing::Test { - Mem, - CompV3, - CompV4 + protected: + using ALayout = std::tuple_element_t<0, Tuple>; + using BLayout = std::tuple_element_t<1, Tuple>; + using CLayout = std::tuple_element_t<2, Tuple>; + using ADataType = std::tuple_element_t<3, Tuple>; + using BDataType = std::tuple_element_t<4, Tuple>; + using AccDataType = std::tuple_element_t<5, Tuple>; + using CDataType = std::tuple_element_t<6, Tuple>; + using DsLayout = ck_tile::tuple<>; + using DsDataType = ck_tile::tuple<>; + static constexpr ck_tile::index_t M_Tile = std::tuple_element_t<7, Tuple>::value; + static constexpr ck_tile::index_t N_Tile = std::tuple_element_t<8, Tuple>::value; + static constexpr ck_tile::index_t K_Tile = std::tuple_element_t<9, Tuple>::value; + static constexpr bool Persistent = std::tuple_element_t<10, Tuple>::value; + + template + ck_tile::index_t invoke_streamk(const ck_tile::StreamKHostArgs& args, + const ck_tile::stream_config& s) + { + constexpr ck_tile::index_t M_Warp = 2; + constexpr ck_tile::index_t N_Warp = 2; + constexpr ck_tile::index_t K_Warp = 1; + + constexpr ck_tile::index_t M_Warp_Tile = 32; + constexpr ck_tile::index_t N_Warp_Tile = 32; + constexpr ck_tile::index_t K_Warp_Tile = 16; + + constexpr bool kPadM = PadM; + constexpr bool kPadN = PadN; + constexpr bool kPadK = PadK; + constexpr bool preshuffle = Preshuffle; + + constexpr bool DoubleSmemBuffer = false; + constexpr int kBlockPerCu = 1; + constexpr bool StructuredSparsity = false; + constexpr bool NumWaveGroup = 1; + + using GemmShape = + ck_tile::TileGemmShape, + ck_tile::sequence, + ck_tile::sequence>; + + using TilePartitioner = + 
ck_tile::StreamKTilePartitioner; + + using GemmUniversalTraits = ck_tile::TileGemmUniversalTraits; + + const auto Run = [&](const auto memory_operation_) { + constexpr auto memory_operation = memory_operation_.value; + constexpr auto scheduler = ck_tile::GemmPipelineScheduler::Intrawave; + + // We create the GEMM pipeline without specifying has_hot_loop or tail_num. + // This is because num_loop can vary (a) per WG and (b) per iteration of the Stream-K + // while loop. Instead, has_hot_loop and tail_num are determined in the Stream-K + // Kernel's RunGemm function. This is a similar pattern used by grouped GEMM. + using UniversalGemmProblem = ck_tile::UniversalGemmPipelineProblem; + // For initial testing, we will just test with one pipeline. + // More extensive testing is coming later and will test other pipelines. + using GemmPipeline = ck_tile::GemmPipelineAgBgCrCompV3; + + using GemmEpilogue = ck_tile::CShuffleEpilogue< + ck_tile::CShuffleEpilogueProblem, + AccDataType, + CDataType, + ck_tile::tuple<>, + CLayout, + ck_tile::element_wise::PassThrough, + TilePartitioner::MPerBlock, + TilePartitioner::NPerBlock, + M_Warp, + N_Warp, + M_Warp_Tile, + N_Warp_Tile, + K_Warp_Tile, + UniversalGemmProblem::TransposeC, + memory_operation>>; + + using Kernel = ck_tile::StreamKKernel; + + auto kargs = Kernel::MakeKernelArgs(args); + + if(!Kernel::IsSupportedArgument(kargs)) + { + EXPECT_TRUE(false); + } + + dim3 grid_dims = Kernel::GridSize(kargs.tile_partitioner); + dim3 block_dims = Kernel::BlockSize(); + + ck_tile::launch_kernel( + s, ck_tile::make_kernel(Kernel{}, grid_dims, block_dims, 0, kargs)); + + return kargs.tile_partitioner.estimate_num_wgs_per_tile(); + }; + + return Run(ck_tile::integral_constant{}); + } + + public: + void Run(ck_tile::index_t M, + ck_tile::index_t N, + ck_tile::index_t K, + ck_tile::StreamKReductionStrategy reduction_strategy = + ck_tile::StreamKReductionStrategy::Atomic, + ck_tile::index_t stride_A = 0, + ck_tile::index_t stride_B = 0, + 
ck_tile::index_t stride_C = 0) + { + // Since M, N, and K will vary depending on the number of CUs, we print it here to + // facilitate test output readability. + std::cout << "M: " << M << ", N: " << N << ", K: " << K << std::endl; + + using namespace ck_tile::literals; + + if(reduction_strategy == ck_tile::StreamKReductionStrategy::Reduction) + { + throw std::runtime_error("Reduction Strategy is current unsupported!\n"); + } + + auto f_host_tensor_descriptor = [](std::size_t row, + std::size_t col, + std::size_t stride, + auto layout) { + if constexpr(std::is_same_v) + { + return ck_tile::HostTensorDescriptor({row, col}, {stride, 1_uz}); + } + else + { + return ck_tile::HostTensorDescriptor({row, col}, {1_uz, stride}); + } + }; + + auto f_get_default_stride = + [](std::size_t row, std::size_t col, std::size_t stride, auto layout) { + if(stride == 0) + { + if constexpr(std::is_same_v) + { + return col; + } + else + { + return row; + } + } + else + return stride; + }; + + stride_A = f_get_default_stride(M, K, stride_A, ALayout{}); + stride_B = f_get_default_stride(K, N, stride_B, BLayout{}); + stride_C = f_get_default_stride(M, N, stride_C, CLayout{}); + + ck_tile::HostTensor a_m_k(f_host_tensor_descriptor(M, K, stride_A, ALayout{})); + ck_tile::HostTensor b_k_n(f_host_tensor_descriptor(K, N, stride_B, BLayout{})); + ck_tile::HostTensor c_m_n_dev_result( + f_host_tensor_descriptor(M, N, stride_C, CLayout{})); + + ck_tile::FillUniformDistributionIntegerValue{-5, 5, /*seed*/ 11939}(a_m_k); + ck_tile::FillUniformDistributionIntegerValue{-5, 5, /*seed*/ 11940}(b_k_n); + + ck_tile::DeviceMem a_m_k_dev_buf(a_m_k.get_element_space_size_in_bytes()); + ck_tile::DeviceMem b_k_n_dev_buf(b_k_n.get_element_space_size_in_bytes()); + ck_tile::DeviceMem c_m_n_dev_buf(c_m_n_dev_result.get_element_space_size_in_bytes()); + + a_m_k_dev_buf.ToDevice(a_m_k.data()); + b_k_n_dev_buf.ToDevice(b_k_n.data()); + c_m_n_dev_buf.SetZero(); + c_m_n_dev_result.SetZero(); + + 
ck_tile::StreamKHostArgs args{a_m_k_dev_buf.GetDeviceBuffer(), + b_k_n_dev_buf.GetDeviceBuffer(), + c_m_n_dev_buf.GetDeviceBuffer(), + M, + N, + K, + stride_A, + stride_B, + stride_C, + reduction_strategy}; + + ck_tile::index_t num_accumulations_per_tile = + invoke_streamk( + args, ck_tile::stream_config{nullptr, false, 0, 0, 1}); + + c_m_n_dev_buf.FromDevice(c_m_n_dev_result.data()); + + ck_tile::HostTensor c_m_n_host_ref( + f_host_tensor_descriptor(M, N, stride_C, CLayout{})); + c_m_n_host_ref.SetZero(); + + ck_tile::reference_gemm( + a_m_k, b_k_n, c_m_n_host_ref); + + const float max_accumulated_value = + *std::max_element(c_m_n_host_ref.mData.begin(), c_m_n_host_ref.mData.end()); + const auto rtol_atol = calculate_rtol_atol( + K, num_accumulations_per_tile, max_accumulated_value); + + bool pass = ck_tile::check_err(c_m_n_dev_result, + c_m_n_host_ref, + "Error: Incorrect results!", + rtol_atol.at(ck_tile::number<0>{}), + rtol_atol.at(ck_tile::number<1>{})); + + EXPECT_TRUE(pass); + }; }; - -template -struct GemmPipelineTypeSelector; - -template -struct GemmPipelineTypeSelector -{ - using base_pipeline = ck_tile::BaseGemmPipelineAgBgCrMem; - using pipeline = ck_tile::GemmPipelineAgBgCrMem; - - static constexpr auto GetName() { return "GemmPipelineAgBgCrMem"; } -}; - -template -struct GemmPipelineTypeSelector -{ - using base_pipeline = ck_tile::BaseGemmPipelineAgBgCrCompV3; - using pipeline = ck_tile::GemmPipelineAgBgCrCompV3; - - static constexpr auto GetName() { return "GemmPipelineAgBgCrCompV3"; } -}; - -template -struct GemmPipelineTypeSelector -{ - using base_pipeline = ck_tile::BaseGemmPipelineAgBgCrCompV4; - using pipeline = ck_tile::GemmPipelineAgBgCrCompV4; - - static constexpr auto GetName() { return "GemmPipelineAgBgCrCompV4"; } -}; - -namespace detail { -template -struct combine; - -template -struct combine<::testing::Types, ::testing::Types> -{ - using type = ::testing::Types; -}; - -template -using combine_t = typename combine::type; -} // namespace 
detail - -// This is the base class for all stream-k tests -#define STREAM_K_TEST_CLASS_BASE TestCkTileStreamK - -// Macros to help generate test suite names from the parameters given -#define CONCATENATE_TEST_SUITE_NAME(PREFIX, TEST_PARAMS) PREFIX##_##TEST_PARAMS -// Helper macro to expand the arguments before passing them to CONCATENATE_TEST_SUITE_NAME -#define MAKE_TEST_SUITE_NAME_INTERNAL(TEST_BASE_NAME, TEST_PARAMS) \ - CONCATENATE_TEST_SUITE_NAME(TEST_BASE_NAME, TEST_PARAMS) - -// Final macro to be used to create the test suite name from the base class name and the test -// parameters -#define MAKE_TEST_SUITE_NAME(TEST_PARAMS) \ - MAKE_TEST_SUITE_NAME_INTERNAL(STREAM_K_TEST_CLASS_BASE, TEST_PARAMS) - -// Macro to declare a test suite with the given name and parameters, based on the base test class -#define DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS) \ - template \ - class TEST_SUITE_NAME : public STREAM_K_TEST_CLASS_BASE \ - { \ - }; \ - TYPED_TEST_SUITE(TEST_SUITE_NAME, TEST_SUITE_PARAMS); diff --git a/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner.cpp b/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner.cpp index 9028f7bf10..525817641a 100644 --- a/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner.cpp +++ b/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner.cpp @@ -1,5 +1,5 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. 
+// SPDX-License-Identifier: MIT #include "test_streamk_tile_partitioner_common.hpp" @@ -373,78 +373,77 @@ TEST(StreamKTilePartitionerBaseGetOutputTileIndex, TestAllMappings) } // Persistent -TEST(StreamKTilePartitioner_v2_PersistentConstructor, SKOnly) +TEST(StreamKTilePartitioner_PersistentConstructor, SKOnly) { using Config = StreamKTilePartitionerBaseConfigSKOnly; - ck_tile::StreamKTilePartitioner_v2 - tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; + ck_tile:: + StreamKTilePartitioner + tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2PersistentExpected expected_values{0, 0, 3}; - validate_streamk_v2_persistent(expected_values, tile_partitioner); + validate_streamk_persistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_PersistentConstructor, DPOnly) +TEST(StreamKTilePartitioner_PersistentConstructor, DPOnly) { using Config = StreamKTilePartitionerBaseConfigDPOnly; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2PersistentExpected expected_values{2, 0, 3}; - validate_streamk_v2_persistent(expected_values, tile_partitioner); + validate_streamk_persistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_PersistentConstructor, DP2TileSK) +TEST(StreamKTilePartitioner_PersistentConstructor, DP2TileSK) { using Config = StreamKTilePartitionerBaseConfigDP2TileSK; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2PersistentExpected expected_values{1, 0, 3}; - validate_streamk_v2_persistent(expected_values, tile_partitioner); + validate_streamk_persistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_PersistentConstructor, EdgeCase) +TEST(StreamKTilePartitioner_PersistentConstructor, EdgeCase) { using Config = 
StreamKTilePartitionerBaseConfigEdgeCase; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2PersistentExpected expected_values{0, 1, 4}; - validate_streamk_v2_persistent(expected_values, tile_partitioner); + validate_streamk_persistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_GridSize_Persistent, SKOnly) +TEST(StreamKTilePartitioner_GridSize_Persistent, SKOnly) { using Config = StreamKTilePartitionerBaseConfigSKOnly; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; const auto g = tile_partitioner.grid_size(); EXPECT_EQ(g.x, Config::GRID); } -TEST(StreamKTilePartitioner_v2_GridSize_Persistent, EdgeCase) +TEST(StreamKTilePartitioner_GridSize_Persistent, EdgeCase) { using Config = StreamKTilePartitionerBaseConfigEdgeCase; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; const auto g = tile_partitioner.grid_size(); @@ -452,65 +451,64 @@ TEST(StreamKTilePartitioner_v2_GridSize_Persistent, EdgeCase) } // Non-Persistent Tests -TEST(StreamKTilePartitioner_v2_NonPersistentConstructor, SKOnly) +TEST(StreamKTilePartitioner_NonPersistentConstructor, SKOnly) { using Config = StreamKTilePartitionerBaseConfigSKOnly; - ck_tile::StreamKTilePartitioner_v2 - tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; + ck_tile:: + StreamKTilePartitioner + tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2NonPersistentExpected expected_values{0, 0, 0, 3}; - validate_streamk_v2_nonpersistent(expected_values, tile_partitioner); + validate_streamk_nonpersistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_NonPersistentConstructor, DPOnly) +TEST(StreamKTilePartitioner_NonPersistentConstructor, DPOnly) { using 
Config = StreamKTilePartitionerBaseConfigDPOnly; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2NonPersistentExpected expected_values{6, 0, 6, 3}; - validate_streamk_v2_nonpersistent(expected_values, tile_partitioner); + validate_streamk_nonpersistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_NonPersistentConstructor, DP2TileSK) +TEST(StreamKTilePartitioner_NonPersistentConstructor, DP2TileSK) { using Config = StreamKTilePartitionerBaseConfigDP2TileSK; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2NonPersistentExpected expected_values{3, 0, 3, 3}; - validate_streamk_v2_nonpersistent(expected_values, tile_partitioner); + validate_streamk_nonpersistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_NonPersistentConstructor, EdgeCase) +TEST(StreamKTilePartitioner_NonPersistentConstructor, EdgeCase) { using Config = StreamKTilePartitionerBaseConfigEdgeCase; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; StreamKTilePartitionerV2NonPersistentExpected expected_values{1, 0, 1, 4}; - validate_streamk_v2_nonpersistent(expected_values, tile_partitioner); + validate_streamk_nonpersistent(expected_values, tile_partitioner); } -TEST(StreamKTilePartitioner_v2_GridSize_NonPersistent, DP2TileSK) +TEST(StreamKTilePartitioner_GridSize_NonPersistent, DP2TileSK) { using Config = StreamKTilePartitionerBaseConfigDP2TileSK; - ck_tile::StreamKTilePartitioner_v2 + ck_tile::StreamKTilePartitioner tile_partitioner{Config::M, Config::N, Config::K, Config::GRID}; const auto g = tile_partitioner.grid_size(); diff --git a/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner_common.hpp 
b/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner_common.hpp index eb62f4253b..0bb0940651 100644 --- a/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner_common.hpp +++ b/test/ck_tile/gemm_streamk/test_streamk_tile_partitioner_common.hpp @@ -1,5 +1,5 @@ -// Copyright © Advanced Micro Devices, Inc., or its affiliates. -// SPDX-License-Identifier: MIT +// Copyright (c) Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT #include "ck_tile/host.hpp" #include "ck_tile/ops/gemm.hpp" @@ -332,9 +332,9 @@ struct StreamKTilePartitionerV2NonPersistentExpected // Persistent template -void validate_streamk_v2_persistent( +void validate_streamk_persistent( StreamKTilePartitionerV2PersistentExpected& expected_values, - ck_tile::StreamKTilePartitioner_v2& + ck_tile::StreamKTilePartitioner& tile_partitioner) { EXPECT_EQ(tile_partitioner.get_dp_tiles_per_cta(), expected_values.dp_tiles_per_cta_); @@ -344,9 +344,9 @@ void validate_streamk_v2_persistent( // Non-Persistent template -void validate_streamk_v2_nonpersistent( +void validate_streamk_nonpersistent( StreamKTilePartitionerV2NonPersistentExpected& expected_values, - ck_tile::StreamKTilePartitioner_v2& + ck_tile::StreamKTilePartitioner& tile_partitioner) { EXPECT_EQ(tile_partitioner.get_dp_ctas(), expected_values.dp_ctas_);