mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-04 21:51:28 +00:00
Add interwave scheduler for gemm mem pipeline (#1647)
* add interwave scheduler for gemm mem pipeline
* Fix merge artifacts.
* Refactor unit tests.
* Switch to interwave scheduler for mem example

---------

Co-authored-by: Adam Osewski <19374865+aosewski@users.noreply.github.com>
Co-authored-by: Adam Osewski <Adam.Osewski@amd.com>
This commit is contained in:
@@ -11,8 +11,20 @@
|
||||
// Short file-local names for ck_tile element types, GEMM tensor layouts and
// pipeline scheduler enumerators.
// NOTE(review): Row and Col each appear twice below; this looks like the
// before/after lines of a diff rendered together rather than a real
// redeclaration -- confirm against the actual file.
using F16 = ck_tile::half_t;
|
||||
using F32 = float;
|
||||
|
||||
using Row = ck_tile::tensor_layout::gemm::RowMajor;
|
||||
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
|
||||
using Row = ck_tile::tensor_layout::gemm::RowMajor;
|
||||
using Col = ck_tile::tensor_layout::gemm::ColumnMajor;
|
||||
// Scheduler enumerators; these are passed as the second template argument of
// TestCkTileGemmMemPipeline by the scheduler-specific fixtures in this file.
static constexpr auto Intrawave = ck_tile::GemmPipelineScheduler::Intrawave;
|
||||
static constexpr auto Interwave = ck_tile::GemmPipelineScheduler::Interwave;
|
||||
|
||||
// Typed-test fixture that instantiates TestCkTileGemmMemPipeline with the
// Intrawave scheduler constant. It adds no members of its own; it only fixes
// the scheduler template argument so the fixture takes a single type parameter.
template <typename Tuple>
|
||||
class TestCkTileGemmMemPipelineIntrawave : public TestCkTileGemmMemPipeline<Tuple, Intrawave>
|
||||
{
|
||||
};
|
||||
|
||||
// Typed-test fixture that instantiates TestCkTileGemmMemPipeline with the
// Interwave scheduler constant. It adds no members of its own; it only fixes
// the scheduler template argument so the fixture takes a single type parameter.
template <typename Tuple>
|
||||
class TestCkTileGemmMemPipelineInterwave : public TestCkTileGemmMemPipeline<Tuple, Interwave>
|
||||
{
|
||||
};
|
||||
|
||||
// clang-format off
|
||||
using KernelTypes = ::testing::Types<
|
||||
@@ -24,6 +36,7 @@ using KernelTypes = ::testing::Types<
|
||||
>;
|
||||
// clang-format on
|
||||
|
||||
// Register the fixtures as typed test suites over the KernelTypes list.
// NOTE(review): the first registration names the base fixture without a
// scheduler argument; it is presumably the pre-change line of the diff, with
// the Intrawave/Interwave registrations replacing it -- confirm.
TYPED_TEST_SUITE(TestCkTileGemmMemPipeline, KernelTypes);
|
||||
TYPED_TEST_SUITE(TestCkTileGemmMemPipelineIntrawave, KernelTypes);
|
||||
TYPED_TEST_SUITE(TestCkTileGemmMemPipelineInterwave, KernelTypes);
|
||||
|
||||
#include "test_gemm_mem_pipeline_ut_cases.inc"
|
||||
|
||||
@@ -1,6 +1,13 @@
|
||||
// SPDX-License-Identifier: MIT
|
||||
// Copyright (c) 2024, Advanced Micro Devices, Inc. All rights reserved.
|
||||
|
||||
#pragma once
|
||||
|
||||
TYPED_TEST(TestCkTileGemmMemPipeline, SmallM)
|
||||
//------------------------------------------------------------------------------------------------
|
||||
// INTERWAVE SCHEDULER
|
||||
//------------------------------------------------------------------------------------------------
|
||||
|
||||
TYPED_TEST(TestCkTileGemmMemPipelineInterwave, SmallM)
|
||||
{
|
||||
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
|
||||
constexpr int N = 1024;
|
||||
@@ -10,7 +17,7 @@ TYPED_TEST(TestCkTileGemmMemPipeline, SmallM)
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
|
||||
TYPED_TEST(TestCkTileGemmMemPipeline, MidLargeM)
|
||||
TYPED_TEST(TestCkTileGemmMemPipelineInterwave, MidLargeM)
|
||||
{
|
||||
std::vector<int> Ms{127, 255, 312, 799, 1573};
|
||||
constexpr int N = 1024;
|
||||
@@ -20,7 +27,7 @@ TYPED_TEST(TestCkTileGemmMemPipeline, MidLargeM)
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
|
||||
TYPED_TEST(TestCkTileGemmMemPipeline, PaddK)
|
||||
TYPED_TEST(TestCkTileGemmMemPipelineInterwave, PaddK)
|
||||
{
|
||||
std::vector<int> Ms{127};
|
||||
constexpr int N = 1024;
|
||||
@@ -30,7 +37,51 @@ TYPED_TEST(TestCkTileGemmMemPipeline, PaddK)
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
|
||||
TYPED_TEST(TestCkTileGemmMemPipeline, Regular)
|
||||
// Interwave scheduler, "Regular" case: calls the fixture's Run(M, N, K) once
// with M = 512, N = 1024, K = 512.
TYPED_TEST(TestCkTileGemmMemPipelineInterwave, Regular)
|
||||
{
|
||||
std::vector<int> Ms{512};
|
||||
constexpr int N = 1024;
|
||||
constexpr int K = 512;
|
||||
|
||||
// Single-element list; the loop keeps the same shape as the other cases.
for(int M : Ms)
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
|
||||
//------------------------------------------------------------------------------------------------
|
||||
// INTRAWAVE SCHEDULER
|
||||
//------------------------------------------------------------------------------------------------
|
||||
|
||||
// Intrawave scheduler, "SmallM" case: calls the fixture's Run(M, N, K) for
// each M in {1, 2, 3, 4, 5, 6} with N = 1024 and K = 320.
// NOTE(review): presumably these M values are meant to be far below the M tile
// size so that M padding is exercised -- confirm against invoke_gemm.
TYPED_TEST(TestCkTileGemmMemPipelineIntrawave, SmallM)
|
||||
{
|
||||
std::vector<int> Ms{1, 2, 3, 4, 5, 6};
|
||||
constexpr int N = 1024;
|
||||
constexpr int K = 320;
|
||||
|
||||
for(int M : Ms)
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
|
||||
// Intrawave scheduler, "MidLargeM" case: calls the fixture's Run(M, N, K) for
// each M in {127, 255, 312, 799, 1573} with N = 1024 and K = 320.
TYPED_TEST(TestCkTileGemmMemPipelineIntrawave, MidLargeM)
|
||||
{
|
||||
std::vector<int> Ms{127, 255, 312, 799, 1573};
|
||||
constexpr int N = 1024;
|
||||
constexpr int K = 320;
|
||||
|
||||
for(int M : Ms)
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
|
||||
// Intrawave scheduler, "PaddK" case: calls the fixture's Run(M, N, K) once
// with M = 127, N = 1024, K = 432.
// NOTE(review): going by the test name, K = 432 is presumably chosen so that
// K is not a multiple of the K tile and K padding is exercised -- confirm
// against the tile sizes in invoke_gemm.
TYPED_TEST(TestCkTileGemmMemPipelineIntrawave, PaddK)
|
||||
{
|
||||
std::vector<int> Ms{127};
|
||||
constexpr int N = 1024;
|
||||
constexpr int K = 432;
|
||||
|
||||
for(int M : Ms)
|
||||
this->Run(M, N, K);
|
||||
}
|
||||
|
||||
TYPED_TEST(TestCkTileGemmMemPipelineIntrawave, Regular)
|
||||
{
|
||||
std::vector<int> Ms{512};
|
||||
constexpr int N = 1024;
|
||||
|
||||
@@ -11,20 +11,21 @@
|
||||
#include "ck_tile/ops/epilogue.hpp"
|
||||
#include "ck_tile/ops/gemm.hpp"
|
||||
|
||||
template <typename Tuple>
|
||||
template <typename Tuple, ck_tile::GemmPipelineScheduler Scheduler_>
|
||||
class TestCkTileGemmMemPipeline : public ::testing::Test
|
||||
{
|
||||
protected:
|
||||
using ALayout = std::tuple_element_t<0, Tuple>;
|
||||
using BLayout = std::tuple_element_t<1, Tuple>;
|
||||
using CLayout = std::tuple_element_t<2, Tuple>;
|
||||
using ADataType = std::tuple_element_t<3, Tuple>;
|
||||
using BDataType = std::tuple_element_t<4, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<5, Tuple>;
|
||||
using CDataType = std::tuple_element_t<6, Tuple>;
|
||||
using ALayout = std::tuple_element_t<0, Tuple>;
|
||||
using BLayout = std::tuple_element_t<1, Tuple>;
|
||||
using CLayout = std::tuple_element_t<2, Tuple>;
|
||||
using ADataType = std::tuple_element_t<3, Tuple>;
|
||||
using BDataType = std::tuple_element_t<4, Tuple>;
|
||||
using AccDataType = std::tuple_element_t<5, Tuple>;
|
||||
using CDataType = std::tuple_element_t<6, Tuple>;
|
||||
static constexpr auto Scheduler = Scheduler_;
|
||||
// TODO: expose the tile size through a test template parameter?
|
||||
|
||||
struct gemm_basic_args
|
||||
struct gemm_args
|
||||
{
|
||||
const void* p_a;
|
||||
const void* p_b;
|
||||
@@ -38,7 +39,7 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
|
||||
ck_tile::index_t stride_C;
|
||||
};
|
||||
|
||||
void invoke_gemm(const gemm_basic_args& args, const ck_tile::stream_config& s)
|
||||
void invoke_gemm(const gemm_args& args, const ck_tile::stream_config& s)
|
||||
{
|
||||
// TODO: This should be parameterized in tests
|
||||
constexpr ck_tile::index_t M_Tile = 128;
|
||||
@@ -89,7 +90,7 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
|
||||
AccDataType,
|
||||
GemmShape,
|
||||
Traits,
|
||||
ck_tile::GemmPipelineScheduler::Intrawave,
|
||||
Scheduler,
|
||||
has_hot_loop_v,
|
||||
tail_number_v>>;
|
||||
using Kernel = ck_tile::GemmKernel<TilePartitioner, GemmPipeline, GemmEpilogue>;
|
||||
@@ -288,7 +289,7 @@ class TestCkTileGemmMemPipeline : public ::testing::Test
|
||||
c_m_n_dev_buf.SetZero();
|
||||
c_m_n_dev_result.SetZero();
|
||||
|
||||
gemm_basic_args args;
|
||||
gemm_args args;
|
||||
args.p_a = a_m_k_dev_buf.GetDeviceBuffer();
|
||||
args.p_b = b_k_n_dev_buf.GetDeviceBuffer();
|
||||
args.p_c = c_m_n_dev_buf.GetDeviceBuffer();
|
||||
|
||||
Reference in New Issue
Block a user