Ck tile gemm cshuffle & CK Tile GEMM restructure (#1535)

* ake the cshuffle compilable

* modify Mhe reference on gpu and cpu. Correaccess of cshuffle

* fix the cpu reference code

* Complete the in tile shuffle logic

* restructure the kernel template input

* change the naming pattern of ck_tile gemm pipeline

* Re-format files using remod.py

* Solve the fmha conflict with gemm

* Comment Addressed from Carlus

---------

Co-authored-by: Po Yen, Chen <PoYen.Chen@amd.com>
This commit is contained in:
Thomas Ning
2024-10-10 03:02:22 -07:00
committed by GitHub
parent 2e1165c1a7
commit 6f27bc9872
15 changed files with 447 additions and 142 deletions

View File

@@ -11,20 +11,12 @@
namespace ck_tile {
template <typename TilePartitioner_,
typename GemmPipeline_,
typename EpiloguePipeline_,
typename LayoutA_,
typename LayoutB_,
typename LayoutC_>
template <typename TilePartitioner_, typename GemmPipeline_, typename EpiloguePipeline_>
struct GemmKernel
{
using TilePartitioner = remove_cvref_t<TilePartitioner_>;
using GemmPipeline = remove_cvref_t<GemmPipeline_>;
using EpiloguePipeline = remove_cvref_t<EpiloguePipeline_>;
using LayoutA = remove_cvref_t<LayoutA_>;
using LayoutB = remove_cvref_t<LayoutB_>;
using LayoutC = remove_cvref_t<LayoutC_>;
static constexpr index_t KernelBlockSize = GemmPipeline::kBlockSize;
using ADataType = remove_cvref_t<typename GemmPipeline::ADataType>;
@@ -32,6 +24,10 @@ struct GemmKernel
using CAccDataType = remove_cvref_t<typename GemmPipeline::CDataType>;
using CODataType = remove_cvref_t<typename EpiloguePipeline::ODataType>;
using LayoutA = remove_cvref_t<typename GemmPipeline::LayoutA>;
using LayoutB = remove_cvref_t<typename GemmPipeline::LayoutB>;
using LayoutC = remove_cvref_t<typename GemmPipeline::LayoutC>;
__host__ static constexpr auto GridSize(index_t M_size, index_t N_size, index_t Batch_size)
{
return TilePartitioner::GridSize(M_size, N_size, Batch_size);
@@ -184,6 +180,7 @@ struct GemmKernel
c_pad_view,
make_tuple(number<TilePartitioner::kM>{}, number<TilePartitioner::kN>{}),
{i_m, i_n});
EpiloguePipeline{}(CBlockWindow_pad, acc);
}
};