CK Tile GPU verification sample development & add the CK Tile GEMM to the CI/CD tests (#1505)

* Finished the feature of gpu verification

* Add the ck_tile_gemm test to the CI/CD

* add the include of tensor_layout in reference_gemm

* Comment Addressed

* split ck_tile fmha and gemm tests into separate stages

* restructure the reference gemm

* add a new reference_gemm API that can read device memory

---------

Co-authored-by: carlushuang <carlus.huang@amd.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
This commit is contained in:
Thomas Ning
2024-09-14 06:08:40 -07:00
committed by GitHub
parent 49e012dee1
commit 844f5a1712
8 changed files with 368 additions and 60 deletions

View File

@@ -76,8 +76,7 @@ struct GemmKernel
CK_TILE_DEVICE void operator()(GemmCommonKargs kargs) const
{
const index_t i_m = TilePartitioner::iM;
const index_t i_n = TilePartitioner::iN;
const auto [i_m, i_n] = TilePartitioner{}();
// options
const ADataType* a_start = static_cast<const ADataType*>(kargs.a_ptr);
const BDataType* b_start = static_cast<const BDataType*>(kargs.b_ptr);
@@ -104,7 +103,7 @@ struct GemmKernel
}();
auto b_tensor_view = [&]() {
if constexpr(std::is_same_v<LayoutB, tensor_layout::gemm::ColumnMajor>)
if constexpr(std::is_same_v<LayoutB, tensor_layout::gemm::RowMajor>)
{
return make_naive_tensor_view<address_space_enum::global>(
b_start,

View File

@@ -15,9 +15,6 @@ struct GemmTilePartitioner
static constexpr ck_tile::index_t kN = BlockGemmShape::kN;
static constexpr ck_tile::index_t kK = BlockGemmShape::kK;
const index_t iM = __builtin_amdgcn_readfirstlane(i_tile_m * kM);
const index_t iN = __builtin_amdgcn_readfirstlane(i_tile_n * kN);
CK_TILE_HOST static constexpr auto
GridSize(ck_tile::index_t M, ck_tile::index_t N, ck_tile::index_t batch_size)
{
@@ -29,10 +26,9 @@ struct GemmTilePartitioner
CK_TILE_DEVICE auto operator()()
{
const index_t i_GridDimX = blockIdx.x;
const index_t i_GridDimY = blockIdx.y;
const index_t i_GridDimZ = blockIdx.z;
return ck_tile::make_tuple(i_GridDimX, i_GridDimY, i_GridDimZ);
const index_t iM = __builtin_amdgcn_readfirstlane(blockIdx.x * kM);
const index_t iN = __builtin_amdgcn_readfirstlane(blockIdx.y * kN);
return ck_tile::make_tuple(iM, iN);
}
};
} // namespace ck_tile