CK Tile GPU verification sample development & add the CK Tile GEMM test to CI/CD (#1505)

* Finished the GPU verification feature

* Add the ck_tile_gemm test to the CI/CD pipeline

* add the include of tensor_layout in reference_gemm

* Comment Addressed

* split ck_tile fmha and gemm tests into separate stages

* restructure the reference gemm

* restructure into a new reference_gemm API that can read device memory

---------

Co-authored-by: carlushuang <carlus.huang@amd.com>
Co-authored-by: illsilin <Illia.Silin@amd.com>
This commit is contained in:
Thomas Ning
2024-09-14 06:08:40 -07:00
committed by GitHub
parent 49e012dee1
commit 844f5a1712
8 changed files with 368 additions and 60 deletions

View File

@@ -76,8 +76,7 @@ struct GemmKernel
CK_TILE_DEVICE void operator()(GemmCommonKargs kargs) const
{
const index_t i_m = TilePartitioner::iM;
const index_t i_n = TilePartitioner::iN;
const auto [i_m, i_n] = TilePartitioner{}();
// options
const ADataType* a_start = static_cast<const ADataType*>(kargs.a_ptr);
const BDataType* b_start = static_cast<const BDataType*>(kargs.b_ptr);
@@ -104,7 +103,7 @@ struct GemmKernel
}();
auto b_tensor_view = [&]() {
if constexpr(std::is_same_v<LayoutB, tensor_layout::gemm::ColumnMajor>)
if constexpr(std::is_same_v<LayoutB, tensor_layout::gemm::RowMajor>)
{
return make_naive_tensor_view<address_space_enum::global>(
b_start,