diff --git a/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp b/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp index ad85b5392d..58bce4795f 100644 --- a/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp +++ b/include/ck_tile/ops/gemm/kernel/streamk_gemm_kernel.hpp @@ -303,19 +303,20 @@ struct StreamKKernel auto spatial_idx = kargs.tile_partitioner.GetOutputTileIndex(tile_idx); // Get the offsets in A, B, C tensors. - index_t i_m = static_cast(spatial_idx[UniversalGemmKernel::I0] * + index_t i_m = static_cast(spatial_idx[UniversalGemmKernel::I0] * TilePartitioner::MPerBlock); - index_t i_n = static_cast(spatial_idx[UniversalGemmKernel::I1] * + index_t i_n = static_cast(spatial_idx[UniversalGemmKernel::I1] * TilePartitioner::NPerBlock); - index_t i_k = static_cast(iter_offset) * TilePartitioner::KPerBlock; + auto [i_k_a, i_k_b] = GetKOffsets( + static_cast(iter_offset), kargs.stride_As[0], kargs.stride_Bs[0]); // Determine the total size along the K dimension the WG is using in this iteration // (used to construct tensor views). index_t k_size = static_cast(current_iter_length * TilePartitioner::KPerBlock); // Update pointer offsets for A, B, and C. - const ADataType* a_ptr = static_cast(kargs.as_ptr[0]) + i_k; - const BDataType* b_ptr = static_cast(kargs.bs_ptr[0]) + i_k; + const ADataType* a_ptr = static_cast(kargs.as_ptr[0]) + i_k_a; + const BDataType* b_ptr = static_cast(kargs.bs_ptr[0]) + i_k_b; CDataType* c_ptr = static_cast(kargs.e_ptr); // Run the GEMM pipeline and Epilogue. @@ -339,6 +340,41 @@ struct StreamKKernel } private: + /// @brief Computes the K offsets in the A and B tensors given iter_offset, where iter_offset is + /// the starting macro tile index in the K dimension for the workgroup. + /// @return A tuple containing the offsets into the A and B tensors accounting for the layouts + /// of A and B. + /// @note The default case is that A is assumed to be row major and B is assumed to be column + /// major. + template + CK_TILE_DEVICE static tuple + GetKOffsets(index_t iter_offset, index_t stride_a, index_t stride_b) + { + index_t stride_offset_a; + index_t stride_offset_b; + if constexpr(std::is_same_v) + { + stride_offset_a = stride_a; + } + else + { + stride_offset_a = 1; + } + + if constexpr(std::is_same_v) + { + stride_offset_b = stride_b; + } + else + { + stride_offset_b = 1; + } + + index_t base_offset = iter_offset * TilePartitioner::KPerBlock; + + return make_tuple(base_offset * stride_offset_a, base_offset * stride_offset_b); + } + CK_TILE_HOST static int NumCU() { hipDeviceProp_t dev_prop; diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 292bc41a0b..96df4e32a1 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -45,6 +45,7 @@ set(REGRESSION_TESTS test_ck_tile_fmha_fwd_bf16 test_ck_tile_fmha_fwd_fp16 test_ck_tile_fmha_fwd_fp8 + test_ck_tile_streamk_extended ) function(add_test_executable TEST_NAME) diff --git a/test/ck_tile/gemm_streamk/CMakeLists.txt b/test/ck_tile/gemm_streamk/CMakeLists.txt index e00874ba07..ae527a24f7 100644 --- a/test/ck_tile/gemm_streamk/CMakeLists.txt +++ b/test/ck_tile/gemm_streamk/CMakeLists.txt @@ -1,7 +1,121 @@ # Currently test_ck_tile_streamk is only built on gfx9 if(GPU_TARGETS MATCHES "gfx9") + + include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}) + #TODO: support all arches - add_gtest_executable(test_ck_tile_streamk test_gemm_streamk.cpp) + #TODO: current stream-k c-shuffle only supports C layout as R + add_gtest_executable(test_ck_tile_streamk_smoke + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ) + + add_gtest_executable(test_ck_tile_streamk_extended + # compv3 pipeline + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + + # TODO: add compv4 pipeline + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + # #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + + + # mem pipeline + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + #${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp + ) else() message(DEBUG "Skipping test_ck_tile_streamk tests for current target") endif() diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9bd736feb3 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9f43b7a0a7 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..f71515503d --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..cda7f2a72b --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..be38c2b7d0 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..115c5449d4 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..78c35c557c --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..b1cd42d599 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..a0a20d5843 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..944b6b1960 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..f434380f50 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..165f8349e9 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9003ece236 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9705060b18 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..dc30023521 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..5b3350534a --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..b5e1ac7478 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..e51fb3b959 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9991d59995 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..22d417bdde --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..dc79747889 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..3d45ac02ab --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..b681704dc4 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..dac4308d66 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..a2294b5742 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..52b11dd8a2 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..699a5dd6f1 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..0cb8d1d338 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..75487a1c70 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRC_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..ce9ec9244a --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..c1239bba1a --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRR_CompV3_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..93f2f90048 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv3/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..346e865808 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..42779c1d0c --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..f0d76a25b4 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..702dacf603 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..6792a252e4 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..c8395d1702 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..2eba9e5e74 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..ea8ec795f7 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..55b66d1313 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9351154bc8 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..826525ed01 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..a584346e8c --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..714a610ee9 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..c7ba8154eb --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..4f52ade2b6 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..6ee0580a60 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/bf16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..cdc1d4534e --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..f934a06df5 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..3b95352c13 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..d8683a20c5 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_ccr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..5e675f0069 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..253d539241 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..a4420a8fc3 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..0dcb4afe05 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_crr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..eb46e78375 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..5653a05ccc --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..ae94416336 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..28673bc2d8 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rcr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..d61389c941 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRC_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..d276153efc --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrc_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRC_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..a31d8ed592 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRR_CompV4_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..c68e67658a --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/compv4/f16_rrr_compv4_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRR_CompV4_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..851765f0aa --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..0f7a3f8ca8 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..0e7f1e1fad --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..8a85738652 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..ab0220f4ef --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9e60ef1717 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..61cd772d51 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..66e0e80c46 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/bf16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..a2f26b768e --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..d94547daa7 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_ccr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..090b472d45 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..5535325436 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_crr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..a2f999a69d --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..f6d4b50c4a --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rcr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..7dd85dbf0d --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrc_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRC_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..58ba61ad38 --- /dev/null +++ b/test/ck_tile/gemm_streamk/extended_tests/mem/f16_rrr_mem_128x128x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9f43b7a0a7 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..cda7f2a72b --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..115c5449d4 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..b1cd42d599 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..944b6b1960 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..165f8349e9 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..9705060b18 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..5b3350534a --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/bf16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS BF16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..e51fb3b959 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_ccc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..22d417bdde --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_ccr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..3d45ac02ab --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_crc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..dac4308d66 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_crr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_CRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..52b11dd8a2 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_rcc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..0cb8d1d338 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_rcr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RCR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..ce9ec9244a --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_rrc_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRC_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp b/test/ck_tile/gemm_streamk/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp new file mode 100644 index 0000000000..93f2f90048 --- /dev/null +++ b/test/ck_tile/gemm_streamk/smoke_tests/f16_rrr_compv3_256x256x32_2x2x1_32x32x16_NonPersistent.cpp @@ -0,0 +1,11 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#include "test_gemm_streamk_common_includes.hpp" + +#define TEST_SUITE_PARAMS F16_RRR_CompV3_256x256x32_2x2x1_32x32x16_NonPersistent +#define TEST_SUITE_NAME MAKE_TEST_SUITE_NAME(TEST_SUITE_PARAMS) + +DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS); + +#include "test_gemm_streamk_cases.inc" diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk.hpp new file mode 100644 index 0000000000..da0b8d153d --- /dev/null +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk.hpp @@ -0,0 +1,269 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include +#include +#include +#include +#include + +#include "ck_tile/host.hpp" +#include "ck_tile/ops/epilogue.hpp" +#include "ck_tile/ops/gemm.hpp" + +#include "test_gemm_streamk_util.hpp" + +template +class TestCkTileStreamK : public ::testing::Test +{ + protected: + using ALayout = std::tuple_element_t<0, Tuple>; + using BLayout = std::tuple_element_t<1, Tuple>; + using CLayout = std::tuple_element_t<2, Tuple>; + using ADataType = std::tuple_element_t<3, Tuple>; + using BDataType = std::tuple_element_t<4, Tuple>; + using AccDataType = std::tuple_element_t<5, Tuple>; + using CDataType = std::tuple_element_t<6, Tuple>; + using DsLayout = ck_tile::tuple<>; + using DsDataType = ck_tile::tuple<>; + + static constexpr ck_tile::index_t M_Tile = std::tuple_element_t<7, Tuple>::value; + static constexpr ck_tile::index_t N_Tile = std::tuple_element_t<8, Tuple>::value; + static constexpr ck_tile::index_t K_Tile = std::tuple_element_t<9, Tuple>::value; + + static constexpr ck_tile::index_t M_Warp = std::tuple_element_t<10, Tuple>::value; + static constexpr ck_tile::index_t N_Warp = std::tuple_element_t<11, Tuple>::value; + static constexpr ck_tile::index_t K_Warp = std::tuple_element_t<12, Tuple>::value; + + static constexpr ck_tile::index_t M_Warp_Tile = std::tuple_element_t<13, Tuple>::value; + static constexpr ck_tile::index_t N_Warp_Tile = std::tuple_element_t<14, Tuple>::value; + static constexpr ck_tile::index_t K_Warp_Tile = std::tuple_element_t<15, Tuple>::value; + + static constexpr GemmPipelineType PipelineType = std::tuple_element_t<16, Tuple>::value; + static constexpr bool Persistent = std::tuple_element_t<17, Tuple>::value; + + template + bool invoke_streamk(const ck_tile::StreamKHostArgs& args, + const ck_tile::stream_config& s, + int num_cu, + int occupancy) + { + constexpr bool kPadM = PadM; + constexpr bool kPadN = PadN; + constexpr bool kPadK = PadK; + constexpr bool preshuffle = Preshuffle; + + constexpr bool DoubleSmemBuffer = false; + constexpr int kBlockPerCu = 1; + constexpr bool StructuredSparsity = false; + constexpr bool NumWaveGroup = 1; + + using GemmShape = + ck_tile::TileGemmShape, + ck_tile::sequence, + ck_tile::sequence>; + + using TilePartitioner = ck_tile::StreamKTilePartitioner; + + using GemmUniversalTraits = ck_tile::TileGemmUniversalTraits; + + const auto Run = [&](const auto memory_operation_) { + constexpr auto memory_operation = memory_operation_.value; + constexpr auto scheduler = ck_tile::GemmPipelineScheduler::Intrawave; + + // We create the GEMM pipeline without specifying has_hot_loop or tail_num. + // This is because num_loop can vary (a) per WG and (b) per iteration of the Stream-K + // while loop. Instead, has_hot_loop and tail_num are determined in the Stream-K + // Kernel's RunGemm function. This is a similar pattern used by grouped GEMM. + using UniversalGemmProblem = ck_tile::UniversalGemmPipelineProblem; + // For initial testing, we will just test with one pipeline. + // More extensive testing is coming later and will test other pipelines. + using GemmPipeline = + typename GemmPipelineTypeSelector::pipeline; + + using GemmEpilogue = ck_tile::CShuffleEpilogue< + ck_tile::CShuffleEpilogueProblem, + AccDataType, + CDataType, + ck_tile::tuple<>, + CLayout, + ck_tile::element_wise::PassThrough, + TilePartitioner::MPerBlock, + TilePartitioner::NPerBlock, + M_Warp, + N_Warp, + M_Warp_Tile, + N_Warp_Tile, + K_Warp_Tile, + UniversalGemmProblem::TransposeC, + memory_operation>>; + + using Kernel = ck_tile::StreamKKernel; + + auto kargs = Kernel::MakeKernelArgs(args, num_cu, occupancy); + + if(!Kernel::IsSupportedArgument(kargs)) + { + return false; + } + + dim3 grid_dims = Kernel::GridSize(kargs.tile_partitioner); + dim3 block_dims = Kernel::BlockSize(); + + ck_tile::launch_kernel( + s, ck_tile::make_kernel(Kernel{}, grid_dims, block_dims, 0, kargs)); + + return true; + }; + + return Run(ck_tile::integral_constant{}); + } + + public: + // Since Stream-K is build on gfx9, the lower bound for CUs is 104. Thus, we default num_cu to + // 104 and occupancy to 1 to ensure tests are reproducible on different architectures. + void Run(ck_tile::index_t M, + ck_tile::index_t N, + ck_tile::index_t K, + uint32_t num_sk_blocks = 0xffffffff, + ck_tile::StreamKReductionStrategy reduction_strategy = + ck_tile::StreamKReductionStrategy::Atomic, + int occupancy = 1, + int num_cu = 104, + ck_tile::index_t stride_A = 0, + ck_tile::index_t stride_B = 0, + ck_tile::index_t stride_C = 0) + { + + using namespace ck_tile::literals; + + if(reduction_strategy == ck_tile::StreamKReductionStrategy::Reduction) + { + throw std::runtime_error("Reduction Strategy is current unsupported!\n"); + } + + auto f_host_tensor_descriptor = [](std::size_t row, + std::size_t col, + std::size_t stride, + auto layout) { + if constexpr(std::is_same_v) + { + return ck_tile::HostTensorDescriptor({row, col}, {stride, 1_uz}); + } + else + { + return ck_tile::HostTensorDescriptor({row, col}, {1_uz, stride}); + } + }; + + auto f_get_default_stride = + [](std::size_t row, std::size_t col, std::size_t stride, auto layout) { + if(stride == 0) + { + if constexpr(std::is_same_v) + { + return col; + } + else + { + return row; + } + } + else + return stride; + }; + + stride_A = f_get_default_stride(M, K, stride_A, ALayout{}); + stride_B = f_get_default_stride(K, N, stride_B, BLayout{}); + stride_C = f_get_default_stride(M, N, stride_C, CLayout{}); + + ck_tile::HostTensor a_m_k(f_host_tensor_descriptor(M, K, stride_A, ALayout{})); + ck_tile::HostTensor b_k_n(f_host_tensor_descriptor(K, N, stride_B, BLayout{})); + ck_tile::HostTensor c_m_n_dev_result( + f_host_tensor_descriptor(M, N, stride_C, CLayout{})); + + // TODO: Add randomized number generation ranges for different datatypes + ck_tile::FillUniformDistributionIntegerValue{-3, 3, /*seed*/ 11939}(a_m_k); + ck_tile::FillUniformDistributionIntegerValue{-3, 3, /*seed*/ 11940}(b_k_n); + + ck_tile::DeviceMem a_m_k_dev_buf(a_m_k.get_element_space_size_in_bytes()); + ck_tile::DeviceMem b_k_n_dev_buf(b_k_n.get_element_space_size_in_bytes()); + ck_tile::DeviceMem c_m_n_dev_buf(c_m_n_dev_result.get_element_space_size_in_bytes()); + + a_m_k_dev_buf.ToDevice(a_m_k.data()); + b_k_n_dev_buf.ToDevice(b_k_n.data()); + c_m_n_dev_buf.SetZero(); + c_m_n_dev_result.SetZero(); + + ck_tile::StreamKHostArgs args{a_m_k_dev_buf.GetDeviceBuffer(), + b_k_n_dev_buf.GetDeviceBuffer(), + c_m_n_dev_buf.GetDeviceBuffer(), + M, + N, + K, + stride_A, + stride_B, + stride_C, + reduction_strategy, + num_sk_blocks}; + + if(!invoke_streamk( + args, ck_tile::stream_config{nullptr, false, 0, 0, 1}, num_cu, occupancy)) + { + GTEST_SKIP() << "Skipping this test: The kernel cannot solve the problem\n"; + } + + c_m_n_dev_buf.FromDevice(c_m_n_dev_result.data()); + + ck_tile::HostTensor c_m_n_host_ref( + f_host_tensor_descriptor(M, N, stride_C, CLayout{})); + c_m_n_host_ref.SetZero(); + + ck_tile::reference_gemm( + a_m_k, b_k_n, c_m_n_host_ref); + + const float max_accumulated_value = + *std::max_element(c_m_n_host_ref.mData.begin(), c_m_n_host_ref.mData.end()); + const auto rtol_atol = calculate_rtol_atol( + K, /*kbatch*/ 1, max_accumulated_value); + + bool pass = ck_tile::check_err(c_m_n_dev_result, + c_m_n_host_ref, + "Error: Incorrect results!", + rtol_atol.at(ck_tile::number<0>{}), + rtol_atol.at(ck_tile::number<1>{})); + + EXPECT_TRUE(pass); + }; +}; diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_cases.inc b/test/ck_tile/gemm_streamk/test_gemm_streamk_cases.inc index 1db7ef0fb0..ff597d5015 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_cases.inc +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_cases.inc @@ -3,27 +3,149 @@ #pragma once -TYPED_TEST(TEST_SUITE_NAME, StreamK_M256_N256_K256_DP) -{ +// Ensure that we have the required macros defined before proceeding +#ifndef TEST_SUITE_NAME +#error "TEST_SUITE_NAME must be defined before including this file" +#endif +#ifndef TEST_SUITE_PARAMS +#error "TEST_SUITE_PARAMS must be defined before including this file" +#endif - ck_tile::index_t M = 256; - ck_tile::index_t N = 256; - ck_tile::index_t K = 256; - uint32_t num_sk_blocks = 0; +// Macros to help generate test names from the parameters given +// Concatenate is able to stitch the template parameters symbol together with the runtime args +// values +#define CONCATENATE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) \ + M##SIZE_M##_N##SIZE_N##_K##SIZE_K##_SKBlocks##NUM_SK_BLOCKS +// Helper macro to expand the arguments before passing them to CONCATENATE_TEST_NAME +#define MAKE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) \ + CONCATENATE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) - this->Run(M, N, K, num_sk_blocks); -} +// Macro to add a test TEST_NAME to the TEST_SUITE_NAME with the given parameters +#define STREAM_K_TEST_INTERNAL(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS, TEST_NAME) \ + TYPED_TEST(TEST_SUITE_NAME, TEST_NAME) \ + { \ + ck_tile::index_t M = SIZE_M; \ + ck_tile::index_t N = SIZE_N; \ + ck_tile::index_t K = SIZE_K; \ + uint32_t num_sk_blocks = NUM_SK_BLOCKS; \ + \ + this->Run(M, N, K, num_sk_blocks); \ + } -TYPED_TEST(TEST_SUITE_NAME, StreamK_M256_N256_K256_SKBlocks4) -{ +// Macro that generates a test name from the TEST_SUITE_TPARAMS symbol and the given parameters, +// then adds that test to test suite TEST_SUITE_NAME +#define STREAM_K_TEST(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS) \ + STREAM_K_TEST_INTERNAL(SIZE_M, \ + SIZE_N, \ + SIZE_K, \ + NUM_SK_BLOCKS, \ + MAKE_TEST_NAME(SIZE_M, SIZE_N, SIZE_K, NUM_SK_BLOCKS)) - ck_tile::index_t M = 256; - ck_tile::index_t N = 256; - ck_tile::index_t K = 256; - uint32_t num_sk_blocks = 4; +STREAM_K_TEST(1, 1, 1, 0) +STREAM_K_TEST(1, 1, 1, 1) - this->Run(M, N, K, num_sk_blocks); -} +// TODO: fails for <= wave tile +// STREAM_K_TEST(16, 16, 16, 0) +// STREAM_K_TEST(16, 16, 16, 1) +// STREAM_K_TEST(32, 32, 16, 0) +// STREAM_K_TEST(32, 32, 16, 1) + +STREAM_K_TEST(32, 32, 32, 0) +STREAM_K_TEST(32, 32, 32, 1) +STREAM_K_TEST(32, 32, 32, 2) +STREAM_K_TEST(32, 32, 32, 3) + +/// Prime number odd offsets +STREAM_K_TEST(37, 32, 32, 0) +STREAM_K_TEST(37, 32, 32, 1) +STREAM_K_TEST(37, 32, 32, 2) +STREAM_K_TEST(37, 32, 32, 3) + +STREAM_K_TEST(32, 37, 32, 0) +STREAM_K_TEST(32, 37, 32, 1) +STREAM_K_TEST(32, 37, 32, 2) +STREAM_K_TEST(32, 37, 32, 3) + +// TODO: Fails +// STREAM_K_TEST(32, 32, 37, 0) +// STREAM_K_TEST(32, 32, 37, 1) +// STREAM_K_TEST(32, 32, 37, 2) +// STREAM_K_TEST(32, 32, 37, 3) + +// TODO: Fails +STREAM_K_TEST(37, 32, 37, 0) +STREAM_K_TEST(37, 32, 37, 1) +STREAM_K_TEST(37, 32, 37, 2) +STREAM_K_TEST(37, 32, 37, 3) + +STREAM_K_TEST(37, 37, 37, 0) +STREAM_K_TEST(37, 37, 37, 1) +STREAM_K_TEST(37, 37, 37, 2) +STREAM_K_TEST(37, 37, 37, 3) + +/// Cubed sizes +STREAM_K_TEST(256, 256, 256, 0) +STREAM_K_TEST(256, 256, 256, 4) +STREAM_K_TEST(256, 256, 256, 8) + +// TODO: Fails +// STREAM_K_TEST(272, 272, 272, 0) +// STREAM_K_TEST(272, 272, 272, 8) +// STREAM_K_TEST(272, 272, 272, 16) + +STREAM_K_TEST(288, 288, 288, 0) +STREAM_K_TEST(288, 288, 288, 4) +STREAM_K_TEST(288, 288, 288, 8) + +STREAM_K_TEST(512, 512, 512, 0) +STREAM_K_TEST(512, 512, 512, 8) +STREAM_K_TEST(512, 512, 512, 16) + +// TODO: Fails +// STREAM_K_TEST(528, 528, 528, 0) +// STREAM_K_TEST(528, 528, 528, 8) +// STREAM_K_TEST(528, 528, 528, 16) + +STREAM_K_TEST(544, 544, 544, 0) +STREAM_K_TEST(544, 544, 544, 8) +STREAM_K_TEST(544, 544, 544, 16) + +/// Long M skinny N and K +STREAM_K_TEST(512, 1, 1, 0) +STREAM_K_TEST(512, 1, 1, 8) +STREAM_K_TEST(512, 1, 1, 16) + +STREAM_K_TEST(512, 32, 32, 0) +STREAM_K_TEST(512, 32, 32, 8) +STREAM_K_TEST(512, 32, 32, 16) + +/// Long M and N and skinny K +// TODO: Fails with core dump +// STREAM_K_TEST(512, 512, 1, 0) +// STREAM_K_TEST(512, 512, 1, 8) +// STREAM_K_TEST(512, 512, 1, 16) + +STREAM_K_TEST(512, 512, 32, 0) +STREAM_K_TEST(512, 512, 32, 8) +STREAM_K_TEST(512, 512, 32, 16) + +/// Long M and K and skinny N +STREAM_K_TEST(512, 1, 512, 0) +STREAM_K_TEST(512, 1, 512, 8) +STREAM_K_TEST(512, 1, 512, 16) + +STREAM_K_TEST(512, 32, 512, 0) +STREAM_K_TEST(512, 32, 512, 8) +STREAM_K_TEST(512, 32, 512, 16) + +/// Long K and skinny M and N +STREAM_K_TEST(1, 1, 512, 0) +STREAM_K_TEST(1, 1, 512, 8) +STREAM_K_TEST(1, 1, 512, 16) + +STREAM_K_TEST(32, 32, 512, 0) +STREAM_K_TEST(32, 32, 512, 8) +STREAM_K_TEST(32, 32, 512, 16) // TODO: Renable this test once reduction is implemented TYPED_TEST(TEST_SUITE_NAME, StreamK_M256_N256_K256_SKBlocks12) @@ -39,72 +161,6 @@ TYPED_TEST(TEST_SUITE_NAME, StreamK_M256_N256_K256_SKBlocks12) this->Run(M, N, K, num_sk_blocks); } -TYPED_TEST(TEST_SUITE_NAME, StreamK_M256_N256_K256_SKBlocks8) -{ - - ck_tile::index_t M = 256; - ck_tile::index_t N = 256; - ck_tile::index_t K = 256; - uint32_t num_sk_blocks = 8; - - this->Run(M, N, K, num_sk_blocks); -} - -TYPED_TEST(TEST_SUITE_NAME, StreamK_M512_N512_K512_DP) -{ - - ck_tile::index_t M = 512; - ck_tile::index_t N = 512; - ck_tile::index_t K = 512; - uint32_t num_sk_blocks = 0; - - this->Run(M, N, K, num_sk_blocks); -} - -TYPED_TEST(TEST_SUITE_NAME, StreamK_M512_N512_K512_SKBlocks16) -{ - - ck_tile::index_t M = 512; - ck_tile::index_t N = 512; - ck_tile::index_t K = 512; - uint32_t num_sk_blocks = 16; - - this->Run(M, N, K, num_sk_blocks); -} - -TYPED_TEST(TEST_SUITE_NAME, StreamK_M512_N512_K512_SKBlocks8) -{ - - ck_tile::index_t M = 512; - ck_tile::index_t N = 512; - ck_tile::index_t K = 512; - uint32_t num_sk_blocks = 8; - - this->Run(M, N, K, num_sk_blocks); -} - -TYPED_TEST(TEST_SUITE_NAME, StreamK_M3840_N4096_K4096_DP) -{ - - ck_tile::index_t M = 3840; - ck_tile::index_t N = 4096; - ck_tile::index_t K = 4096; - uint32_t num_sk_blocks = 0; - - this->Run(M, N, K, num_sk_blocks); -} - -TYPED_TEST(TEST_SUITE_NAME, StreamK_M3840_N4096_K4096_SKBlocks64) -{ - - ck_tile::index_t M = 3840; - ck_tile::index_t N = 4096; - ck_tile::index_t K = 4096; - uint32_t num_sk_blocks = 64; - - this->Run(M, N, K, num_sk_blocks); -} - TYPED_TEST(TEST_SUITE_NAME, StreamK_Unsupported_Reduction) { diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk.cpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_common_includes.hpp similarity index 54% rename from test/ck_tile/gemm_streamk/test_gemm_streamk.cpp rename to test/ck_tile/gemm_streamk/test_gemm_streamk_common_includes.hpp index 99c3fb397f..b1faf3848b 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk.cpp +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_common_includes.hpp @@ -1,14 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#pragma once +#include "test_gemm_streamk.hpp" #include "test_gemm_streamk_types.hpp" #include "test_gemm_streamk_util.hpp" #include "gtest/gtest.h" - -#define TEST_SUITE_NAME TestCkTileStreamK - -TYPED_TEST_SUITE(TestCkTileStreamK, KernelTypesStreamK); - -#include "test_gemm_streamk_cases.inc" - -#undef TEST_SUITE_NAME diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp index 399f3f11e8..578eb31189 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_types.hpp @@ -1,12 +1,15 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#pragma once + #include #include #include "gtest/gtest.h" #include "ck_tile/host.hpp" +#include "test_gemm_streamk_util.hpp" using F16 = ck_tile::half_t; using F32 = float; @@ -15,11 +18,106 @@ using BF16 = ck_tile::bf16_t; using Row = ck_tile::tensor_layout::gemm::RowMajor; using Col = ck_tile::tensor_layout::gemm::ColumnMajor; -// clang-format off -using KernelTypesStreamK = ::testing::Types< -// ALayout BLayout CLayout ADataType BDataType AccDataType CDataType - std::tuple< Row, Col, Row, F16, F16, F32, F16>, - std::tuple< Row, Col, Row, BF16, BF16, F32, BF16> ->; +using Mem = ck_tile::integral_constant; +using CompV3 = ck_tile::integral_constant; +using CompV4 = ck_tile::integral_constant; -// clang-format on +using Persistent = std::true_type; +using NonPersistent = std::false_type; + +using I1 = ck_tile::number<1>; +using I2 = ck_tile::number<2>; +using I4 = ck_tile::number<4>; +using I8 = ck_tile::number<8>; +using I16 = ck_tile::number<16>; +using I32 = ck_tile::number<32>; +using I64 = ck_tile::number<64>; +using I128 = ck_tile::number<128>; +using I256 = ck_tile::number<256>; + +template +struct Layouts +{ + // clang-format off + // Create all combinations of A, B, Acc, C layouts + // ALayout, BLayout, CLayout, ADataType, BDataType, AccDataType, CDataType, M_MacroTile, N_MacroTile, K_MacroTile, M_Warps, N_Warps, K_Warps, M_MmaTile, N_MmaTile, K_MmaTile, PipelineType, Persistent + using RRR = ::testing::Types>; + using RRC = ::testing::Types>; + using RCR = ::testing::Types>; + using RCC = ::testing::Types>; + using CRR = ::testing::Types>; + using CRC = ::testing::Types>; + using CCR = ::testing::Types>; + using CCC = ::testing::Types>; + // clang-format on +}; + +// clang-format off +// Here we use macros to generate a large number of parameter sets for different test configurations. +// One parameter set is intended to be be implemented per .cpp file to keep the compile time down. +// The naming convention is as follows: +// __________________________________________________ ____________________________________________________________________________________ +// | Parameter Name | | Parameter Value Type | +// using F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = F16Layouts::RRR; +// / | \ \ \ \ \ | | | | \ \ \ \ \ \ \ \ \ +// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT LAYOUT MACRO MACRO MACRO WARPS WARPS WARPS MMA MMA MMA PIPELINE PERSISTENT LAYOUT +// TYPE TYPE TILE MxNxK TILE TYPE CLASS TILE TILE TILE M N K TILE TILE TILE TYPE TYPE +// MxNxK MxNxK M N K M N K +// +// The example options for each field are: +// - DATA_TYPE: F16, BF16 +// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC +// - PIPELINE_TYPE: Mem, CompV3, CompV4 +// - M_MACRO_TILE: 128, 256, etc +// - N_MACRO_TILE: 128, 256, etc +// - K_MACRO_TILE: 32, 64, 128, etc +// - M_WARPS: 2, 4, 1 +// - N_WARPS: 2, 1, 4 +// - K_WARPS: 1 +// - M_MMA_TILE: 32, 16 +// - N_MMA_TILE: 32, 16 +// - K_MMA_TILE: 16 +// - PERSISTENT_TYPE: NonPersistent, Persistent + +// Macro to concatenate the parameter name +// E.g. F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent +#define CONCATENATE_PARAM_NAME(DATA_TYPE, LAYOUT, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DATA_TYPE##_##LAYOUT##_##PIPELINE_TYPE##_##M_MACRO_TILE##x##N_MACRO_TILE##x##K_MACRO_TILE##_##M_WARPS##x##N_WARPS##x##K_WARPS##_##M_MMA_TILE##x##N_MMA_TILE##x##K_MMA_TILE##_##PERSISTENT + +// Macro to get the parameter value type +// E.g. F16Layouts::RRR +#define CONCATENATE_PARAM_VALUE(LAYOUTS_CLASS, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PIPELINE_TYPE, PERSISTENT, LAYOUT) \ + LAYOUTS_CLASS::LAYOUT + +// Macro to declare a single parameter set, consisting of a parameter name and value type +#define DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, LAYOUT, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + using CONCATENATE_PARAM_NAME(DATA_TYPE, LAYOUT, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) = \ + CONCATENATE_PARAM_VALUE(LAYOUTS_CLASS, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PIPELINE_TYPE, PERSISTENT, LAYOUT); + +// Macro to declare all layout combinations for a given set of parameters +#define DECLARE_PARAMS_ALL_LAYOUTS(LAYOUTS_CLASS, DATA_TYPE, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RRR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RRC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RCR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, RCC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CRR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CRC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CCR, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAM(LAYOUTS_CLASS, DATA_TYPE, CCC, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) + +#include "test_gemm_streamk_types_fp16.hpp" +#include "test_gemm_streamk_types_bf16.hpp" diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf16.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf16.hpp new file mode 100644 index 0000000000..07aa1e0f04 --- /dev/null +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_bf16.hpp @@ -0,0 +1,76 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "test_gemm_streamk_types.hpp" + +template +struct BF16Layouts +{ + // clang-format off + // For CDNA, we support [A, B, Acc, C] = [bf16, bf16, f32, bf16] and [bf16, bf16, f32, f32]: + using BF16_BF16_F32_BF16 = Layouts; + using BF16_BF16_F32_F32 = Layouts; + using RRR = detail::combine_t; + using RRC = detail::combine_t; + using RCR = detail::combine_t; + using RCC = detail::combine_t; + using CRR = detail::combine_t; + using CRC = detail::combine_t; + using CCR = detail::combine_t; + using CCC = detail::combine_t; + // clang-format on +}; +// clang-format off + +// Macro to declare all layout combinations for BF16 data type +#define DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAMS_ALL_LAYOUTS(BF16Layouts, BF16, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) + +// Macro to declare all layout combinations for BF16 data type and a variety of sizes +#define DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(PIPELINE_TYPE, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 128, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_BF16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) + +// Declare all BF16 parameter sets for different pipeline types and persistence options +DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(Mem, NonPersistent) +DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV3, NonPersistent) +DECLARE_BF16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV4, NonPersistent) + +// Here, we have a combination of parameter set symbols that we can use to compile into test cases +// __________________________________________________ +// | Parameter Name | +// using BF16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = ... +// / | \ \ \ \ \ +// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT +// TYPE TYPE TILE MxNxK TILE TYPE +// MxNxK MxNxK +// +// The options for each field are: +// - DATA TYPE: BF16 +// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC +// - PIPELINE_TYPE: Mem, CompV3, CompV4 +// - Macro Tile: 128x128x32, 128x128x64, 128x128x128, 256x128x32, 256x128x64, 128x256x32, 128x256x64, 256x256x32, 256x256x64 +// - Warps: 2x2x1 +// - MMA Tile: 32x32x16 +// - PERSISTENT_TYPE: NonPersistent + +// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp16.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp16.hpp new file mode 100644 index 0000000000..80dfdf99b3 --- /dev/null +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_types_fp16.hpp @@ -0,0 +1,77 @@ +// Copyright © Advanced Micro Devices, Inc., or its affiliates. +// SPDX-License-Identifier: MIT + +#pragma once + +#include "test_gemm_streamk_types.hpp" + +template +struct F16Layouts +{ + // clang-format off + // For CDNA, we support [A, B, Acc, C] = [f16, f16, f32, f16] and [f16, f16, f32, f32]: + using F16_F16_F32_F16 = Layouts; + using F16_F16_F32_F32 = Layouts; + using RRR = detail::combine_t; + using RRC = detail::combine_t; + using RCR = detail::combine_t; + using RCC = detail::combine_t; + using CRR = detail::combine_t; + using CRC = detail::combine_t; + using CCR = detail::combine_t; + using CCC = detail::combine_t; + // clang-format on +}; + +// clang-format off + +// Macro to declare all layout combinations for FP16 data type +#define DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) \ + DECLARE_PARAMS_ALL_LAYOUTS(F16Layouts, F16, PIPELINE_TYPE, M_MACRO_TILE, N_MACRO_TILE, K_MACRO_TILE, M_WARPS, N_WARPS, K_WARPS, M_MMA_TILE, N_MMA_TILE, K_MMA_TILE, PERSISTENT) + +// Macro to declare all layout combinations for FP16 data type and a variety of sizes +#define DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(PIPELINE_TYPE, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 128, 128, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 128, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 128, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 32, 2, 2, 1, 32, 32, 16, PERSISTENT) \ + DECLARE_F16_PARAMS_ALL_LAYOUTS(PIPELINE_TYPE, 256, 256, 64, 2, 2, 1, 32, 32, 16, PERSISTENT) + +// Declare all FP16 parameter sets for different pipeline types and persistence options +DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(Mem, NonPersistent) +DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV3, NonPersistent) +DECLARE_F16_PARAMS_ALL_LAYOUTS_ALL_SIZES(CompV4, NonPersistent) + +// Here, we have a combination of parameter set symbols that we can use to compile into test cases +// __________________________________________________ +// | Parameter Name | +// using F16_RRR_Mem_128x128x32_2x2x1_32x32x16_NonPersistent = ... +// / | \ \ \ \ \ +// DATA LAYOUT PIPELINE MACRO WARPS MMA PERSISTENT +// TYPE TYPE TILE MxNxK TILE TYPE +// MxNxK MxNxK +// +// The options for each field are: +// - DATA TYPE: F16 +// - LAYOUT: RRR, RRC, RCR, RCC, CRR, CRC, CCR, CCC +// - PIPELINE_TYPE: Mem, CompV3, CompV4 +// - Macro Tile: 128x128x32, 128x128x64, 128x128x128, 256x128x32, 256x128x64, 128x256x32, 128x256x64, 256x256x32, 256x256x64 +// - Warps: 2x2x1 +// - MMA Tile: 32x32x16 +// - PERSISTENT_TYPE: NonPersistent + +// clang-format on diff --git a/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp b/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp index b8a55b024d..1384bfc35b 100644 --- a/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp +++ b/test/ck_tile/gemm_streamk/test_gemm_streamk_util.hpp @@ -1,6 +1,8 @@ // Copyright © Advanced Micro Devices, Inc., or its affiliates. // SPDX-License-Identifier: MIT +#pragma once + #include #include #include @@ -37,246 +39,75 @@ auto calculate_rtol_atol(const ck_tile::index_t K, return ck_tile::make_tuple(std::max(rtol, rtol_split_k), std::max(atol, atol_split_k)); } -template -class TestCkTileStreamK : public ::testing::Test +enum struct GemmPipelineType { - protected: - using ALayout = std::tuple_element_t<0, Tuple>; - using BLayout = std::tuple_element_t<1, Tuple>; - using CLayout = std::tuple_element_t<2, Tuple>; - using ADataType = std::tuple_element_t<3, Tuple>; - using BDataType = std::tuple_element_t<4, Tuple>; - using AccDataType = std::tuple_element_t<5, Tuple>; - using CDataType = std::tuple_element_t<6, Tuple>; - using DsLayout = ck_tile::tuple<>; - using DsDataType = ck_tile::tuple<>; - - template - void invoke_streamk(const ck_tile::StreamKHostArgs& args, - const ck_tile::stream_config& s, - int num_cu, - int occupancy) - { - - constexpr ck_tile::index_t M_Tile = 128; - constexpr ck_tile::index_t N_Tile = 128; - constexpr ck_tile::index_t K_Tile = 32; - - constexpr ck_tile::index_t M_Warp = 2; - constexpr ck_tile::index_t N_Warp = 2; - constexpr ck_tile::index_t K_Warp = 1; - - constexpr ck_tile::index_t M_Warp_Tile = 32; - constexpr ck_tile::index_t N_Warp_Tile = 32; - constexpr ck_tile::index_t K_Warp_Tile = 16; - - constexpr bool kPadM = PadM; - constexpr bool kPadN = PadN; - constexpr bool kPadK = PadK; - constexpr bool preshuffle = Preshuffle; - - constexpr bool DoubleSmemBuffer = false; - constexpr int kBlockPerCu = 1; - constexpr bool StructuredSparsity = false; - constexpr bool NumWaveGroup = 1; - - using GemmShape = - ck_tile::TileGemmShape, - ck_tile::sequence, - ck_tile::sequence>; - - using TilePartitioner = ck_tile::StreamKTilePartitioner; - - using GemmUniversalTraits = ck_tile::TileGemmUniversalTraits; - - const auto Run = [&](const auto memory_operation_) { - constexpr auto memory_operation = memory_operation_.value; - constexpr auto scheduler = ck_tile::GemmPipelineScheduler::Intrawave; - - // We create the GEMM pipeline without specifying has_hot_loop or tail_num. - // This is because num_loop can vary (a) per WG and (b) per iteration of the Stream-K - // while loop. Instead, has_hot_loop and tail_num are determined in the Stream-K - // Kernel's RunGemm function. This is a similar pattern used by grouped GEMM. - using UniversalGemmProblem = ck_tile::UniversalGemmPipelineProblem; - // For initial testing, we will just test with one pipeline. - // More extensive testing is coming later and will test other pipelines. - using GemmPipeline = ck_tile::GemmPipelineAgBgCrMem; - - using GemmEpilogue = ck_tile::CShuffleEpilogue< - ck_tile::CShuffleEpilogueProblem, - AccDataType, - CDataType, - ck_tile::tuple<>, - CLayout, - ck_tile::element_wise::PassThrough, - TilePartitioner::MPerBlock, - TilePartitioner::NPerBlock, - M_Warp, - N_Warp, - M_Warp_Tile, - N_Warp_Tile, - K_Warp_Tile, - UniversalGemmProblem::TransposeC, - memory_operation>>; - - using Kernel = ck_tile::StreamKKernel; - - auto kargs = Kernel::MakeKernelArgs(args, num_cu, occupancy); - - if(!Kernel::IsSupportedArgument(kargs)) - { - EXPECT_TRUE(false); - } - - dim3 grid_dims = Kernel::GridSize(kargs.tile_partitioner); - dim3 block_dims = Kernel::BlockSize(); - - ck_tile::launch_kernel( - s, ck_tile::make_kernel(Kernel{}, grid_dims, block_dims, 0, kargs)); - }; - - Run(ck_tile::integral_constant{}); - } - - public: - // Since Stream-K is build on gfx9, the lower bound for CUs is 104. Thus, we default num_cu to - // 104 and occupancy to 1 to ensure tests are reproducible on different architectures. - void Run(ck_tile::index_t M, - ck_tile::index_t N, - ck_tile::index_t K, - uint32_t num_sk_blocks = 0xffffffff, - ck_tile::StreamKReductionStrategy reduction_strategy = - ck_tile::StreamKReductionStrategy::Atomic, - int occupancy = 1, - int num_cu = 104, - ck_tile::index_t stride_A = 0, - ck_tile::index_t stride_B = 0, - ck_tile::index_t stride_C = 0) - { - - using namespace ck_tile::literals; - - if(reduction_strategy == ck_tile::StreamKReductionStrategy::Reduction) - { - throw std::runtime_error("Reduction Strategy is current unsupported!\n"); - } - - auto f_host_tensor_descriptor = [](std::size_t row, - std::size_t col, - std::size_t stride, - auto layout) { - if constexpr(std::is_same_v) - { - return ck_tile::HostTensorDescriptor({row, col}, {stride, 1_uz}); - } - else - { - return ck_tile::HostTensorDescriptor({row, col}, {1_uz, stride}); - } - }; - - auto f_get_default_stride = - [](std::size_t row, std::size_t col, std::size_t stride, auto layout) { - if(stride == 0) - { - if constexpr(std::is_same_v) - { - return col; - } - else - { - return row; - } - } - else - return stride; - }; - - stride_A = f_get_default_stride(M, K, stride_A, ALayout{}); - stride_B = f_get_default_stride(K, N, stride_B, BLayout{}); - stride_C = f_get_default_stride(M, N, stride_C, CLayout{}); - - ck_tile::HostTensor a_m_k(f_host_tensor_descriptor(M, K, stride_A, ALayout{})); - ck_tile::HostTensor b_k_n(f_host_tensor_descriptor(K, N, stride_B, BLayout{})); - ck_tile::HostTensor c_m_n_dev_result( - f_host_tensor_descriptor(M, N, stride_C, CLayout{})); - - ck_tile::FillUniformDistributionIntegerValue{-5, 5, /*seed*/ 11939}(a_m_k); - ck_tile::FillUniformDistributionIntegerValue{-5, 5, /*seed*/ 11940}(b_k_n); - - ck_tile::DeviceMem a_m_k_dev_buf(a_m_k.get_element_space_size_in_bytes()); - ck_tile::DeviceMem b_k_n_dev_buf(b_k_n.get_element_space_size_in_bytes()); - ck_tile::DeviceMem c_m_n_dev_buf(c_m_n_dev_result.get_element_space_size_in_bytes()); - - a_m_k_dev_buf.ToDevice(a_m_k.data()); - b_k_n_dev_buf.ToDevice(b_k_n.data()); - c_m_n_dev_buf.SetZero(); - c_m_n_dev_result.SetZero(); - - ck_tile::StreamKHostArgs args{a_m_k_dev_buf.GetDeviceBuffer(), - b_k_n_dev_buf.GetDeviceBuffer(), - c_m_n_dev_buf.GetDeviceBuffer(), - M, - N, - K, - stride_A, - stride_B, - stride_C, - reduction_strategy, - num_sk_blocks}; - - invoke_streamk( - args, ck_tile::stream_config{nullptr, false, 0, 0, 1}, num_cu, occupancy); - - c_m_n_dev_buf.FromDevice(c_m_n_dev_result.data()); - - ck_tile::HostTensor c_m_n_host_ref( - f_host_tensor_descriptor(M, N, stride_C, CLayout{})); - c_m_n_host_ref.SetZero(); - - ck_tile::reference_gemm( - a_m_k, b_k_n, c_m_n_host_ref); - - const float max_accumulated_value = - *std::max_element(c_m_n_host_ref.mData.begin(), c_m_n_host_ref.mData.end()); - const auto rtol_atol = calculate_rtol_atol( - K, /*kbatch*/ 1, max_accumulated_value); - - bool pass = ck_tile::check_err(c_m_n_dev_result, - c_m_n_host_ref, - "Error: Incorrect results!", - rtol_atol.at(ck_tile::number<0>{}), - rtol_atol.at(ck_tile::number<1>{})); - - EXPECT_TRUE(pass); - }; + Mem, + CompV3, + CompV4 }; + +template +struct GemmPipelineTypeSelector; + +template +struct GemmPipelineTypeSelector +{ + using base_pipeline = ck_tile::BaseGemmPipelineAgBgCrMem; + using pipeline = ck_tile::GemmPipelineAgBgCrMem; + + static constexpr auto GetName() { return "GemmPipelineAgBgCrMem"; } +}; + +template +struct GemmPipelineTypeSelector +{ + using base_pipeline = ck_tile::BaseGemmPipelineAgBgCrCompV3; + using pipeline = ck_tile::GemmPipelineAgBgCrCompV3; + + static constexpr auto GetName() { return "GemmPipelineAgBgCrCompV3"; } +}; + +template +struct GemmPipelineTypeSelector +{ + using base_pipeline = ck_tile::BaseGemmPipelineAgBgCrCompV4; + using pipeline = ck_tile::GemmPipelineAgBgCrCompV4; + + static constexpr auto GetName() { return "GemmPipelineAgBgCrCompV4"; } +}; + +namespace detail { +template +struct combine; + +template +struct combine<::testing::Types, ::testing::Types> +{ + using type = ::testing::Types; +}; + +template +using combine_t = typename combine::type; +} // namespace detail + +// This is the base class for all stream-k tests +#define STREAM_K_TEST_CLASS_BASE TestCkTileStreamK + +// Macros to help generate test suite names from the parameters given +#define CONCATENATE_TEST_SUITE_NAME(PREFIX, TEST_PARAMS) PREFIX##_##TEST_PARAMS +// Helper macro to expand the arguments before passing them to CONCATENATE_TEST_SUITE_NAME +#define MAKE_TEST_SUITE_NAME_INTERNAL(TEST_BASE_NAME, TEST_PARAMS) \ + CONCATENATE_TEST_SUITE_NAME(TEST_BASE_NAME, TEST_PARAMS) + +// Final macro to be used to create the test suite name from the base class name and the test +// parameters +#define MAKE_TEST_SUITE_NAME(TEST_PARAMS) \ + MAKE_TEST_SUITE_NAME_INTERNAL(STREAM_K_TEST_CLASS_BASE, TEST_PARAMS) + +// Macro to declare a test suite with the given name and parameters, based on the base test class +#define DECLARE_STREAM_K_TEST(TEST_SUITE_NAME, TEST_SUITE_PARAMS) \ + template \ + class TEST_SUITE_NAME : public STREAM_K_TEST_CLASS_BASE \ + { \ + }; \ + TYPED_TEST_SUITE(TEST_SUITE_NAME, TEST_SUITE_PARAMS);