From ab80fd3d7f598233b81f1bd0f0b212de31ca3d8b Mon Sep 17 00:00:00 2001 From: Gino Lu Date: Wed, 30 Jul 2025 02:36:06 +0000 Subject: [PATCH] first commit --- .../core/utility/functional_with_tuple.hpp | 16 ++++++ include/ck_tile/host/kernel_launch.hpp | 56 ++++++++++++++++--- test/ck_tile/CMakeLists.txt | 1 + test/ck_tile/kernel_launch/CMakeLists.txt | 1 + .../kernel_launch/test_kernel_launch.cpp | 46 +++++++++++++++ 5 files changed, 112 insertions(+), 8 deletions(-) create mode 100644 test/ck_tile/kernel_launch/CMakeLists.txt create mode 100644 test/ck_tile/kernel_launch/test_kernel_launch.cpp diff --git a/include/ck_tile/core/utility/functional_with_tuple.hpp b/include/ck_tile/core/utility/functional_with_tuple.hpp index 4b40403190..b586c4286e 100644 --- a/include/ck_tile/core/utility/functional_with_tuple.hpp +++ b/include/ck_tile/core/utility/functional_with_tuple.hpp @@ -170,4 +170,20 @@ struct static_uford } }; +namespace detail { +template +auto tuple_slice_impl(Tuple&& t, std::index_sequence) +{ + return std::forward_as_tuple(std::get(std::forward(t))...); +} +} + +template +auto tuple_slice(Tuple&& t) +{ + return detail::tuple_slice_impl( + std::forward(t), + std::make_index_sequence{}); +} + } // namespace ck_tile diff --git a/include/ck_tile/host/kernel_launch.hpp b/include/ck_tile/host/kernel_launch.hpp index f6ccb6968b..84ef841710 100644 --- a/include/ck_tile/host/kernel_launch.hpp +++ b/include/ck_tile/host/kernel_launch.hpp @@ -5,6 +5,7 @@ #include "ck_tile/core/config.hpp" #include "ck_tile/core/utility/ignore.hpp" +#include "ck_tile/core/utility/functional_with_tuple.hpp" #include "ck_tile/host/hip_check_error.hpp" #include "ck_tile/host/stream_config.hpp" #include "ck_tile/host/timer.hpp" @@ -62,6 +63,13 @@ CK_TILE_HOST void launch_and_check(const stream_config& sc, Callables&&... calla HIP_CHECK_ERROR(hipGetLastError()); } } +template +CK_TILE_HOST void launch_and_check_tuple(const stream_config& sc, Callables&& callables) +{ + std::apply([&](auto&&... fs) { + launch_and_check(sc, std::forward(fs)...); + }, std::forward(callables)); +} // clang-format off /* @@ -90,10 +98,14 @@ CK_TILE_HOST void launch_and_check(const stream_config& sc, Callables&&... calla * ...); **/ // clang-format on -template +template CK_TILE_HOST float launch_kernel(const stream_config& s, Callables&&... callables) { - static_assert(sizeof...(callables) > 0, "At least one callable is required!"); + constexpr auto N_callables = sizeof...(callables); + + static_assert(N_callables > 0, "At least one callable is required!"); + static_assert(N_callables >= TimerEnd, "Wrong timer range."); + static_assert(TimerEnd >= TimerStart, "Wrong timer range."); if(!s.time_kernel_) { @@ -102,20 +114,41 @@ CK_TILE_HOST float launch_kernel(const stream_config& s, Callables&&... callable } auto time_launches = [&](auto timer) { + + auto callables_all = std::forward_as_tuple(std::forward(callables)...); + auto callables_start = tuple_slice<0, TimerStart>(callables_all); + auto callables_timed = tuple_slice(callables_all); + auto callables_end = tuple_slice(callables_all); + // Warmup for(int i = 0; i < s.cold_niters_; i++) { - launch_and_check(s, std::forward(callables)...); + launch_and_check_tuple(s, callables_all); } - timer.start(s.stream_id_); + float times = 0.f; + for(int i = 0; i < s.nrepeat_; i++) { - launch_and_check(s, std::forward(callables)...); - } - timer.stop(s.stream_id_); + if constexpr (std::tuple_size_v > 0) + { + launch_and_check_tuple(s, callables_start); + } - return timer.duration() / s.nrepeat_; + if constexpr (std::tuple_size_v > 0) + { + timer.start(s.stream_id_); + launch_and_check_tuple(s, callables_timed); + timer.stop(s.stream_id_); + times += timer.duration(); + } + if constexpr (std::tuple_size_v > 0) + { + launch_and_check_tuple(s, callables_end); + } + } + + return times / s.nrepeat_; }; if(s.is_gpu_timer_) @@ -128,6 +161,13 @@ CK_TILE_HOST float launch_kernel(const stream_config& s, Callables&&... callable } } +template +CK_TILE_HOST float launch_kernel(const stream_config& s, Callables&&... callables) +{ + return launch_kernel( + s, std::forward(callables)...); +} + template CK_TILE_HOST float launch_kernel_preprocess(const stream_config& s, PreprocessFunc preprocess, diff --git a/test/ck_tile/CMakeLists.txt b/test/ck_tile/CMakeLists.txt index 42605f2513..fa6f686804 100644 --- a/test/ck_tile/CMakeLists.txt +++ b/test/ck_tile/CMakeLists.txt @@ -21,3 +21,4 @@ add_subdirectory(add_rmsnorm2d_rdquant) # add_subdirectory(layernorm2d) # add_subdirectory(rmsnorm2d) add_subdirectory(gemm_block_scale) +add_subdirectory(kernel_launch) diff --git a/test/ck_tile/kernel_launch/CMakeLists.txt b/test/ck_tile/kernel_launch/CMakeLists.txt new file mode 100644 index 0000000000..10b6d5ede2 --- /dev/null +++ b/test/ck_tile/kernel_launch/CMakeLists.txt @@ -0,0 +1 @@ +add_gtest_executable(test_kernel_launch test_kernel_launch.cpp) diff --git a/test/ck_tile/kernel_launch/test_kernel_launch.cpp b/test/ck_tile/kernel_launch/test_kernel_launch.cpp new file mode 100644 index 0000000000..9c897a3449 --- /dev/null +++ b/test/ck_tile/kernel_launch/test_kernel_launch.cpp @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: MIT +// Copyright (c) 2025, Advanced Micro Devices, Inc. All rights reserved. + +#include "gtest/gtest.h" +#include +#include "ck_tile/host.hpp" + +using ck_tile::stream_config; +TEST(Kernel, old) +{ + stream_config s1; + s1.time_kernel_ = true; + s1.cold_niters_ = 1; + s1.nrepeat_ = 2; + ck_tile::launch_kernel(s1, // timer for all kernel + [=](const stream_config& s[[maybe_unused]]) { printf("0, "); }, + [=](const stream_config& s[[maybe_unused]]) { printf("1, "); }, + [=](const stream_config& s[[maybe_unused]]) { printf("2, "); } + ); +} + +TEST(Kernel, new_1) +{ + stream_config s1; + s1.time_kernel_ = true; + s1.cold_niters_ = 2; + s1.nrepeat_ = 2; + ck_tile::launch_kernel<1, 2>(s1, // timer for kernel [1, 2) + [=](const stream_config& s[[maybe_unused]]) { printf("0, "); }, + [=](const stream_config& s[[maybe_unused]]) { printf("1, "); }, + [=](const stream_config& s[[maybe_unused]]) { printf("2, "); } + ); +} + +TEST(Kernel, new_2) +{ + stream_config s1; + s1.time_kernel_ = true; + s1.cold_niters_ = 2; + s1.nrepeat_ = 2; + ck_tile::launch_kernel<1>(s1, // timer for kernel [1, N) + [=](const stream_config& s[[maybe_unused]]) { printf("0, "); }, + [=](const stream_config& s[[maybe_unused]]) { printf("1, "); }, + [=](const stream_config& s[[maybe_unused]]) { printf("2, "); } + ); +}