diff --git a/codegen/test/CMakeLists.txt b/codegen/test/CMakeLists.txt index 6dd130bc3f..94b1b99409 100644 --- a/codegen/test/CMakeLists.txt +++ b/codegen/test/CMakeLists.txt @@ -1,3 +1,10 @@ +option(USE_HIPRTC_FOR_CODEGEN_TESTS "Whether to enable hipRTC for codegen tests." ON) + +if(USE_HIPRTC_FOR_CODEGEN_TESTS) + add_compile_definitions(HIPRTC_FOR_CODEGEN_TESTS) + message("CK compiled with USE_HIPRTC_FOR_CODEGEN_TESTS set to ${USE_HIPRTC_FOR_CODEGEN_TESTS}") +endif() + list(APPEND CMAKE_PREFIX_PATH /opt/rocm) add_subdirectory(rtc) file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp) diff --git a/codegen/test/common.hpp b/codegen/test/common.hpp index f97b4436c1..7ea0b8cc83 100644 --- a/codegen/test/common.hpp +++ b/codegen/test/common.hpp @@ -1,27 +1,27 @@ #pragma once + +#include "ck/host/headers.hpp" +#include "ck/host/stringutils.hpp" +#include +#include +#include #include #include +#include #include #include #include -#include -#include -#include -#include #include -#include "ck/host/headers.hpp" -#include "rtc/hiprtc_enable_env.hpp" -#include "ck/host/stringutils.hpp" // NOLINTNEXTLINE -const char* const content_wrapper = R"__ck__( +const char* const ck_content_wrapper = R"__ck__( ${content} )__ck__"; template -inline std::string ck_content_wrapper(P p) +inline std::string content_wrapper(P p) { - return ck::host::InterpolateString(content_wrapper, + return ck::host::InterpolateString(ck_content_wrapper, {{"content", std::string{p.data(), p.size()}}}); } @@ -29,11 +29,9 @@ inline std::vector create_headers_for_test() { auto ck_headers = ck::host::GetHeaders(); std::vector result; - - std::transform(ck_headers.begin(), ck_headers.end(), std::back_inserter(result), [&](auto& p) { - return rtc::src_file{p.first, ck_content_wrapper(p.second)}; + std::transform(ck_headers.begin(), ck_headers.end(), std::back_inserter(result), [](auto& p) { + return rtc::src_file{p.first, content_wrapper(p.second)}; }); - return result; } @@ -83,7 +81,7 @@ bool allclose(const T& a, const U& b, double atol = 0.01, double rtol = 0.01) }); } -std::string classify(double x) +inline std::string classify(double x) { switch(std::fpclassify(x)) { diff --git a/codegen/test/gemm_multiple_d.cpp b/codegen/test/gemm_multiple_d.cpp index ed438c77a8..58c71c9036 100644 --- a/codegen/test/gemm_multiple_d.cpp +++ b/codegen/test/gemm_multiple_d.cpp @@ -1,33 +1,29 @@ -#include "common.hpp" #include "ck/host/device_gemm_multiple_d/problem.hpp" #include "ck/host/device_gemm_multiple_d/operation.hpp" #include "ck/host/headers.hpp" #include "ck/host/stringutils.hpp" #include "ck/host/utils.hpp" -#include -#include -#include -#include -#include +#include "common.hpp" #include #include +#include +#include +#include #include +#include +#include using half = _Float16; -// using half = __fp16; const std::string gemm_compile_check = R"__ck__( #include <${include}> extern "C" __global__ void f(const ck::half_t* a, const ck::half_t* b, ck::half_t* c) { using G = ${template}; - constexpr auto desc = - G::make_descriptor(ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m}, - ${k})), - ck::make_naive_tensor_descriptor(ck::make_tuple(${n}, - ${k}), ck::make_tuple(1, ${n})), ck::make_tuple(), - ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m}, - ${n}))); + constexpr auto desc = G::make_descriptor(ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m}, ${k})), + ck::make_naive_tensor_descriptor(ck::make_tuple(${n}, ${k}), ck::make_tuple(1, ${n})), + ck::make_tuple(), + ck::make_naive_tensor_descriptor_packed(ck::make_tuple(${m}, ${n}))); static_assert(desc.IsValid(), "Invalid ck gemm."); @@ -69,15 +65,15 @@ TEST_CASE(test_problem_kernel) {"m", std::to_string(prob.M)}, {"n", std::to_string(prob.N)}, {"k", std::to_string(prob.K)}}); - auto srcs = get_headers_for_test(); + auto srcs = get_headers_for_test(); srcs.push_back({"main.cpp", src}); rtc::compile_options options; - options.kernel_name = "f"; - auto k = rtc::compile_kernel(srcs, options); - auto block_size = solution.GetTemplateParameter("BlockSize"); - auto m_per_block = solution.GetTemplateParameter("MPerBlock"); - auto n_per_block = solution.GetTemplateParameter("NPerBlock"); - auto grid_size = ck::host::integer_divide_ceil(prob.M, m_per_block) * + options.kernel_name = "f"; + auto k = rtc::compile_kernel(srcs, options); + auto block_size = solution.GetTemplateParameter("BlockSize"); + auto m_per_block = solution.GetTemplateParameter("MPerBlock"); + auto n_per_block = solution.GetTemplateParameter("NPerBlock"); + auto grid_size = ck::host::integer_divide_ceil(prob.M, m_per_block) * ck::host::integer_divide_ceil(prob.N, n_per_block); k.launch(nullptr, grid_size * block_size, block_size)(a.data(), b.data(), c.data()); diff --git a/codegen/test/rtc/include/rtc/compile_kernel.hpp b/codegen/test/rtc/include/rtc/compile_kernel.hpp index 6f3d107afa..0b5decc311 100644 --- a/codegen/test/rtc/include/rtc/compile_kernel.hpp +++ b/codegen/test/rtc/include/rtc/compile_kernel.hpp @@ -1,10 +1,10 @@ #ifndef GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL #define GUARD_HOST_TEST_RTC_INCLUDE_RTC_COMPILE_KERNEL -#include #include -#include +#include #include +#include namespace rtc { diff --git a/codegen/test/rtc/include/rtc/hip.hpp b/codegen/test/rtc/include/rtc/hip.hpp index e962d4cd3e..3118de9e45 100644 --- a/codegen/test/rtc/include/rtc/hip.hpp +++ b/codegen/test/rtc/include/rtc/hip.hpp @@ -3,8 +3,8 @@ #include #include -#include #include +#include namespace rtc { diff --git a/codegen/test/rtc/include/rtc/hiprtc_enable_env.hpp b/codegen/test/rtc/include/rtc/hiprtc_enable_env.hpp deleted file mode 100644 index 9ee56154ee..0000000000 --- a/codegen/test/rtc/include/rtc/hiprtc_enable_env.hpp +++ /dev/null @@ -1,3 +0,0 @@ -#include - -CK_DECLARE_ENV_VAR_BOOL(CK_CODEGEN_TESTS_ENABLE_HIPRTC) \ No newline at end of file diff --git a/codegen/test/rtc/src/compile_kernel.cpp b/codegen/test/rtc/src/compile_kernel.cpp index b15756d6e0..445f8725d2 100644 --- a/codegen/test/rtc/src/compile_kernel.cpp +++ b/codegen/test/rtc/src/compile_kernel.cpp @@ -1,16 +1,16 @@ -#include "rtc/hip.hpp" -#include -// TODO include only if USE_RTC is set? -#include -#include -#include -#include -#include -#include -#include -#include -#include #include +#include +#include +#ifdef HIPRTC_FOR_CODEGEN_TESTS +#include +#endif +#include +#include +#include +#include +#include +#include +#include namespace rtc { @@ -106,6 +106,8 @@ kernel clang_compile_kernel(const std::vector& srcs, compile_options o return kernel{obj.data(), options.kernel_name}; } +#ifdef HIPRTC_FOR_CODEGEN_TESTS + struct hiprtc_src_file { hiprtc_src_file() = default; @@ -274,20 +276,18 @@ static kernel hiprtc_compile_kernel(const std::vector& srcs, compile_o if(cos.size() != 1) std::runtime_error("No code object"); auto& obj = cos.front(); - return kernel{obj.data(), options.kernel_name}; } +#endif + kernel compile_kernel(const std::vector& srcs, compile_options options) { - if(ck::EnvIsEnabled(CK_ENV(CK_CODEGEN_TESTS_ENABLE_HIPRTC))) - { - return hiprtc_compile_kernel(srcs, options); - } - else - { - return clang_compile_kernel(srcs, options); - } +#ifdef HIPRTC_FOR_CODEGEN_TESTS + return hiprtc_compile_kernel(srcs, options); +#else + return clang_compile_kernel(srcs, options); +#endif } } // namespace rtc diff --git a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp index 9dc4347d4c..12adbcd0fd 100644 --- a/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp +++ b/include/ck/tensor_operation/gpu/device/impl/device_batched_gemm_softmax_gemm_xdl_cshuffle.hpp @@ -1127,4 +1127,4 @@ struct DeviceBatchedGemmSoftmaxGemm_Xdl_CShuffle } // namespace device } // namespace tensor_operation -} // namespace ck \ No newline at end of file +} // namespace ck diff --git a/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp index c7d2dc316c..0e9bbf5806 100644 --- a/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp +++ b/include/ck/tensor_operation/gpu/element/binary_element_wise_operation.hpp @@ -340,8 +340,8 @@ struct Bilinear }; template <> - __host__ __device__ constexpr void operator()( - int8_t& y, const int32_t& x0, const int8_t& x1) const + __host__ __device__ constexpr void + operator()(int8_t& y, const int32_t& x0, const int8_t& x1) const { y = type_convert(alpha_ * type_convert(x0) + beta_ * type_convert(x1)); diff --git a/include/ck/utility/functional4.hpp b/include/ck/utility/functional4.hpp index e9a5ef50d1..70fb329fcc 100644 --- a/include/ck/utility/functional4.hpp +++ b/include/ck/utility/functional4.hpp @@ -36,7 +36,7 @@ struct unpack2_impl, Sequence> __host__ __device__ constexpr auto operator()(F&& f, X&& x, Y&& y) const { return ck::forward(f)(ck::forward(x).At(Number{})..., - ck::forward(y).At(Number{})...); + ck::forward(y).At(Number{})...); } }; diff --git a/include/ck/utility/type.hpp b/include/ck/utility/type.hpp index 2e7b2446b6..fb4e706739 100644 --- a/include/ck/utility/type.hpp +++ b/include/ck/utility/type.hpp @@ -113,7 +113,6 @@ constexpr T&& forward(typename remove_reference::type&& t_) noexcept return static_cast(t_); } -// TODO template struct is_const : false_type {}; template struct is_const : true_type {}; template< class T >