From 576bdcc359e7ab3bc3e76d5c4c57c475a71fb3ce Mon Sep 17 00:00:00 2001 From: arai713 <67439843+arai713@users.noreply.github.com> Date: Fri, 9 Aug 2024 08:15:06 -0700 Subject: [PATCH] Codegen build w/CK (#1428) * initial push * cleaned up compiler errors * removed commented code * build codegen folder only for gfx9 targets * remove separate stage for codegen tests from CI * removed commented code from CMake --------- Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com> Co-authored-by: illsilin [ROCm/composable_kernel commit: da214a5a58fc232cbed2bbc2bef6156f49057c40] --- CMakeLists.txt | 3 ++ Jenkinsfile | 31 ---------------- codegen/CMakeLists.txt | 18 +++------- codegen/include/ck/host/types.hpp | 3 ++ ...gemm_multiple_d_operation_xdl_cshuffle.cpp | 13 +++---- ...wd_multiple_abd_operation_xdl_cshuffle.cpp | 36 +++++-------------- codegen/src/headers.cpp | 3 ++ codegen/test/CMakeLists.txt | 4 ++- .../test/grouped_conv_fwd_multiple_d_v1.cpp | 2 -- .../test/grouped_conv_fwd_multiple_d_v2.cpp | 2 -- .../test/grouped_conv_fwd_multiple_d_v3.cpp | 2 -- .../test/grouped_conv_fwd_multiple_d_v4.cpp | 2 -- codegen/test/rtc/src/kernel.cpp | 2 +- codegen/test/rtc/src/tmp_dir.cpp | 2 +- 14 files changed, 34 insertions(+), 89 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c00db26f3a..96a49b1c00 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -541,6 +541,9 @@ if(NOT DEFINED INSTANCES_ONLY) PACKAGE_NAME examples ) add_subdirectory(example) + if(GPU_TARGETS MATCHES "gfx9" AND NOT INSTANCES_ONLY) + add_subdirectory(codegen) + endif() if(BUILD_TESTING) add_subdirectory(test) endif() diff --git a/Jenkinsfile b/Jenkinsfile index 139f928037..3fccb2881b 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -746,10 +746,6 @@ pipeline { name: "RUN_PERFORMANCE_TESTS", defaultValue: true, description: "Run the performance tests (default: ON)") - booleanParam( - name: "RUN_CODEGEN_TESTS", - defaultValue: true, - description: "Run the codegen tests (default: ON)") booleanParam( name: "RUN_CK_TILE_TESTS", defaultValue: false, @@ -841,33 +837,6 @@ pipeline { } } } - stage("Run Codegen Tests") - { - parallel - { - stage("Run Codegen Tests on gfx90a") - { - when { - beforeAgent true - expression { params.RUN_CODEGEN_TESTS.toBoolean() } - } - agent{ label rocmnode("gfx90a")} - environment{ - setup_args = "NO_CK_BUILD" - execute_args = """ cd ../codegen && rm -rf build && mkdir build && cd build && \ - cmake -D CMAKE_PREFIX_PATH=/opt/rocm \ - -D CMAKE_CXX_COMPILER=/opt/rocm/llvm/bin/clang++ \ - -D CMAKE_BUILD_TYPE=Release \ - -D GPU_TARGETS="gfx90a" \ - -DCMAKE_CXX_FLAGS=" -O3 " .. && make -j check""" - } - steps{ - buildHipClangJobAndReboot(setup_args:setup_args, no_reboot:true, build_type: 'Release', execute_cmd: execute_args) - cleanWs() - } - } - } - } stage("Run CK_TILE Tests") { parallel diff --git a/codegen/CMakeLists.txt b/codegen/CMakeLists.txt index d8b22fc943..d08fe2380b 100644 --- a/codegen/CMakeLists.txt +++ b/codegen/CMakeLists.txt @@ -1,6 +1,3 @@ -cmake_minimum_required(VERSION 3.16) -project(composable_kernel_host LANGUAGES CXX HIP) - set(CMAKE_EXPORT_COMPILE_COMMANDS ON) set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) @@ -8,17 +5,9 @@ set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin) set(CK_ROOT ${CMAKE_CURRENT_SOURCE_DIR}/..) -find_package(ROCM) -include(ROCMInstallTargets) -include(ROCMTest) - add_compile_options(-std=c++17) find_package(hip) -## HIP -set(CMAKE_HIP_PLATFORM amd) -set(CMAKE_HIP_COMPILER ${CMAKE_CXX_COMPILER}) -set(CMAKE_HIP_EXTENSIONS ON) -message("CMAKE_HIP_COMPILER: ${CMAKE_HIP_COMPILER}") +add_custom_target(codegen) # add include directories include_directories(BEFORE @@ -32,8 +21,9 @@ list(APPEND CMAKE_MODULE_PATH ${CK_ROOT}/cmake) include(Embed) file(GLOB_RECURSE KERNEL_FILES CONFIGURE_DEPENDS ${CK_ROOT}/include/ck/*.hpp) -message(STATUS "KERNEL_FILES: ${KERNEL_FILES}") -message(STATUS "RELATIVE: ${CK_ROOT}/include") +#printouts fot debug purposes +#message(STATUS "KERNEL_FILES: ${KERNEL_FILES}") +#message(STATUS "RELATIVE: ${CK_ROOT}/include") add_embed_library(ck_headers ${KERNEL_FILES} RELATIVE ${CK_ROOT}/include) file(GLOB SOURCES CONFIGURE_DEPENDS src/*.cpp) diff --git a/codegen/include/ck/host/types.hpp b/codegen/include/ck/host/types.hpp index 812c073678..8bad7bf89c 100644 --- a/codegen/include/ck/host/types.hpp +++ b/codegen/include/ck/host/types.hpp @@ -76,8 +76,11 @@ std::string SequenceStr(const std::vector& v); std::string MakeTuple(const std::vector& v); +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wglobal-constructors" template const std::string S = SequenceStr({xs...}); +#pragma clang diagnostic pop constexpr const char* PassThrough = "ck::tensor_operation::element_wise::PassThrough"; constexpr const char* Bilinear = "ck::tensor_operation::element_wise::Bilinear"; diff --git a/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp b/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp index a2e8eccbf1..fff75c1962 100644 --- a/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp +++ b/codegen/src/device_gemm_multiple_d_operation_xdl_cshuffle.cpp @@ -3,6 +3,7 @@ #include "ck/host/device_gemm_multiple_d/operation.hpp" #include "ck/host/stringutils.hpp" +#include "ck/host/types.hpp" #include "ck/host/utils.hpp" #include @@ -32,11 +33,11 @@ static std::string GetGemmSpec(const std::size_t m, } // function to update prologue/epilogue with user provided operation -void Operation_Xdl_CShuffle::update_prologue(const std::string& prologue) +void Operation_Xdl_CShuffle::update_prologue(const std::string& pro) { - if(!prologue.empty()) + if(!pro.empty()) { - this->prologue = prologue; + this->prologue = pro; this->cde_elem_op = "CDEElementOp"; } else @@ -45,11 +46,11 @@ void Operation_Xdl_CShuffle::update_prologue(const std::string& prologue) } } -void Operation_Xdl_CShuffle::update_epilogue(const std::string& epilogue) +void Operation_Xdl_CShuffle::update_epilogue(const std::string& epi) { - if(!epilogue.empty()) + if(!epi.empty()) { - this->epilogue = epilogue; + this->epilogue = epi; this->cde_elem_op = "CDEElementOp"; } else diff --git a/codegen/src/device_grouped_conv_fwd_multiple_abd_operation_xdl_cshuffle.cpp b/codegen/src/device_grouped_conv_fwd_multiple_abd_operation_xdl_cshuffle.cpp index 5ed59dd56b..36c9a13b4c 100644 --- a/codegen/src/device_grouped_conv_fwd_multiple_abd_operation_xdl_cshuffle.cpp +++ b/codegen/src/device_grouped_conv_fwd_multiple_abd_operation_xdl_cshuffle.cpp @@ -4,6 +4,7 @@ #include "ck/host/device_grouped_conv_fwd_multiple_d/conv_fwd_op.hpp" #include #include "ck/host/stringutils.hpp" +#include "ck/host/types.hpp" #include "ck/host/utils.hpp" #include @@ -11,34 +12,15 @@ namespace ck { namespace host { namespace conv { -// calculate appropriate Gemm Specification based on input tensor dimensions -// NOTE: in CK, MNKPadding is always used for forward convolution -static std::string GetGemmSpec(const std::size_t m, - const std::size_t n, - const std::size_t k, - const std::size_t m_per_block, - const std::size_t n_per_block, - const std::size_t k_per_block) -{ - std::string spec = ""; - if(integer_divide_ceil(m, m_per_block) * m_per_block - m != 0) - spec += "M"; - if(integer_divide_ceil(n, n_per_block) * n_per_block - n != 0) - spec += "N"; - if(integer_divide_ceil(k, k_per_block) * k_per_block - k != 0) - spec += "K"; - if(spec == "") - return "ck::tensor_operation::device::GemmSpecialization::Default"; - - return "ck::tensor_operation::device::GemmSpecialization::" + spec + "Padding"; -} +// NOTE: in CK, MNKPadding is always used for forward convolution, so didn't +// add GemmSpec function here // function to update prologue/epilogue with user provided operation -void Operation_Conv_Fwd_Xdl_Cshuffle::update_prologue(const std::string& prologue) +void Operation_Conv_Fwd_Xdl_Cshuffle::update_prologue(const std::string& pro) { - if(!prologue.empty()) + if(!pro.empty()) { - this->prologue = prologue; + this->prologue = pro; this->cde_elem_op = "CDEElementOp"; } else @@ -47,11 +29,11 @@ void Operation_Conv_Fwd_Xdl_Cshuffle::update_prologue(const std::string& prologu } } -void Operation_Conv_Fwd_Xdl_Cshuffle::update_epilogue(const std::string& epilogue) +void Operation_Conv_Fwd_Xdl_Cshuffle::update_epilogue(const std::string& epi) { - if(!epilogue.empty()) + if(!epi.empty()) { - this->epilogue = epilogue; + this->epilogue = epi; this->cde_elem_op = "CDEElementOp"; } else diff --git a/codegen/src/headers.cpp b/codegen/src/headers.cpp index f685aca044..5b0c929db3 100644 --- a/codegen/src/headers.cpp +++ b/codegen/src/headers.cpp @@ -4,7 +4,10 @@ namespace ck { namespace host { +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wglobal-constructors" const std::string config_header = ""; +#pragma clang diagnostic pop std::unordered_map GetHeaders() { diff --git a/codegen/test/CMakeLists.txt b/codegen/test/CMakeLists.txt index f891286019..5aad1ef877 100644 --- a/codegen/test/CMakeLists.txt +++ b/codegen/test/CMakeLists.txt @@ -4,7 +4,9 @@ file(GLOB TEST_SRCS CONFIGURE_DEPENDS *.cpp) foreach(TEST_SRC ${TEST_SRCS}) set_source_files_properties(${TEST_SRC} PROPERTIES LANGUAGE HIP) get_filename_component(BASE_NAME ${TEST_SRC} NAME_WE) - rocm_add_test_executable(test_host_${BASE_NAME} ${TEST_SRC}) + add_executable(test_host_${BASE_NAME} ${TEST_SRC}) + add_dependencies(codegen test_host_${BASE_NAME}) + add_test(NAME codegen_test_${BASE_NAME} COMMAND test_host_${BASE_NAME}) target_link_libraries(test_host_${BASE_NAME} ck_rtc ck_host) # target_link_libraries(test_host_${BASE_NAME} ${CK_ROOT}/build/lib/libutility.a) target_include_directories(test_host_${BASE_NAME} PUBLIC include()) diff --git a/codegen/test/grouped_conv_fwd_multiple_d_v1.cpp b/codegen/test/grouped_conv_fwd_multiple_d_v1.cpp index 3c477692e5..50290fa25a 100644 --- a/codegen/test/grouped_conv_fwd_multiple_d_v1.cpp +++ b/codegen/test/grouped_conv_fwd_multiple_d_v1.cpp @@ -92,7 +92,6 @@ struct Epilogue static_cast(prob.C), static_cast(prob.Y), static_cast(prob.X)}; - ck::Array d_lengths = {}; ck::Array in_strides{static_cast(prob.C), static_cast(prob.Hi * prob.Wi * prob.G * prob.C), @@ -109,7 +108,6 @@ struct Epilogue 1, static_cast(prob.X * prob.C), static_cast(prob.C)}; - ck::Array d_strides = {}; ck::Array conv_filter_strides = {2, 2}; ck::Array conv_filter_dilations = {1, 1}; diff --git a/codegen/test/grouped_conv_fwd_multiple_d_v2.cpp b/codegen/test/grouped_conv_fwd_multiple_d_v2.cpp index ec9bd2b781..b558d97c78 100644 --- a/codegen/test/grouped_conv_fwd_multiple_d_v2.cpp +++ b/codegen/test/grouped_conv_fwd_multiple_d_v2.cpp @@ -92,7 +92,6 @@ struct Epilogue static_cast(prob.C), static_cast(prob.Y), static_cast(prob.X)}; - ck::Array d_lengths = {}; ck::Array in_strides{static_cast(prob.C), static_cast(prob.Hi * prob.Wi * prob.G * prob.C), @@ -109,7 +108,6 @@ struct Epilogue 1, static_cast(prob.X * prob.C), static_cast(prob.C)}; - ck::Array d_strides = {}; ck::Array conv_filter_strides = {1, 1}; ck::Array conv_filter_dilations = {1, 1}; diff --git a/codegen/test/grouped_conv_fwd_multiple_d_v3.cpp b/codegen/test/grouped_conv_fwd_multiple_d_v3.cpp index 9850184c5e..e2972a93d2 100644 --- a/codegen/test/grouped_conv_fwd_multiple_d_v3.cpp +++ b/codegen/test/grouped_conv_fwd_multiple_d_v3.cpp @@ -92,7 +92,6 @@ struct Epilogue static_cast(prob.C), static_cast(prob.Y), static_cast(prob.X)}; - ck::Array d_lengths = {}; ck::Array in_strides{static_cast(prob.C), static_cast(prob.Hi * prob.Wi * prob.G * prob.C), @@ -109,7 +108,6 @@ struct Epilogue 1, static_cast(prob.X * prob.C), static_cast(prob.C)}; - ck::Array d_strides = {}; ck::Array conv_filter_strides = {2, 2}; ck::Array conv_filter_dilations = {1, 1}; diff --git a/codegen/test/grouped_conv_fwd_multiple_d_v4.cpp b/codegen/test/grouped_conv_fwd_multiple_d_v4.cpp index 907f744db4..b728096c51 100644 --- a/codegen/test/grouped_conv_fwd_multiple_d_v4.cpp +++ b/codegen/test/grouped_conv_fwd_multiple_d_v4.cpp @@ -92,7 +92,6 @@ struct Epilogue static_cast(prob.C), static_cast(prob.Y), static_cast(prob.X)}; - ck::Array d_lengths = {}; ck::Array in_strides{static_cast(prob.C), static_cast(prob.Hi * prob.Wi * prob.G * prob.C), @@ -109,7 +108,6 @@ struct Epilogue 1, static_cast(prob.X * prob.C), static_cast(prob.C)}; - ck::Array d_strides = {}; ck::Array conv_filter_strides = {1, 1}; ck::Array conv_filter_dilations = {1, 1}; diff --git a/codegen/test/rtc/src/kernel.cpp b/codegen/test/rtc/src/kernel.cpp index f4fb19130c..9fe38e84ad 100644 --- a/codegen/test/rtc/src/kernel.cpp +++ b/codegen/test/rtc/src/kernel.cpp @@ -118,4 +118,4 @@ void kernel::launch(hipStream_t stream, launch_kernel(impl->fun, stream, global, local, kernargs.data(), size); } -} // namespace rtc \ No newline at end of file +} // namespace rtc diff --git a/codegen/test/rtc/src/tmp_dir.cpp b/codegen/test/rtc/src/tmp_dir.cpp index 3b0f0170e8..1cc8f75b29 100644 --- a/codegen/test/rtc/src/tmp_dir.cpp +++ b/codegen/test/rtc/src/tmp_dir.cpp @@ -45,4 +45,4 @@ void tmp_dir::execute(const std::string& cmd) const tmp_dir::~tmp_dir() { std::filesystem::remove_all(this->path); } -} // namespace rtc \ No newline at end of file +} // namespace rtc