From e9ec2910a0f2ca083be4e1f862f7b18cab09e266 Mon Sep 17 00:00:00 2001 From: Artur Wojcik Date: Wed, 20 Dec 2023 23:34:53 +0100 Subject: [PATCH] enable compilation of INSTANCES_ONLY for Windows (#1082) * enable compilation of INSTANCES_ONLY for Windows * suppress ROCMChecks warnings on GoogleTests * suppress -Wfloat-equal warning on GoogleTests --------- Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com> [ROCm/composable_kernel commit: fb5bd51b42e68decf2a5f17cf10dd2f97f890d11] --- .gitignore | 9 +++ CMakeLists.txt | 34 +++++---- cmake/getopt.cmake | 28 ++++++++ cmake/googletest.cmake | 50 ------------- cmake/gtest.cmake | 71 +++++++++++++++++++ .../element/unary_element_wise_operation.hpp | 5 ++ ...elementwise_layernorm_welford_variance.hpp | 4 +- .../tensor_operation/gpu/warp/wmma_gemm.hpp | 2 +- .../gpu/CMakeLists.txt | 1 - .../gpu/softmax/CMakeLists.txt | 4 +- library/src/utility/CMakeLists.txt | 10 +-- profiler/src/CMakeLists.txt | 2 +- test/CMakeLists.txt | 11 ++- 13 files changed, 149 insertions(+), 82 deletions(-) create mode 100644 cmake/getopt.cmake delete mode 100644 cmake/googletest.cmake create mode 100644 cmake/gtest.cmake diff --git a/.gitignore b/.gitignore index 340f11cbd2..090594a8df 100644 --- a/.gitignore +++ b/.gitignore @@ -55,3 +55,12 @@ _static/ _templates/ _toc.yml _doxygen/ + +# JetBrains IDE +.idea/ +cmake-build*/ +build*/ + +# Python virtualenv +.venv/ + diff --git a/CMakeLists.txt b/CMakeLists.txt index d78e887efb..240832998d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,22 +4,27 @@ if(POLICY CMP0140) cmake_policy(SET CMP0140 NEW) endif() +get_property(_GENERATOR_IS_MULTI_CONFIG GLOBAL PROPERTY GENERATOR_IS_MULTI_CONFIG) + # This has to be initialized before the project() command appears # Set the default of CMAKE_BUILD_TYPE to be release, unless user specifies with -D. MSVC_IDE does not use CMAKE_BUILD_TYPE -if( NOT MSVC_IDE AND NOT CMAKE_BUILD_TYPE ) - set( CMAKE_BUILD_TYPE Release CACHE STRING "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel." ) +if(_GENERATOR_IS_MULTI_CONFIG) + set(CMAKE_CONFIGURATION_TYPES "Debug;Release;RelWithDebInfo;MinSizeRel" CACHE STRING + "Available build types (configurations) on multi-config generators") +else() + set(CMAKE_BUILD_TYPE Release CACHE STRING + "Choose the type of build, options are: None Debug Release RelWithDebInfo MinSizeRel.") endif() # Default installation path -if(WIN32) - set(CMAKE_INSTALL_PREFIX "/opt/rocm/x86_64-w64-mingw32" CACHE PATH "") -else() +if(NOT WIN32) set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "") endif() set(version 1.1.0) # Check support for CUDA/HIP in Cmake -project(composable_kernel VERSION ${version}) +project(composable_kernel VERSION ${version} LANGUAGES CXX) +include(CTest) list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") @@ -73,15 +78,15 @@ if(INSTANCES_ONLY) set(CK_ENABLE_INSTANCES_ONLY "ON") endif() +include(getopt) + # CK config file to record supported datatypes, etc. -configure_file("${PROJECT_SOURCE_DIR}/include/ck/config.h.in" "${PROJECT_BINARY_DIR}/include/ck/config.h") +configure_file(include/ck/config.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/ck/config.h) # CK version file to record release version as well as git commit hash find_package(Git REQUIRED) execute_process(COMMAND "${GIT_EXECUTABLE}" rev-parse HEAD OUTPUT_VARIABLE COMMIT_ID OUTPUT_STRIP_TRAILING_WHITESPACE) -configure_file("${PROJECT_SOURCE_DIR}/include/ck/version.h.in" "${PROJECT_BINARY_DIR}/include/ck/version.h") - -enable_testing() +configure_file(include/ck/version.h.in ${CMAKE_CURRENT_BINARY_DIR}/include/ck/version.h) set(ROCM_SYMLINK_LIBS OFF) find_package(ROCM REQUIRED PATHS /opt/rocm) @@ -97,7 +102,7 @@ include(TargetFlags) rocm_setup_version(VERSION ${version}) -list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip) +list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip "$ENV{ROCM_PATH}" "$ENV{HIP_PATH}") message("GPU_TARGETS= ${GPU_TARGETS}") @@ -142,7 +147,7 @@ find_package(hip) # SWDEV-413293 and https://reviews.llvm.org/D155213 math(EXPR hip_VERSION_FLAT "(${hip_VERSION_MAJOR} * 1000 + ${hip_VERSION_MINOR}) * 100000 + ${hip_VERSION_PATCH}") message("hip_version_flat=${hip_VERSION_FLAT}") -if(${hip_VERSION_FLAT} GREATER 500723302) +if(NOT WIN32 AND ${hip_VERSION_FLAT} GREATER 500723302) message("Adding the fno-offload-uniform-block compiler flag") add_compile_options(-fno-offload-uniform-block) endif() @@ -195,7 +200,6 @@ find_package(Threads REQUIRED) link_libraries(Threads::Threads) ## C++ -enable_language(CXX) set(CMAKE_CXX_STANDARD 17) set(CMAKE_CXX_STANDARD_REQUIRED ON) set(CMAKE_CXX_EXTENSIONS OFF) @@ -466,7 +470,9 @@ if(NOT DEFINED INSTANCES_ONLY) PACKAGE_NAME examples ) add_subdirectory(example) - add_subdirectory(test) + if(BUILD_TESTING) + add_subdirectory(test) + endif() rocm_package_setup_component(profiler LIBRARY_NAME composablekernel diff --git a/cmake/getopt.cmake b/cmake/getopt.cmake new file mode 100644 index 0000000000..dd985ff472 --- /dev/null +++ b/cmake/getopt.cmake @@ -0,0 +1,28 @@ +# SPDX-License-Identifier: MIT +# Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved. + +add_library(getopt::getopt INTERFACE IMPORTED GLOBAL) + +if(WIN32) + include(FetchContent) + + FetchContent_Declare( + getopt + GIT_REPOSITORY https://github.com/apwojcik/getopt.git + GIT_TAG main + SYSTEM + ) + + set(__build_shared_libs ${BUILD_SHARED_LIBS}) + set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "") + + FetchContent_MakeAvailable(getopt) + + # Restore the old value of BUILD_SHARED_LIBS + set(BUILD_SHARED_LIBS ${__build_shared_libs} CACHE BOOL "Type of libraries to build" FORCE) + + FetchContent_GetProperties(getopt) + + target_link_libraries(getopt::getopt INTERFACE wingetopt) + target_include_directories(getopt::getopt INTERFACE ${getopt_SOURCE_DIR}/src) +endif() \ No newline at end of file diff --git a/cmake/googletest.cmake b/cmake/googletest.cmake deleted file mode 100644 index d6577ac33e..0000000000 --- a/cmake/googletest.cmake +++ /dev/null @@ -1,50 +0,0 @@ -include(FetchContent) - -set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against") - -if(GOOGLETEST_DIR) - set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override") -endif() - -message(STATUS "Fetching GoogleTest") - -list(APPEND GTEST_CMAKE_CXX_FLAGS - -Wno-undef - -Wno-reserved-identifier - -Wno-global-constructors - -Wno-missing-noreturn - -Wno-disabled-macro-expansion - -Wno-used-but-marked-unused - -Wno-switch-enum - -Wno-zero-as-null-pointer-constant - -Wno-unused-member-function - -Wno-comma - -Wno-old-style-cast - -Wno-deprecated - -Wno-unsafe-buffer-usage -) -message(STATUS "Suppressing googltest warnings with flags: ${GTEST_CMAKE_CXX_FLAGS}") - -FetchContent_Declare( - googletest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG b85864c64758dec007208e56af933fc3f52044ee -) - -# Will be necessary for windows build -# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) -FetchContent_GetProperties(googletest) -if(NOT googletest_POPULATED) - FetchContent_Populate(googletest) - add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL) -endif() - -target_compile_options(gtest PRIVATE ${GTEST_CMAKE_CXX_FLAGS}) -target_compile_options(gtest_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS}) -target_compile_options(gmock PRIVATE ${GTEST_CMAKE_CXX_FLAGS}) -target_compile_options(gmock_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS}) - -set_target_properties(gtest PROPERTIES POSITION_INDEPENDENT_CODE ON) -set_target_properties(gtest_main PROPERTIES POSITION_INDEPENDENT_CODE ON) -set_target_properties(gmock PROPERTIES POSITION_INDEPENDENT_CODE ON) -set_target_properties(gmock_main PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/cmake/gtest.cmake b/cmake/gtest.cmake new file mode 100644 index 0000000000..dc840e4e80 --- /dev/null +++ b/cmake/gtest.cmake @@ -0,0 +1,71 @@ +include(FetchContent) + +set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against") + +if(GOOGLETEST_DIR) + set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override") +endif() + +FetchContent_Declare( + GTest + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG f8d7d77c06936315286eb55f8de22cd23c188571 + SYSTEM +) + +# Suppress ROCMChecks WARNING on GoogleTests +set(ROCM_DISABLE_CHECKS FALSE) +macro(rocm_check_toolchain_var var access value list_file) + if(NOT ROCM_DISABLE_CHECKS) + _rocm_check_toolchain_var("${var}" "${access}" "${value}" "${list_file}") + endif() +endmacro() + +if(WIN32) + set(gtest_force_shared_crt ON CACHE_INTERNAL "") +endif() + +set(BUILD_GMOCK OFF CACHE INTERNAL "") +set(INSTALL_GTEST OFF CACHE INTERNAL "") + +# Store the current value of BUILD_SHARED_LIBS +set(__build_shared_libs ${BUILD_SHARED_LIBS}) +set(BUILD_SHARED_LIBS OFF CACHE INTERNAL "") + +set(ROCM_DISABLE_CHECKS TRUE) +FetchContent_MakeAvailable(GTest) +set(ROCM_DISABLE_CHECKS FALSE) + +# Restore the old value of BUILD_SHARED_LIBS +set(BUILD_SHARED_LIBS ${__build_shared_libs} CACHE BOOL "Type of libraries to build" FORCE) + +set(BUILD_GMOCK OFF CACHE INTERNAL "") +set(INSTALL_GTEST OFF CACHE INTERNAL "") + +set(GTEST_CXX_FLAGS + -Wno-undef + -Wno-reserved-identifier + -Wno-global-constructors + -Wno-missing-noreturn + -Wno-disabled-macro-expansion + -Wno-used-but-marked-unused + -Wno-switch-enum + -Wno-zero-as-null-pointer-constant + -Wno-unused-member-function + -Wno-comma + -Wno-old-style-cast + -Wno-deprecated + -Wno-unsafe-buffer-usage + -Wno-float-equal +) + +if(WIN32) + list(APPEND GTEST_CXX_FLAGS + -Wno-suggest-destructor-override + -Wno-suggest-override + -Wno-nonportable-system-include-path + -Wno-language-extension-token) +endif() + +target_compile_options(gtest PRIVATE ${GTEST_CXX_FLAGS}) +target_compile_options(gtest_main PRIVATE ${GTEST_CXX_FLAGS}) diff --git a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp index e9c85964c5..eed60caef4 100644 --- a/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp +++ b/include/ck/tensor_operation/gpu/element/unary_element_wise_operation.hpp @@ -174,6 +174,11 @@ struct PassThrough { y = x; } + template <> + __host__ __device__ void operator()(int4_t& y, const int& x) const + { + y = type_convert(x); + } #endif template <> diff --git a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp index 3ea72b8534..072275c089 100644 --- a/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp +++ b/include/ck/tensor_operation/gpu/grid/gridwise_elementwise_layernorm_welford_variance.hpp @@ -119,7 +119,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk index_t num_k_block_tile_iteration, AccDataType epsilon, const InDataTypePointerTuple p_in_global_tuple, - XDataType* const __restrict__ p_x_lds, + XDataType* const __restrict__ p_x_lds_, const GammaDataType* const __restrict__ p_gamma_global, const BetaDataType* const __restrict__ p_beta_global, YDataType* const __restrict__ p_y_global, @@ -149,7 +149,7 @@ struct GridwiseElementwiseLayernormWelfordVariance_mk_to_mk p_y_global, y_grid_desc_m_k.GetElementSpaceSize()); auto x_lds_val_buf = make_dynamic_buffer( - p_x_lds, x_grid_desc_m_k.GetElementSpaceSize() / grid_size); + p_x_lds_, x_grid_desc_m_k.GetElementSpaceSize() / grid_size); auto in_thread_buf_tuple = generate_tuple( [&](auto) { diff --git a/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp b/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp index 979f3567e9..814b4167b8 100644 --- a/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp +++ b/include/ck/tensor_operation/gpu/warp/wmma_gemm.hpp @@ -328,7 +328,7 @@ struct WmmaSelector } #ifdef CK_EXPERIMENTAL_BIT_INT_EXTENSION_INT4 template <> - static constexpr auto GetWmma() + static constexpr auto GetWmma() { return WmmaInstr::wmma_i32_16x16x16_iu4; } diff --git a/library/src/tensor_operation_instance/gpu/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/CMakeLists.txt index ac01c1b416..0a12e1c49e 100644 --- a/library/src/tensor_operation_instance/gpu/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/CMakeLists.txt @@ -152,7 +152,6 @@ ENDFOREACH() if(CK_DEVICE_OTHER_INSTANCES) add_library(device_other_operations STATIC ${CK_DEVICE_OTHER_INSTANCES}) add_library(composablekernels::device_other_operations ALIAS device_other_operations) - target_compile_features(device_other_operations PUBLIC) set_target_properties(device_other_operations PROPERTIES POSITION_INDEPENDENT_CODE ON) target_include_directories(device_other_operations PUBLIC $ diff --git a/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt b/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt index dbe3764115..6daaec738a 100644 --- a/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt +++ b/library/src/tensor_operation_instance/gpu/softmax/CMakeLists.txt @@ -1,5 +1,4 @@ -set(DEVICE_SOFTMAX_INSTANCES) -list(APPEND DEVICE_SOFTMAX_INSTANCES +add_instance_library(device_softmax_instance device_softmax_f16_f16_instance_rank3_reduce1.cpp device_softmax_f16_f16_instance_rank3_reduce2.cpp device_softmax_f16_f16_instance_rank3_reduce3.cpp @@ -14,4 +13,3 @@ list(APPEND DEVICE_SOFTMAX_INSTANCES device_softmax_f32_f32_instance_rank4_reduce2.cpp device_softmax_f32_f32_instance_rank4_reduce3.cpp device_softmax_f32_f32_instance_rank4_reduce4.cpp) -add_instance_library(device_softmax_instance ${DEVICE_SOFTMAX_INSTANCES}) diff --git a/library/src/utility/CMakeLists.txt b/library/src/utility/CMakeLists.txt index 7f6a59eebe..296e6c993a 100644 --- a/library/src/utility/CMakeLists.txt +++ b/library/src/utility/CMakeLists.txt @@ -1,17 +1,19 @@ -## utility -set(UTILITY_SOURCE +add_library(utility STATIC device_memory.cpp host_tensor.cpp convolution_parameter.cpp ) -add_library(utility STATIC ${UTILITY_SOURCE}) add_library(composable_kernel::utility ALIAS utility) - +set_target_properties(utility PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_compile_options(utility PRIVATE ${CMAKE_COMPILER_WARNINGS}) target_include_directories(utility PUBLIC "$" "$" ) +if(WIN32) + target_compile_definitions(utility PUBLIC NOMINMAX) +endif() rocm_install( TARGETS utility diff --git a/profiler/src/CMakeLists.txt b/profiler/src/CMakeLists.txt index 7674b3b4f0..5144785aa0 100644 --- a/profiler/src/CMakeLists.txt +++ b/profiler/src/CMakeLists.txt @@ -58,7 +58,7 @@ set(PROFILER_EXECUTABLE ckProfiler) add_executable(${PROFILER_EXECUTABLE} ${PROFILER_SOURCES}) target_compile_options(${PROFILER_EXECUTABLE} PRIVATE -Wno-global-constructors) -target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility) +target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE utility getopt::getopt) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_splitk_instance) target_link_libraries(${PROFILER_EXECUTABLE} PRIVATE device_gemm_add_multiply_instance) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 94c5f2750f..90140659f6 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -3,7 +3,7 @@ include_directories(BEFORE ${PROJECT_SOURCE_DIR}/profiler/include ) -include(googletest) +include(gtest) add_custom_target(tests) @@ -50,6 +50,7 @@ function(add_test_executable TEST_NAME) #only continue if there are some source files left on the list if(ARGN) add_executable(${TEST_NAME} ${ARGN}) + target_link_libraries(${TEST_NAME} PRIVATE getopt::getopt) add_test(NAME ${TEST_NAME} COMMAND $) add_dependencies(tests ${TEST_NAME}) add_dependencies(check ${TEST_NAME}) @@ -58,9 +59,7 @@ function(add_test_executable TEST_NAME) endif() #message("add_test returns ${result}") set(result ${result} PARENT_SCOPE) -endfunction(add_test_executable TEST_NAME) - -include(GoogleTest) +endfunction() function(add_gtest_executable TEST_NAME) message("adding gtest ${TEST_NAME}") @@ -109,14 +108,14 @@ function(add_gtest_executable TEST_NAME) # suppress gtest warnings target_compile_options(${TEST_NAME} PRIVATE -Wno-global-constructors -Wno-undef) - target_link_libraries(${TEST_NAME} PRIVATE gtest_main) + target_link_libraries(${TEST_NAME} PRIVATE gtest_main getopt::getopt) add_test(NAME ${TEST_NAME} COMMAND $) rocm_install(TARGETS ${TEST_NAME} COMPONENT tests) set(result 0) endif() #message("add_gtest returns ${result}") set(result ${result} PARENT_SCOPE) -endfunction(add_gtest_executable TEST_NAME) +endfunction() add_subdirectory(magic_number_division) add_subdirectory(space_filling_curve)