mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-20 12:59:49 +00:00
* Add online-compiling facility
* Synchronize from fwd-v4r5 and implement host interfaces to call conv-fwd v4r4/v4r5 using on-line compiling method
* Tiny adjustment to time reporting
* Use object assignment to replace explicit bytes copying in the first kernel of v4r4/v4r5
* Use single thread to assign descriptor object to device memory
* Adjust to the workload assignment of the two kernels of v4r4 (experimental)
* Revert "Adjust to the workload assignment of the two kernels of v4r4 (experimental)"
This reverts commit eb38461456bb0c82b6c0d32cdd616e181907e20c.
* Update to make constexpr for generating descriptor types in kernel 2 of dynamic conv-fwd v4r4
* Update to dynamic conv-fwd v4r4 online-compiling
* Update to dynamic conv-fwd v4r5 online-compiling (result not accurate)
* Tiny update to driver/CMakeLists.txt
* clang-format
* Tiny comments change
* Add env OLC_DUMP_SAVE_TMP_DIR to support saving of temperary dir
* Fwd v4r5 olc perf (#39)
* added hip-clang flags that fix perf issue of online compilation
* fix bug for olc fwd-v4r5-nchw
* Move constexpr and type reference statements out of the function body in conv-fwd v4r4/v4r5 kernel wrapper
* Remove printing in hip_build_utils.cpp
* Update to root CMakeLists.txt
* Revert "Move constexpr and type reference statements out of the function body in conv-fwd v4r4/v4r5 kernel wrapper"
This reverts commit 3d2c5d8ecdd8298b72d127110500ed5b38d9835c.
Co-authored-by: Chao Liu <chao.liu2@amd.com>
Co-authored-by: Chao Liu <lc.roy86@gmail.com>
Co-authored-by: root <root@dc-smc-18.amd.com>
[ROCm/composable_kernel commit: 1685048a67]
202 lines
7.5 KiB
CMake
202 lines
7.5 KiB
CMake
|
|
set(CMAKE_CXX_COMPILER /opt/rocm/llvm/bin/clang++)
|
|
|
|
## for online-compiling of HIP kernels
|
|
set(OLC_HIP_COMPILER ${CMAKE_CXX_COMPILER} CACHE PATH "")
|
|
|
|
## reset to avoid the C++ options from the parent project
|
|
set(CMAKE_CXX_FLAGS "")
|
|
message("Compiling options for library and kernels: ${CMAKE_CXX_FLAGS}")
|
|
|
|
# look for and register clang-offload-bundler
|
|
if(OLC_HIP_COMPILER MATCHES ".*clang\\+\\+$")
|
|
find_program(OLC_OFFLOADBUNDLER_BIN clang-offload-bundler
|
|
PATH_SUFFIXES bin
|
|
PATHS
|
|
/opt/rocm/llvm
|
|
${CMAKE_INSTALL_PREFIX}/llvm
|
|
)
|
|
endif()
|
|
if(OLC_OFFLOADBUNDLER_BIN)
|
|
message(STATUS "clang-offload-bundler found: ${OLC_OFFLOADBUNDLER_BIN}")
|
|
set(OLC_OFFLOADBUNDLER_BIN "${OLC_OFFLOADBUNDLER_BIN}")
|
|
else()
|
|
# look for and register extractkernel
|
|
message(STATUS "clang-offload-bundler not found")
|
|
|
|
find_program(EXTRACTKERNEL_BIN extractkernel
|
|
PATH_SUFFIXES bin
|
|
PATHS
|
|
/opt/rocm/hip
|
|
/opt/rocm/hcc
|
|
/opt/rocm
|
|
${CMAKE_INSTALL_PREFIX}/hip
|
|
${CMAKE_INSTALL_PREFIX}/hcc
|
|
${CMAKE_INSTALL_PREFIX}
|
|
|
|
)
|
|
if(EXTRACTKERNEL_BIN)
|
|
message(STATUS "extractkernel found: ${EXTRACTKERNEL_BIN}")
|
|
set(EXTRACTKERNEL_BIN "${EXTRACTKERNEL_BIN}")
|
|
else()
|
|
message(FATAL_ERROR "extractkernel not found")
|
|
endif()
|
|
endif()
|
|
|
|
option(Boost_USE_STATIC_LIBS "Use boost static libraries" OFF)
|
|
set(BOOST_COMPONENTS filesystem)
|
|
add_definitions(-DBOOST_ALL_NO_LIB=1)
|
|
find_package(Boost REQUIRED COMPONENTS ${BOOST_COMPONENTS})
|
|
|
|
# HIP is always required
|
|
find_package(hip REQUIRED PATHS /opt/rocm)
|
|
message(STATUS "Build with HIP ${hip_VERSION}")
|
|
target_flags(HIP_COMPILER_FLAGS hip::device)
|
|
# Remove cuda arch flags
|
|
string(REGEX REPLACE --cuda-gpu-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
|
string(REGEX REPLACE --offload-arch=[a-z0-9]+ "" HIP_COMPILER_FLAGS "${HIP_COMPILER_FLAGS}")
|
|
|
|
set(OLC_hip_VERSION_MAJOR "${hip_VERSION_MAJOR}")
|
|
set(OLC_hip_VERSION_MINOR "${hip_VERSION_MINOR}")
|
|
set(OLC_hip_VERSION_PATCH "${hip_VERSION_PATCH}")
|
|
|
|
option(ENABLE_DEBUG "Build to enable debugging" ON)
|
|
if(ENABLE_DEBUG)
|
|
set(OLC_DEBUG 1)
|
|
else()
|
|
set(OLC_DEBUG 0)
|
|
endif()
|
|
|
|
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/olCompiling/include/config.h.in" "${CMAKE_CURRENT_SOURCE_DIR}/olCompiling/include/config.h")
|
|
|
|
message(STATUS "Hip compiler flags: ${HIP_COMPILER_FLAGS}")
|
|
|
|
## HIP_COMPILER_FLAGS will be used for on-line compiling of the HIP kernels
|
|
add_definitions("-DHIP_COMPILER_FLAGS=${HIP_COMPILER_FLAGS}")
|
|
|
|
file(GLOB COMPOSABLE_KERNEL_INCLUDE_1 "${PROJECT_SOURCE_DIR}/composable_kernel/include/kernel_algorithm/*.hpp")
|
|
file(GLOB COMPOSABLE_KERNEL_INCLUDE_2 "${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description/*.hpp")
|
|
file(GLOB COMPOSABLE_KERNEL_INCLUDE_3 "${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation/*.hpp")
|
|
file(GLOB COMPOSABLE_KERNEL_INCLUDE_4 "${PROJECT_SOURCE_DIR}/composable_kernel/include/utility/*.hpp")
|
|
file(GLOB COMPOSABLE_KERNEL_INCLUDE_5 "${PROJECT_BINARY_DIR}/composable_kernel/include/utility/*.hpp")
|
|
file(GLOB COMPOSABLE_KERNEL_INCLUDE_6 "${PROJECT_SOURCE_DIR}/external/rocm/include/bfloat16_dev.hpp")
|
|
set(MCONV_KERNEL_INCLUDES
|
|
${COMPOSABLE_KERNEL_INCLUDE_1}
|
|
${COMPOSABLE_KERNEL_INCLUDE_2}
|
|
${COMPOSABLE_KERNEL_INCLUDE_3}
|
|
${COMPOSABLE_KERNEL_INCLUDE_4}
|
|
${COMPOSABLE_KERNEL_INCLUDE_5}
|
|
${COMPOSABLE_KERNEL_INCLUDE_6}
|
|
)
|
|
|
|
set(MCONV_KERNELS
|
|
../composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw.cpp
|
|
../composable_kernel/src/kernel_wrapper/dynamic_convolution_forward_implicit_gemm_v4r5_nchw_kcyx_nkhw.cpp
|
|
)
|
|
|
|
add_kernels("olCompiling/" "${MCONV_KERNELS}")
|
|
add_kernel_includes("olCompiling/" "${MCONV_KERNEL_INCLUDES}")
|
|
|
|
set(MCONV_SOURCES
|
|
src/host_tensor.cpp;
|
|
src/device.cpp;
|
|
)
|
|
|
|
set(OLC_HIP_UTILITY_HEADERS
|
|
olCompiling/include/config.h
|
|
olCompiling/include/logger.hpp
|
|
olCompiling/include/stringutils.hpp
|
|
olCompiling/include/tmp_dir.hpp
|
|
olCompiling/include/write_file.hpp
|
|
olCompiling/include/env.hpp
|
|
olCompiling/include/manage_ptr.hpp
|
|
olCompiling/include/md5.hpp
|
|
olCompiling/include/simple_hash.hpp
|
|
olCompiling/include/exec_utils.hpp
|
|
olCompiling/include/hipCheck.hpp
|
|
olCompiling/include/target_properties.hpp
|
|
olCompiling/include/handle.hpp
|
|
olCompiling/include/op_kernel_args.hpp
|
|
olCompiling/include/kernel.hpp
|
|
olCompiling/include/kernel_build_params.hpp
|
|
olCompiling/include/hip_build_utils.hpp
|
|
olCompiling/include/hipoc_program.hpp
|
|
olCompiling/include/hipoc_program_impl.hpp
|
|
olCompiling/include/hipoc_kernel.hpp
|
|
olCompiling/include/kernel_cache.hpp
|
|
olCompiling/include/binary_cache.hpp
|
|
)
|
|
|
|
set(OLC_HIP_UTILITY_CPPS
|
|
olCompiling/hip_utility/logger.cpp
|
|
olCompiling/hip_utility/tmp_dir.cpp
|
|
olCompiling/hip_utility/md5.cpp
|
|
olCompiling/hip_utility/exec_utils.cpp
|
|
olCompiling/hip_utility/target_properties.cpp
|
|
olCompiling/hip_utility/handlehip.cpp
|
|
olCompiling/hip_utility/kernel_build_params.cpp
|
|
olCompiling/hip_utility/hip_build_utils.cpp
|
|
olCompiling/hip_utility/hipoc_program.cpp
|
|
olCompiling/hip_utility/hipoc_kernel.cpp
|
|
olCompiling/hip_utility/kernel_cache.cpp
|
|
olCompiling/hip_utility/binary_cache.cpp
|
|
)
|
|
|
|
list(APPEND OLC_SOURCES ${OLC_HIP_UTILITY_CPPS} ${OLC_HIP_UTILITY_HEADERS})
|
|
|
|
list(INSERT MCONV_SOURCES 0
|
|
${PROJECT_BINARY_DIR}/kernel.cpp
|
|
${PROJECT_BINARY_DIR}/kernel_includes.cpp
|
|
)
|
|
|
|
## addkernels provide the tool to create inlined kernels in one header
|
|
add_subdirectory(olCompiling/addkernels)
|
|
|
|
function(inline_kernels_src KERNELS KERNEL_INCLUDES)
|
|
set(KERNEL_SRC_HPP_FILENAME batch_all.cpp.hpp)
|
|
set(KERNEL_SRC_HPP_PATH ${PROJECT_BINARY_DIR}/inlined_kernels/${KERNEL_SRC_HPP_FILENAME})
|
|
set(KERNEL_SRC_CPP_PATH ${PROJECT_BINARY_DIR}/inlined_kernels/batch_all.cpp)
|
|
|
|
add_custom_command(
|
|
OUTPUT ${KERNEL_SRC_HPP_PATH}
|
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
DEPENDS addkernels ${KERNELS} ${KERNEL_INCLUDES}
|
|
COMMAND $<TARGET_FILE:addkernels> -target ${KERNEL_SRC_HPP_PATH} -extern -source ${KERNELS}
|
|
COMMENT "Inlining All kernels"
|
|
)
|
|
configure_file(olCompiling/kernels_batch.cpp.in ${KERNEL_SRC_CPP_PATH})
|
|
list(APPEND OLC_SOURCES ${KERNEL_SRC_CPP_PATH} ${KERNEL_SRC_HPP_PATH})
|
|
|
|
set(OLC_SOURCES ${OLC_SOURCES} PARENT_SCOPE)
|
|
endfunction()
|
|
|
|
inline_kernels_src("${MCONV_KERNELS}" "${MCONV_KERNEL_INCLUDES}")
|
|
|
|
list(APPEND MCONV_SOURCES ${OLC_SOURCES} ${PROJECT_BINARY_DIR}/olc_kernel_includes.h)
|
|
|
|
add_custom_command(
|
|
OUTPUT ${PROJECT_BINARY_DIR}/olc_kernel_includes.h
|
|
WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}
|
|
DEPENDS addkernels ${MCONV_KERNEL_INCLUDES}
|
|
COMMAND $<TARGET_FILE:addkernels> -no-recurse -guard GUARD_OLC_KERNEL_INCLUDES_HPP_ -target ${PROJECT_BINARY_DIR}/olc_kernel_includes.h -source ${MCONV_KERNEL_INCLUDES}
|
|
COMMENT "Inlining HIP kernel includes"
|
|
)
|
|
|
|
## the library target
|
|
add_library(modConv SHARED ${MCONV_SOURCES})
|
|
|
|
target_include_directories(modConv PRIVATE ${CMAKE_CURRENT_SOURCE_DIR}/olCompiling/include/)
|
|
target_include_directories(modConv PRIVATE ${PROJECT_BINARY_DIR})
|
|
target_include_directories(modConv PRIVATE ${PROJECT_SOURCE_DIR}/external/half/include/)
|
|
|
|
target_link_libraries(modConv PRIVATE hip::device)
|
|
target_link_libraries(modConv INTERFACE hip::host)
|
|
target_link_libraries(modConv PRIVATE Boost::filesystem)
|
|
|
|
target_compile_options(modConv PRIVATE -mfma)
|
|
|
|
target_compile_features(modConv PUBLIC)
|
|
set_target_properties(modConv PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
|
|
|
install(TARGETS modConv LIBRARY DESTINATION lib)
|