mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 10:09:41 +00:00
adding mha as static lib (#1366)
* adding mha as static lib
* add fmha fwd compile options
* typo
* fix python version
* python version to 3
* increase path length
* add max path flag in mha cmake
* fix long path issue
* mha currently only runs in gfx94x
* only buld mha in mi300
* populate gpu_list
* add mha compile flags
* avoid building mha in gpu other then gfx94x
* some comments and include ck_tile in rocm
* use rocm_install
* place ck_tile in include
* correct ck_tile path
---------
Co-authored-by: Illia Silin <98187287+illsilin@users.noreply.github.com>
[ROCm/composable_kernel commit: 840c5397bb]
This commit is contained in:
@@ -64,6 +64,13 @@ function(add_instance_library INSTANCE_NAME)
|
||||
list(REMOVE_ITEM ARGN "${source}")
|
||||
endif()
|
||||
endforeach()
|
||||
# Do not build mha instances if gfx94 targets are not on the target list
|
||||
foreach(source IN LISTS ARGN)
|
||||
if(NOT INST_TARGETS MATCHES "gfx94" AND source MATCHES "mha")
|
||||
message("removing mha instance ${source} ")
|
||||
list(REMOVE_ITEM ARGN "${source}")
|
||||
endif()
|
||||
endforeach()
|
||||
#only continue if there are some source files left on the list
|
||||
if(ARGN)
|
||||
set(INST_OBJ)
|
||||
@@ -77,6 +84,8 @@ function(add_instance_library INSTANCE_NAME)
|
||||
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx1030 gfx1100 gfx1101 gfx1102 gfx1103)
|
||||
elseif(ARGN MATCHES "_wmma")
|
||||
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx940 gfx941 gfx942 gfx1030)
|
||||
elseif(ARGN MATCHES "mha")
|
||||
list(REMOVE_ITEM INST_TARGETS gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 gfx1101 gfx1102 gfx1103)
|
||||
endif()
|
||||
set(offload_targets)
|
||||
foreach(target IN LISTS INST_TARGETS)
|
||||
@@ -86,6 +95,21 @@ function(add_instance_library INSTANCE_NAME)
|
||||
list(APPEND INST_OBJ ${source})
|
||||
endforeach()
|
||||
add_library(${INSTANCE_NAME} OBJECT ${INST_OBJ})
|
||||
|
||||
# Allow comparing floating points directly in order to check sentinel values
|
||||
if(${INSTANCE_NAME} STREQUAL "device_mha_instance")
|
||||
if(NOT DEFINED FMHA_FWD_FAST_EXP2)
|
||||
set(FMHA_FWD_FAST_EXP2 true)
|
||||
endif()
|
||||
if(FMHA_FWD_FAST_EXP2)
|
||||
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
|
||||
else()
|
||||
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
|
||||
endif()
|
||||
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal)
|
||||
target_compile_options(device_mha_instance PRIVATE ${EXAMPLE_FMHA_FWD_COMPILE_OPTIONS})
|
||||
endif()
|
||||
|
||||
target_compile_features(${INSTANCE_NAME} PUBLIC)
|
||||
|
||||
# flags to compress the library
|
||||
@@ -293,20 +317,22 @@ if(CK_DEVICE_CONV_INSTANCES)
|
||||
)
|
||||
endif()
|
||||
if(CK_DEVICE_MHA_INSTANCES)
|
||||
add_library(device_mha_operations STATIC ${CK_DEVICE_MHA_INSTANCES})
|
||||
add_library(composablekernels::device_mha_operations ALIAS device_mha_operations)
|
||||
target_compile_features(device_mha_operations PUBLIC)
|
||||
set_target_properties(device_mha_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
target_include_directories(device_mha_operations PUBLIC
|
||||
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/ck/library/tensor_operation_instance/gpu/mha>
|
||||
)
|
||||
rocm_install(TARGETS device_mha_operations
|
||||
EXPORT device_mha_operationsTargets)
|
||||
rocm_install(EXPORT device_mha_operationsTargets
|
||||
FILE composable_kerneldevice_mha_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
set(gpu_list ${INST_TARGETS})
|
||||
list(FILTER gpu_list INCLUDE REGEX "^gfx94")
|
||||
if(gpu_list)
|
||||
add_library(device_mha_operations STATIC ${CK_DEVICE_MHA_INSTANCES})
|
||||
add_library(composablekernels::device_mha_operations ALIAS device_mha_operations)
|
||||
target_compile_features(device_mha_operations PUBLIC)
|
||||
set_target_properties(device_mha_operations PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||
|
||||
rocm_install(TARGETS device_mha_operations
|
||||
EXPORT device_mha_operationsTargets)
|
||||
rocm_install(EXPORT device_mha_operationsTargets
|
||||
FILE composable_kerneldevice_mha_operationsTargets.cmake
|
||||
NAMESPACE composable_kernel::
|
||||
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
|
||||
)
|
||||
endif()
|
||||
endif()
|
||||
if(CK_DEVICE_CONTRACTION_INSTANCES)
|
||||
add_library(device_contraction_operations STATIC ${CK_DEVICE_CONTRACTION_INSTANCES})
|
||||
|
||||
55
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
Normal file
55
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
Normal file
@@ -0,0 +1,55 @@
|
||||
set(FMHA_CPP_FOLDER ${CMAKE_CURRENT_BINARY_DIR})
|
||||
set(FMHA_SRC_FOLDER ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/)
|
||||
set(CK_TILE_SRC_FOLDER ${CMAKE_SOURCE_DIR}/include/ck_tile/)
|
||||
# python stuff
|
||||
find_package(PythonInterp 3 REQUIRED)
|
||||
|
||||
rocm_install(DIRECTORY ${CK_TILE_SRC_FOLDER} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/ck_tile)
|
||||
|
||||
rocm_install(FILES
|
||||
"${FMHA_SRC_FOLDER}/fmha_fwd.hpp"
|
||||
"${FMHA_SRC_FOLDER}/bias.hpp"
|
||||
"${FMHA_SRC_FOLDER}/mask.hpp"
|
||||
DESTINATION include/ck_tile/ops
|
||||
)
|
||||
|
||||
# header for building lib
|
||||
file(COPY ${FMHA_SRC_FOLDER}/fmha_fwd.hpp DESTINATION ${FMHA_CPP_FOLDER})
|
||||
file(COPY ${FMHA_SRC_FOLDER}/bias.hpp DESTINATION ${FMHA_CPP_FOLDER})
|
||||
file(COPY ${FMHA_SRC_FOLDER}/mask.hpp DESTINATION ${FMHA_CPP_FOLDER})
|
||||
|
||||
# generate a list of kernels, but not actually emit files at config stage
|
||||
execute_process(
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
|
||||
--list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
|
||||
)
|
||||
file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_FWD_GEN_BLOBS)
|
||||
|
||||
# actually generate the cpp files
|
||||
add_custom_command(
|
||||
OUTPUT ${FMHA_FWD_GEN_BLOBS}
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
|
||||
--output_dir ${FMHA_CPP_FOLDER}
|
||||
COMMENT "Generating mha kernel (cpp) files now ..."
|
||||
VERBATIM
|
||||
)
|
||||
|
||||
# This is done to remove path info and just
|
||||
# have filename. Since, it was cauing the cmake
|
||||
# to throw "File name too long"
|
||||
set(device_files)
|
||||
foreach(filepath IN LISTS FMHA_FWD_GEN_BLOBS)
|
||||
get_filename_component(filename ${filepath} NAME)
|
||||
# Append the filename to the device_files list
|
||||
list(APPEND device_files ${filename})
|
||||
endforeach()
|
||||
add_custom_target(generate_cpp_files DEPENDS ${FMHA_FWD_GEN_BLOBS})
|
||||
|
||||
add_instance_library(device_mha_instance ${device_files})
|
||||
|
||||
|
||||
|
||||
if (TARGET device_mha_instance)
|
||||
add_dependencies(device_mha_instance generate_cpp_files)
|
||||
endif()
|
||||
|
||||
Reference in New Issue
Block a user