mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-14 02:02:46 +00:00
Add additional instances to device_mha_instance (#1522)
* Add additional instances to device_mha_instance
* Add comment to describe what receipt 3 option filters
---------
Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com>
[ROCm/composable_kernel commit: 3528a523ff]
This commit is contained in:
@@ -102,12 +102,14 @@ function(add_instance_library INSTANCE_NAME)
|
||||
set(FMHA_FWD_FAST_EXP2 true)
|
||||
endif()
|
||||
if(FMHA_FWD_FAST_EXP2)
|
||||
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
|
||||
list(APPEND FMHA_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=1 -fgpu-flush-denormals-to-zero)
|
||||
else()
|
||||
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
|
||||
list(APPEND FMHA_COMPILE_OPTIONS -Wno-undefined-func-template -DCK_TILE_FMHA_FWD_FAST_EXP2=0)
|
||||
endif()
|
||||
list(APPEND EXAMPLE_FMHA_FWD_COMPILE_OPTIONS -Wno-float-equal)
|
||||
target_compile_options(device_mha_instance PRIVATE ${EXAMPLE_FMHA_FWD_COMPILE_OPTIONS})
|
||||
list(APPEND FMHA_COMPILE_OPTIONS -Wno-float-equal)
|
||||
list(APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_SPLITKV_API=1)
|
||||
list(APPEND FMHA_COMPILE_OPTIONS -DCK_TILE_FMHA_FWD_APPENDKV_API=1)
|
||||
target_compile_options(device_mha_instance PRIVATE ${FMHA_COMPILE_OPTIONS})
|
||||
endif()
|
||||
|
||||
target_compile_features(${INSTANCE_NAME} PUBLIC)
|
||||
|
||||
@@ -32,23 +32,33 @@ if(EXISTS ${FMHA_CPP_FOLDER}/blob_list.txt)
|
||||
file(REMOVE ${FMHA_CPP_FOLDER}/blob_list.txt)
|
||||
endif()
|
||||
|
||||
set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
|
||||
|
||||
# generate a list of kernels, but not actually emit files at config stage
|
||||
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
|
||||
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
|
||||
execute_process(
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
|
||||
--list_blobs ${FMHA_CPP_FOLDER}/blob_list.txt
|
||||
--api ${FMHA_KNOWN_APIS}
|
||||
--receipt 3
|
||||
RESULT_VARIABLE ret
|
||||
)
|
||||
if(ret AND NOT ret EQUAL 0)
|
||||
message( FATAL_ERROR "CK Tile MHA FAILED to genrate a list of kernels via Python.")
|
||||
else()
|
||||
file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_FWD_GEN_BLOBS)
|
||||
file(STRINGS ${FMHA_CPP_FOLDER}/blob_list.txt FMHA_GEN_BLOBS)
|
||||
endif()
|
||||
|
||||
# actually generate the kernel content now
|
||||
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
|
||||
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
|
||||
add_custom_command(
|
||||
OUTPUT ${FMHA_FWD_GEN_BLOBS}
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${CMAKE_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
|
||||
OUTPUT ${FMHA_GEN_BLOBS}
|
||||
COMMAND ${PYTHON_EXECUTABLE} ${FMHA_SRC_FOLDER}/generate.py
|
||||
--output_dir ${FMHA_CPP_FOLDER}
|
||||
--api ${FMHA_KNOWN_APIS}
|
||||
--receipt 3
|
||||
COMMENT "Generating mha kernel (cpp) files now ..."
|
||||
VERBATIM
|
||||
)
|
||||
@@ -57,12 +67,12 @@ add_custom_command(
|
||||
# have filename. Since, it was causing the cmake
|
||||
# to throw "File name too long"
|
||||
set(device_files)
|
||||
foreach(filepath IN LISTS FMHA_FWD_GEN_BLOBS)
|
||||
foreach(filepath IN LISTS FMHA_GEN_BLOBS)
|
||||
get_filename_component(filename ${filepath} NAME)
|
||||
# Append the filename to the device_files list
|
||||
list(APPEND device_files ${filename})
|
||||
endforeach()
|
||||
add_custom_target(generate_cpp_files DEPENDS ${FMHA_FWD_GEN_BLOBS})
|
||||
add_custom_target(generate_cpp_files DEPENDS ${FMHA_GEN_BLOBS})
|
||||
|
||||
add_instance_library(device_mha_instance ${device_files})
|
||||
|
||||
|
||||
Reference in New Issue
Block a user