mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-24 06:44:36 +00:00
* add DeviceGemmSplitKXdl
* add file device_gemm_splitk_xdl.hpp
* set c matrix zero
* using atomic
* add all tuning parameter to f32 mkkn
* grid size change to 720
* add tunning parameter for NT
* add tunning parameter for TN
* add tunning parameter for TT
* add m=96tunning parameter
* add lost config
* add element wise operation
* fixed MPerBlock=96
* remove marco for slpitk swtich
* add test
* add new line at the end of device_gemm_xdl_instance.hpp
* remove step hack
* seperate split-k instance files
* add tunning parameters
* change disired grid size to parameters
* remove slice length
* add desiredgridsize parameter to ckProfiler
* add losting file device_gemm_xdl_splitk_instance.hpp
* change desired gride size to kbatch
* format
* format
* clean up
* add selection of device_instances
* clean code
* fix build issue
Co-authored-by: ltqin <letaoqin@amd.com>
Co-authored-by: Chao Liu <chao.liu2@amd.com>
Co-authored-by: Jing Zhang <jizhan@amd.com>
[ROCm/composable_kernel commit: 4be7f0198e]
24 lines
1022 B
CMake
24 lines
1022 B
CMake
include_directories(BEFORE
|
|
include
|
|
${PROJECT_SOURCE_DIR}/host/host_tensor/include
|
|
${PROJECT_SOURCE_DIR}/host/device/include
|
|
${PROJECT_SOURCE_DIR}/device_operation/include
|
|
${PROJECT_SOURCE_DIR}/composable_kernel/include
|
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/utility
|
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_description
|
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/tensor_operation
|
|
${PROJECT_SOURCE_DIR}/composable_kernel/include/problem_transform
|
|
${PROJECT_SOURCE_DIR}/external/rocm/include
|
|
)
|
|
|
|
# test_magic_number_division
|
|
set(MAGIC_NUMBER_DIVISISON_SOURCE magic_number_division/main.cpp)
|
|
add_executable(test_magic_number_division ${MAGIC_NUMBER_DIVISISON_SOURCE})
|
|
target_link_libraries(test_magic_number_division PRIVATE host_tensor)
|
|
|
|
# test_split_k
|
|
set(SPLIT_K_SOURCE split_k/main.cpp)
|
|
add_executable(test_split_k ${SPLIT_K_SOURCE})
|
|
target_link_libraries(test_split_k PRIVATE host_tensor)
|
|
target_link_libraries(test_split_k PRIVATE device_gemm_instance)
|