mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 05:01:25 +00:00
* CK Tile Stream-K Tree Reduction This change adds the first implementation of the Stream-K tree reduction strategy into CK Tile. The tree reduction reduces the the number of steps for accumulating results for a tile from O(N) to O(logN) where N is the number of workgroups contributing to a C tile. Additionally, in the original non-atomic reduction strategy, atomics were used to set the flags buffer and to read from the flags buffer. Howeover, through investigation with the tree reduciton, atomics with default (relaxed) semantics were not enough to guarantee workgroups would not read stale data, leading to incorrect results. Stronger acquire/release memory orderings are too expensive. So, this change also eliminates the use of atomics for setting the flags. Instead, we leverage cache modifiers (e.g., GLC) to avoid writing to cache, thereby avoiding the use of atomics. Prelimiary tests were also added for the normal reduction and tree reduction. More will be added in a future PR via tile engine. * Move Stream-K kernel files to a subdirectory * Cleanup Code Style & Handle Unsupported Reductions This change makes the following small changes: - Add an explicit else block for unimplemented reduction strategies - Clarify type of sk_flags_ptr via auto* - Add description for extra_iters_before_me variable * Run new copyright script on new files
54 lines
3.2 KiB
CMake
54 lines
3.2 KiB
CMake
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
set(EXAMPLE_GEMM_COMPILE_OPTIONS)
|
|
if(CK_USE_OCP_FP8)
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
|
endif()
|
|
set(EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS)
|
|
if(CK_USE_OCP_FP8)
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
|
endif()
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS
|
|
-mllvm
|
|
-enable-noalias-to-md-conversion=0
|
|
)
|
|
set(EXAMPLE_GEMM_COMPILE_COMPUTE_ASYNC_OPTIONS ${EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS})
|
|
|
|
# Currently test_ck_tile_streamk_smoke is only built on gfx9
|
|
if(GPU_TARGETS MATCHES "gfx90a|gfx942|gfx950")
|
|
|
|
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR})
|
|
|
|
#TODO: support all arches
|
|
#TODO: current c-shuffle only supports C layout as R
|
|
add_gtest_executable(test_ck_tile_streamk_tile_partitioner test_streamk_tile_partitioner.cpp)
|
|
add_gtest_executable(test_ck_tile_streamk_reduction
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_reduction.cpp
|
|
test_gemm_streamk_util.cpp)
|
|
add_gtest_executable(test_ck_tile_streamk_smoke
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp8_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf8_nonpersistent.cpp
|
|
test_gemm_streamk_util.cpp)
|
|
add_gtest_executable(test_ck_tile_streamk_extended
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp8_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf8_nonpersistent.cpp
|
|
test_gemm_streamk_util.cpp)
|
|
target_compile_options(test_ck_tile_streamk_smoke PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_streamk_extended PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
else()
|
|
message(DEBUG "Skipping test_ck_tile_streamk unit tests for current target")
|
|
endif()
|