mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-05 06:01:23 +00:00
* Fix alignment issue in Stream-K workspace buffer In CK Tile Stream-K, the workspace buffer is used to hold flags and partials, where the first i bytes holds the flags and the remaining bytes hold partials. This change adds padding to the flags prefix of the workspace buffer to ensure the number of bytes is 128B-aligned. Without this alignment, since workgroups do not skip cache when reading from partials, they may read stale partials data in cache, leading to incorrect results. The added padding avoids the stale data reading. This change also re-enables the test_ck_tile_streamk_reduction tests. * Compute reference GEMM on GPU for test verification to decrease testing time
54 lines
3.2 KiB
CMake
54 lines
3.2 KiB
CMake
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
|
|
# SPDX-License-Identifier: MIT
|
|
|
|
set(EXAMPLE_GEMM_COMPILE_OPTIONS)
|
|
if(CK_USE_OCP_FP8)
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
|
endif()
|
|
set(EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS)
|
|
if(CK_USE_OCP_FP8)
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
|
endif()
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS
|
|
-mllvm
|
|
-enable-noalias-to-md-conversion=0
|
|
)
|
|
set(EXAMPLE_GEMM_COMPILE_COMPUTE_ASYNC_OPTIONS ${EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS})
|
|
|
|
# Currently test_ck_tile_streamk_smoke is only built on gfx9
|
|
if(GPU_TARGETS MATCHES "gfx90a|gfx942|gfx950")
|
|
|
|
include_directories(BEFORE ${CMAKE_CURRENT_SOURCE_DIR})
|
|
|
|
#TODO: support all arches
|
|
#TODO: current c-shuffle only supports C layout as R
|
|
add_gtest_executable(test_ck_tile_streamk_tile_partitioner test_streamk_tile_partitioner.cpp)
|
|
add_gtest_executable(test_ck_tile_streamk_reduction
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_reduction.cpp
|
|
test_gemm_streamk_util.cpp)
|
|
add_gtest_executable(test_ck_tile_streamk_smoke
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_fp8_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/smoke_tests/test_gemm_streamk_bf8_nonpersistent.cpp
|
|
test_gemm_streamk_util.cpp)
|
|
add_gtest_executable(test_ck_tile_streamk_extended
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf16_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf8_persistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf16_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_fp8_nonpersistent.cpp
|
|
${CMAKE_CURRENT_SOURCE_DIR}/extended_tests/test_gemm_streamk_bf8_nonpersistent.cpp
|
|
test_gemm_streamk_util.cpp)
|
|
target_compile_options(test_ck_tile_streamk_smoke PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_streamk_extended PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
else()
|
|
message(DEBUG "Skipping test_ck_tile_streamk unit tests for current target")
|
|
endif()
|