mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 13:11:25 +00:00
* HasHotLoop is a constexpr * Remove an unused function * Remove some unused include statements * Add implementation and tests for fp8 x bf8 weight preshuffle GEMM * Add implementation and tests for fp8 x bf8 in CK Tile basic and universal GEMMs * Remove two barrier calls that HotLoopScheduler already calls * No need to suppress a variable that hasn't been declared * Replace six arg_parser arguments with constexpr literals * Simplify run_gemm_test_prec_type * The strides don't need to be passed via arg_parser as we use their default values * The layouts don't need to be passed as arguments twice * Pass M N and K as regular arguments, not using the argument parser * We can now remove the argument parser * Add a common file for precision types to be used in testing * Convert basic and universal GEMM tests to use gtest * Make GemmConfig a test parameter, and form test cases as the cartesian product GemmConfigs x PrecTypes * Add GemmConfigComputeV4 to the GEMM configs to run the universal tests on * Added a changelog entry * Add missing copyright statements * ifndef-define-endif is not needed with pragma once * Fix a comment * Add F8 x BF8 tests for CompV4 in test_gemm_pipeline_kernel_types.hpp * Disable the unreliable test MoeSortingCase4 --------- Co-authored-by: Adam Osewski <19374865+aosewski@users.noreply.github.com>
87 lines
5.5 KiB
CMake
87 lines
5.5 KiB
CMake
# Currently ck_tile_gemm is only built on gfx94/gfx95
|
|
set(EXAMPLE_GEMM_COMPILE_OPTIONS)
|
|
if(CK_USE_OCP_FP8)
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
|
endif()
|
|
set(EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS)
|
|
if(CK_USE_OCP_FP8)
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS -DCK_TILE_USE_OCP_FP8)
|
|
endif()
|
|
list(APPEND EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS
|
|
-mllvm
|
|
-enable-noalias-to-md-conversion=0
|
|
)
|
|
set(EXAMPLE_GEMM_COMPILE_COMPUTE_ASYNC_OPTIONS ${EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS})
|
|
|
|
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx11|gfx12")
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_universal_int8 test_gemm_pipeline_universal_int8.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_universal_int8 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_universal_pk_int4 test_gemm_pipeline_universal_pk_int4.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_universal_pk_int4 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
else()
|
|
message(DEBUG "Skipping ck_tile_gemm tests for current target")
|
|
endif()
|
|
|
|
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx12")
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_universal_fp8 test_gemm_pipeline_universal_fp8.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_universal_bf8 test_gemm_pipeline_universal_bf8.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_basic_fp8 test_gemm_pipeline_basic_fp8.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_basic_bf8 test_gemm_pipeline_basic_bf8.cpp)
|
|
|
|
target_compile_options(test_ck_tile_gemm_pipeline_universal_fp8 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_universal_bf8 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_basic_fp8 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_basic_bf8 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
else()
|
|
message(DEBUG "Skipping ck_tile_gemm tests for current target")
|
|
endif()
|
|
|
|
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a|gfx11|gfx12")
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_universal_fp16 test_gemm_pipeline_universal_fp16.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_universal_fp16 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_universal_fp16 PRIVATE --save-temps -Wno-gnu-line-marker)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_universal_bf16 test_gemm_pipeline_universal_bf16.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_universal_bf16 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_basic_fp16 test_gemm_pipeline_basic_fp16.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_basic_fp16 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_basic_bf16 test_gemm_pipeline_basic_bf16.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_basic_bf16 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
else()
|
|
message(DEBUG "Skipping ck_tile_gemm tests for current target ")
|
|
endif()
|
|
|
|
if(GPU_TARGETS MATCHES "gfx94|gfx95|gfx90a|gfx11|gfx12")
|
|
if(GPU_TARGETS MATCHES "gfx94|gfx95")
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_mem test_gemm_pipeline_mem.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_compv3 test_gemm_pipeline_compv3.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_compv4 test_gemm_pipeline_compv4.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_persistent test_gemm_pipeline_persistent.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_compv6 test_gemm_pipeline_compv6.cpp)
|
|
|
|
target_compile_options(test_ck_tile_gemm_pipeline_mem PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_compv3 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_compv4 PRIVATE ${EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_persistent PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_compv6 PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
endif()
|
|
|
|
if(GPU_TARGETS MATCHES "gfx95")
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_comp_async test_gemm_pipeline_comp_async.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_comp_async PRIVATE ${EXAMPLE_GEMM_COMPILE_COMPUTE_ASYNC_OPTIONS})
|
|
endif()
|
|
|
|
if(GPU_TARGETS MATCHES "gfx11|gfx12")
|
|
# On Radeon devices, build the WMMA version instead
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_mem_wmma test_gemm_pipeline_mem_wmma.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_compv3_wmma test_gemm_pipeline_compv3_wmma.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_compv4_wmma test_gemm_pipeline_compv4_wmma.cpp)
|
|
add_gtest_executable(test_ck_tile_gemm_pipeline_persistent_wmma test_gemm_pipeline_persistent_wmma.cpp)
|
|
target_compile_options(test_ck_tile_gemm_pipeline_mem_wmma PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_compv3_wmma PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_compv4_wmma PRIVATE ${EXAMPLE_GEMM_COMPILE_COMPUTE_V4_OPTIONS})
|
|
target_compile_options(test_ck_tile_gemm_pipeline_persistent_wmma PRIVATE ${EXAMPLE_GEMM_COMPILE_OPTIONS})
|
|
endif()
|
|
else()
|
|
message(DEBUG "Skipping ck_tile_gemm tests for current target test_ck_tile_gemm_pipeline")
|
|
endif()
|