mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 13:11:25 +00:00
[CK_TILE] Fix CShuffleEpilogue test to use correct GEMM accumulator distribution (#4518)

MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

The test was using the LDS distribution to create the accumulator tile, but CShuffleEpilogue expects the GEMM accumulator distribution that BlockGemm produces. This mismatch caused incorrect data permutation.

## Changes

- Use WarpGemmDispatcher to get the correct accumulator distribution encoding
- Load test input from host-initialized global memory for deterministic verification
- Shard tests by data type (FP16, FP8) with gfx950-specific FP8 tests
- Extract scale tests into a separate target for better organization
- Implement exact permutation verification (all unique values appear once)
- Reduce tile size from 256x256 to 128x128 to fit in the unique fp16 range
- Add parameterized test configurations for various warp layouts and MFMA types

## Test plan

- [x] Run new cshuffle epilogue tests

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>
18 lines
775 B
CMake
18 lines
775 B
CMake
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT

# Register the CShuffleEpilogue gtest executables, one per test source file
# (named for the fp16, fp8 and scale variants).
# add_gtest_executable() is provided outside this file; every call below passes
# the test target name followed by its single source file.
add_gtest_executable(
    test_ck_tile_cshuffle_epilogue_fp16
    test_cshuffle_epilogue_fp16.cpp
)
add_gtest_executable(
    test_ck_tile_cshuffle_epilogue_fp8
    test_cshuffle_epilogue_fp8.cpp
)
add_gtest_executable(
    test_ck_tile_cshuffle_epilogue_scale
    test_cshuffle_epilogue_scale.cpp
)
|
|
|
|
# When CK_USE_OCP_FP8 is enabled, compile the FP8 test with the
# CK_TILE_USE_OCP_FP8 preprocessor symbol defined.
#
# The define is attached with target_compile_definitions() rather than as a raw
# "-D" compiler flag, so CMake tracks it as a definition of the target.
# The TARGET check skips this step when the test target does not exist:
# add_gtest_executable() is defined outside this file and may not have created
# it, and modifying a missing target is a configure-time error.
if(CK_USE_OCP_FP8 AND TARGET test_ck_tile_cshuffle_epilogue_fp8)
    target_compile_definitions(test_ck_tile_cshuffle_epilogue_fp8 PRIVATE CK_TILE_USE_OCP_FP8)
endif()
|
|
|
|
# gfx950-specific FP8 test: registered only when the GPU_TARGETS value contains
# "gfx950" (regex match against the whole variable value).
if(GPU_TARGETS MATCHES "gfx950")
    add_gtest_executable(
        test_ck_tile_cshuffle_epilogue_fp8_gfx950
        test_cshuffle_epilogue_fp8_gfx950.cpp
    )

    # When CK_USE_OCP_FP8 is enabled, define CK_TILE_USE_OCP_FP8 for this test.
    # The define goes through target_compile_definitions() instead of a raw
    # "-D" compiler flag. The TARGET check skips this step when the target does
    # not exist: add_gtest_executable() is defined outside this file and may not
    # have created it, and modifying a missing target is a configure-time error.
    if(CK_USE_OCP_FP8 AND TARGET test_ck_tile_cshuffle_epilogue_fp8_gfx950)
        target_compile_definitions(test_ck_tile_cshuffle_epilogue_fp8_gfx950 PRIVATE CK_TILE_USE_OCP_FP8)
    endif()
endif()
|