Implement device_gemm_universal_preshuffle_instance for RDNA4 (#3429)

* add device_gemm_wmma_cshuffle_v3_b_preshuffle.hpp

* add examples

* add instances to test

* remove duplicate code between examples
This commit is contained in:
Yung-sheng Tu
2026-01-15 16:19:31 +01:00
committed by GitHub
parent e30207985a
commit 6df2d70143
20 changed files with 1229 additions and 14 deletions

View File

@@ -2,8 +2,8 @@
# SPDX-License-Identifier: MIT
# Register the universal preshuffle GEMM FP8 gtest executables.
# The regex is unanchored, so the block is entered whenever GPU_TARGETS contains
# "gfx94", "gfx95" or "gfx12" anywhere in its value.
if(GPU_TARGETS MATCHES "gfx9[45]|gfx12")
# NOTE(review): this listing appears to be a diff hunk whose +/- markers were lost;
# the *_xdl_fp8 lines look like the removed side and the *_fp8 lines like the
# added side of a rename -- confirm against the actual file before editing.
add_gtest_executable(test_gemm_universal_preshuffle_xdl_fp8 test_gemm_universal_preshuffle_xdl_fp8.cpp)
add_gtest_executable(test_gemm_universal_preshuffle_fp8 test_gemm_universal_preshuffle_fp8.cpp)
# `result` is not assigned anywhere in this snippet; presumably add_gtest_executable
# sets it to 0 when the target was created -- verify against the helper's definition.
# NOTE(review): if both add_gtest_executable calls are really present, this single
# check can only reflect whichever call wrote `result` last.
if(result EQUAL 0)
# Link each test against the `utility` target and the preshuffle instance library.
target_link_libraries(test_gemm_universal_preshuffle_xdl_fp8 PRIVATE utility device_gemm_universal_preshuffle_instance)
target_link_libraries(test_gemm_universal_preshuffle_fp8 PRIVATE utility device_gemm_universal_preshuffle_instance)
endif()
endif()