Implement grouped gemm tile loop for RDNA4 (#3304)

* feat: grouped gemm tile loop support for RDNA4

* fix: removed extra parameter from grouped gemm example instance

* fix: FP8 check incorrectly enabling FP8 on RDNA3
This commit is contained in:
Erwin Terpstra
2026-01-13 07:14:23 +01:00
committed by GitHub
parent 141f77aa12
commit eb041079a3
44 changed files with 3067 additions and 1223 deletions

View File

@@ -0,0 +1,18 @@
# Copyright (c) Advanced Micro Devices, Inc., or its affiliates.
# SPDX-License-Identifier: MIT
add_custom_target(test_grouped_gemm_tile_loop)
if (CK_USE_XDL OR CK_USE_WMMA)
add_gtest_executable(test_grouped_gemm_tile_loop_vanilla test_grouped_gemm_tile_loop.cpp)
if(result EQUAL 0)
target_link_libraries(test_grouped_gemm_tile_loop_vanilla PRIVATE utility device_grouped_gemm_tile_loop_instance)
add_dependencies(test_grouped_gemm_tile_loop test_grouped_gemm_tile_loop_vanilla)
endif()
add_gtest_executable(test_grouped_gemm_tile_loop_multiply test_grouped_gemm_tile_loop_multiply.cpp)
if(result EQUAL 0)
target_link_libraries(test_grouped_gemm_tile_loop_multiply PRIVATE utility device_grouped_gemm_tile_loop_instance)
add_dependencies(test_grouped_gemm_tile_loop test_grouped_gemm_tile_loop_multiply)
endif()
endif()