Extend XDL kernel to Support RDNA3/4 - Part 5 (#2725)

* Enable xdl in gfx11 & gfx12

* update cmake file

* fix all instance build (cmake)

* fix batched_gemm_gemm(cmake)

* rebase cmake files

* fix cmake build error

* remve CK_ENABLE_DYNAMIC_WARP_SIZE

* update cmake build error2

* fix gfx11 build

CK_USE_XDL is enabled on gfx11 and gfx12

* fix gfx10 build

* fix gfx11 error

---------

Co-authored-by: Lin, Qun <Quentin.Lin+amdeng@amd.com>

[ROCm/composable_kernel commit: f22740df82]
This commit is contained in:
linqunAMD
2025-09-16 01:59:25 +08:00
committed by GitHub
parent 954db22b39
commit 71dc8a9d4d
33 changed files with 243 additions and 397 deletions

View File

@@ -44,8 +44,7 @@ list(APPEND GEMM_OPTIONS "SHELL: -mllvm -greedy-reverse-local-assignment=1 -mllv
example_compile_options(example_gemm_xdl_fp8_v3 PRIVATE ${GEMM_OPTIONS})
example_compile_options(example_gemm_xdl_bf16_v3 PRIVATE ${GEMM_OPTIONS})
list(APPEND gpu_list gfx942 gfx950)
list(APPEND gpu_list gfx942 gfx950 gfx1200 gfx1201 gfx12-generic)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
@@ -89,7 +88,14 @@ foreach(gpu IN LISTS GPU_TARGETS)
add_example_executable(example_gemm_xdl_lds_direct_load_fp16 gemm_xdl_lds_direct_load_fp16.cpp)
add_example_dependencies(example_gemm_xdl example_gemm_xdl_lds_direct_load_fp16)
set(target 1)
endif()
endforeach()
list(APPEND gpu_list gfx90a gfx942 gfx950 gfx1200 gfx1201 gfx12-generic)
set(target 0)
foreach(gpu IN LISTS GPU_TARGETS)
if(gpu IN_LIST gpu_list AND target EQUAL 0)
add_example_executable(example_gemm_xdl_bf16_streamk_v3 gemm_xdl_bf16_streamk_v3.cpp)
add_example_dependencies(example_gemm_xdl example_gemm_xdl_bf16_streamk_v3)