Extend XDL kernel to Support RDNA3/4 - Part 5 (#2725)

* Enable xdl in gfx11 & gfx12

* update cmake file

* fix all instance build (cmake)

* fix batched_gemm_gemm(cmake)

* rebase cmake files

* fix cmake build error

* remove CK_ENABLE_DYNAMIC_WARP_SIZE

* fix cmake build error (2)

* fix gfx11 build

CK_USE_XDL is enabled on gfx11 and gfx12

* fix gfx10 build

* fix gfx11 error

---------

Co-authored-by: Lin, Qun <Quentin.Lin+amdeng@amd.com>
This commit is contained in:
linqunAMD
2025-09-16 01:59:25 +08:00
committed by GitHub
parent 03b59f8c76
commit f22740df82
33 changed files with 243 additions and 397 deletions

View File

@@ -359,7 +359,7 @@ CK_TILE_DEVICE void atomic_add_g(T* p_dst, const thread_buffer<T, N>& x)
{
static_assert((std::is_same<T, int32_t>::value && (N == 1)) ||
(std::is_same<T, uint32_t>::value && (N == 1)) ||
(std::is_same<T, float>::value && (N == 1 || N == 2)) ||
(std::is_same<T, float>::value && (N == 1 || N == 2 || N == 4)) ||
(std::is_same<T, double>::value && (N == 1 || N == 2)) ||
(std::is_same<T, fp16_t>::value && (N == 2 || N == 4 || N == 8)) ||
(std::is_same<T, bf16_t>::value && (N == 2 || N == 4 || N == 8)) ||
@@ -369,6 +369,8 @@ CK_TILE_DEVICE void atomic_add_g(T* p_dst, const thread_buffer<T, N>& x)
constexpr auto I0 = number<0>{};
constexpr auto I1 = number<1>{};
constexpr auto I2 = number<2>{};
constexpr auto I3 = number<3>{};
if constexpr(std::is_same<T, float>::value)
{
@@ -381,6 +383,13 @@ CK_TILE_DEVICE void atomic_add_g(T* p_dst, const thread_buffer<T, N>& x)
atomicAdd(c_style_pointer_cast<float*>(p_dst), x.template get_as<float>()[I0]);
atomicAdd(c_style_pointer_cast<float*>(p_dst) + 1, x.template get_as<float>()[I1]);
}
else if constexpr(N == 4)
{
atomicAdd(c_style_pointer_cast<float*>(p_dst), x.template get_as<float>()[I0]);
atomicAdd(c_style_pointer_cast<float*>(p_dst) + 1, x.template get_as<float>()[I1]);
atomicAdd(c_style_pointer_cast<float*>(p_dst) + 2, x.template get_as<float>()[I2]);
atomicAdd(c_style_pointer_cast<float*>(p_dst) + 3, x.template get_as<float>()[I3]);
}
}
else if constexpr(std::is_same<T, double>::value)
{