Extend XDL kernel to Support RDNA3/4 - Part 1 (#2606)

This commit is contained in:
linqunAMD
2025-08-23 05:46:30 +08:00
committed by GitHub
parent 0db21053e6
commit d6e49c5fde
11 changed files with 683 additions and 127 deletions

View File

@@ -75,9 +75,9 @@ template <index_t BlockSize,
bool IsF4F6 = false>
struct BlockwiseGemmXdlops_pipeline_hotloop_inst
{
static constexpr index_t WaveSize = 64;
static constexpr index_t WaveNumM = MPerBlock / (MRepeat * MPerXDL);
static constexpr index_t WaveNumN = NPerBlock / (NRepeat * NPerXDL);
static constexpr index_t WaveSize = BlockSize / WaveNumM / WaveNumN;
static constexpr index_t A_LDS_Read_Width = ALDSReadWidth;
static constexpr index_t B_LDS_Read_Width = BLDSReadWidth;

View File

@@ -1,5 +1,5 @@
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2023, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-2025, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
@@ -7,6 +7,38 @@
namespace ck {
#if defined(CK_ENABLE_DYNAMIC_WARP_SIZE)
// Device-side warp size, resolved entirely by the preprocessor.
// Yields 32 only when this is a device compilation pass
// (__HIP_DEVICE_COMPILE__ defined) for a target that does not define
// __GFX9__; every other combination (a __GFX9__ device pass, or a pass
// without __HIP_DEVICE_COMPILE__) yields 64.
__device__ constexpr index_t get_warp_size()
{
#if defined(__HIP_DEVICE_COMPILE__) && !defined(__GFX9__)
    return 32;
#else
    return 64;
#endif
}
// Host-side warp size, obtained at run time.
// Unless built with __HIPCC_RTC__ or CK_CODE_GEN_RTC defined, looks up the
// device reported by hipGetDevice and reads its hipDeviceAttributeWarpSize.
// Returns the queried value when both HIP calls report hipSuccess; in every
// other case (either call fails, or the query is compiled out) returns 64.
inline __host__ index_t get_warp_size()
{
#if !(defined(__HIPCC_RTC__) || defined(CK_CODE_GEN_RTC))
    int device_id = 0;
    int warp_size = 0;
    // Short-circuit: the attribute is only queried if the device lookup succeeded.
    if(hipGetDevice(&device_id) == hipSuccess &&
       hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, device_id) == hipSuccess)
    {
        return warp_size;
    }
#endif
    return 64;
}
#else
__host__ __device__ constexpr index_t get_warp_size()
{
#if defined(__GFX9__) || !defined(__HIP_DEVICE_COMPILE__)
@@ -15,6 +47,7 @@ __host__ __device__ constexpr index_t get_warp_size()
return 32;
#endif
}
#endif
// Returns the x component of the calling thread's threadIdx as an index_t.
__device__ index_t get_thread_local_1d_id()
{
    const index_t tid_x = threadIdx.x;
    return tid_x;
}