Merge commit 'cb27e7c77fe807dbdc763feb128bbd127f49b4c8' into develop

This commit is contained in:
github-actions[bot]
2025-05-08 20:06:39 +00:00
parent 9a7dbe8e7d
commit 15935fe1c5
7 changed files with 11 additions and 22 deletions

View File

@@ -714,7 +714,7 @@ struct DeviceGemmMX_Xdl_CShuffleV3 : public DeviceGemmMX<ALayout,
return false;
}
if(!ck::is_xdl_supported())
if(ck::get_device_name() != "gfx950")
{
return false;
}

View File

@@ -35,7 +35,7 @@ __global__ void
// __attribute__((amdgpu_waves_per_eu(1, 1)))
kernel_gemm_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
#if defined(__gfx950__) && __HIP_DEVICE_COMPILE__
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
auto splitk_batch_offset = typename GridwiseGemm::SplitKBatchOffset(karg, blockIdx.z);
@@ -66,7 +66,7 @@ __global__ void
// __attribute__((amdgpu_waves_per_eu(1, 1)))
kernel_gemm_xdl_cshuffle_v3_2lds(typename GridwiseGemm::Argument karg)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
#if defined(__gfx950__) && __HIP_DEVICE_COMPILE__
// Passing two LDS pointers is the key to telling the compiler that ds_read/write
// operate on different LDS chunks at the same time without order dependency
__shared__ char p_shared_0[GridwiseGemm::GetSharedMemoryNumberOfByte()];