Ensure MX GEMM Instances can be Cross-Compiled for Multiple Architectures (#2171)

* Re-enable MX GEMM instances

* Fix compilation error when building MX GEMM for multiple architectures
This commit is contained in:
Andriy Roshchenko
2025-05-08 13:26:03 -06:00
committed by GitHub
parent c757046d49
commit cb27e7c77f
7 changed files with 11 additions and 22 deletions

View File

@@ -714,7 +714,7 @@ struct DeviceGemmMX_Xdl_CShuffleV3 : public DeviceGemmMX<ALayout,
return false;
}
if(!ck::is_xdl_supported())
if(ck::get_device_name() != "gfx950")
{
return false;
}

View File

@@ -35,7 +35,7 @@ __global__ void
// __attribute__((amdgpu_waves_per_eu(1, 1)))
kernel_gemm_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
#if defined(__gfx950__) && __HIP_DEVICE_COMPILE__
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
auto splitk_batch_offset = typename GridwiseGemm::SplitKBatchOffset(karg, blockIdx.z);
@@ -66,7 +66,7 @@ __global__ void
// __attribute__((amdgpu_waves_per_eu(1, 1)))
kernel_gemm_xdl_cshuffle_v3_2lds(typename GridwiseGemm::Argument karg)
{
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
#if defined(__gfx950__) && __HIP_DEVICE_COMPILE__
// Passing two LDS pointers is the key to telling the compiler that ds_read/write
// operate on different LDS chunks at the same time without an ordering dependency
__shared__ char p_shared_0[GridwiseGemm::GetSharedMemoryNumberOfByte()];