mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-03 13:11:25 +00:00
Ensure MX GEMM Instances can be Cross-Compiled for Multiple Architectures (#2171)
* Re-enable MX GEMM instances
* Fix compilation error when building MX GEMM for multiple architectures
This commit is contained in:
committed by
GitHub
parent
c757046d49
commit
cb27e7c77f
@@ -714,7 +714,7 @@ struct DeviceGemmMX_Xdl_CShuffleV3 : public DeviceGemmMX<ALayout,
|
||||
return false;
|
||||
}
|
||||
|
||||
if(!ck::is_xdl_supported())
|
||||
if(ck::get_device_name() != "gfx950")
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -35,7 +35,7 @@ __global__ void
|
||||
// __attribute__((amdgpu_waves_per_eu(1, 1)))
|
||||
kernel_gemm_xdl_cshuffle_v3(typename GridwiseGemm::Argument karg)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||
#if defined(__gfx950__) && __HIP_DEVICE_COMPILE__
|
||||
__shared__ char p_shared[GridwiseGemm::GetSharedMemoryNumberOfByte()];
|
||||
|
||||
auto splitk_batch_offset = typename GridwiseGemm::SplitKBatchOffset(karg, blockIdx.z);
|
||||
@@ -66,7 +66,7 @@ __global__ void
|
||||
// __attribute__((amdgpu_waves_per_eu(1, 1)))
|
||||
kernel_gemm_xdl_cshuffle_v3_2lds(typename GridwiseGemm::Argument karg)
|
||||
{
|
||||
#if(!defined(__HIP_DEVICE_COMPILE__) || defined(__gfx9__))
|
||||
#if defined(__gfx950__) && __HIP_DEVICE_COMPILE__
|
||||
// Pass two lds pointer is the key to tell compiler that ds_read/write
|
||||
// operate on different lds chunk at same time without order dependency
|
||||
__shared__ char p_shared_0[GridwiseGemm::GetSharedMemoryNumberOfByte()];
|
||||
|
||||
Reference in New Issue
Block a user