mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 17:00:18 +00:00
Syncing up from internal repo to enable MI300. (#690)
* enable gfx940
* switch between intrinsic mfma routines on mi100/200 and mi300
* fix mfma_int8 on MI300
* disable 2 int8 examples on MI300
* Update cmake-ck-dev.sh
* restore gitignore file
* modify Jenkinsfile to the internal repo

---------

Co-authored-by: Jing Zhang <jizha@amd.com>
Co-authored-by: zjing14 <zhangjing14@gmail.com>
This commit is contained in:
@@ -31,7 +31,7 @@
|
||||
#ifndef __HIP_DEVICE_COMPILE__ // for host code
|
||||
#define CK_BUFFER_RESOURCE_3RD_DWORD -1
|
||||
#elif defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || \
|
||||
defined(__gfx90a__) // for GPU code
|
||||
defined(__gfx90a__) || defined(__gfx940__) // for GPU code
|
||||
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000
|
||||
#elif defined(__gfx1030__) // for GPU code
|
||||
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000
|
||||
@@ -43,8 +43,8 @@
|
||||
#ifndef __HIP_DEVICE_COMPILE__ // for host code, define nothing
|
||||
#elif defined(__gfx803__) || defined(__gfx900__) // for GPU code
|
||||
#define CK_USE_AMD_V_MAC_F32
|
||||
#elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || \
|
||||
defined(__gfx1030__) // for GPU code
|
||||
#elif defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) || \
|
||||
defined(__gfx940__) // for GPU code
|
||||
#define CK_USE_AMD_V_FMAC_F32
|
||||
#define CK_USE_AMD_V_DOT2_F32_F16
|
||||
#define CK_USE_AMD_V_DOT4_I32_I8
|
||||
@@ -53,14 +53,18 @@
|
||||
// MFMA instruction
|
||||
#ifndef __HIP_DEVICE_COMPILE__ // for host code
|
||||
#define CK_USE_AMD_MFMA
|
||||
#elif defined(__gfx908__) || defined(__gfx90a__) // for GPU code
|
||||
#elif defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx940__) // for GPU code
|
||||
#define CK_USE_AMD_MFMA
|
||||
#endif
|
||||
|
||||
#if defined(__gfx90a__)
|
||||
#if(defined(__gfx90a__) || defined(__gfx940__))
|
||||
#define CK_USE_AMD_MFMA_BF16_1K_OP
|
||||
#endif
|
||||
|
||||
#if defined(__gfx940__)
|
||||
#define CK_USE_AMD_MFMA_GFX940
|
||||
#endif
|
||||
|
||||
// WMMA instruction
|
||||
#ifndef __HIP_DEVICE_COMPILE__ // for host code
|
||||
#define CK_USE_AMD_WMMA
|
||||
@@ -80,13 +84,13 @@
|
||||
// buffer atomic add: floating point
|
||||
#ifndef __HIP_DEVICE_COMPILE__ // for host code
|
||||
#define CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
|
||||
#elif defined(__gfx908__) || defined(__gfx90a__) // for GPU code
|
||||
#elif defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx940__) // for GPU code
|
||||
#define CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 1
|
||||
#else // for GPU code
|
||||
#define CK_USE_AMD_BUFFER_ATOMIC_ADD_FLOAT 0
|
||||
#endif
|
||||
|
||||
#if defined(__gfx90a__) // for GPU code
|
||||
#if(defined(__gfx90a__) || defined(__gfx940__)) // for GPU code
|
||||
#define CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 1
|
||||
#else
|
||||
#define CK_USE_AMD_BUFFER_ATOMIC_MAX_FLOAT64 0
|
||||
|
||||
Reference in New Issue
Block a user