mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-11 08:50:17 +00:00
[Navi3x-LWPCK-449] wmma_op + unit test (#484)
* wmma_op + unit test * add arch limitation to wmma test * change arch limitation * Refactor + Add all type unit test(int4 compile failed) * Add f32_16x16x16_bf16 unit test * Remote int4 related * delete deprecated test Co-authored-by: Po Yen Chen <PoYen.Chen@amd.com> Co-authored-by: Chao Liu <chao.liu2@amd.com>
This commit is contained in:
@@ -25,7 +25,7 @@
|
||||
// check GPU target
|
||||
#ifdef __HIP_DEVICE_COMPILE__
|
||||
#if !(defined(__gfx803__) || defined(__gfx900__) || defined(__gfx906__) || defined(__gfx908__) || \
|
||||
defined(__gfx90a__) || defined(__gfx1030__))
|
||||
defined(__gfx90a__) || defined(__gfx1030__) || defined(__gfx1100__))
|
||||
#error Not supported target
|
||||
#endif
|
||||
#endif
|
||||
@@ -38,6 +38,8 @@
|
||||
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x00020000
|
||||
#elif defined(__gfx1030__) // for GPU code
|
||||
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x31014000
|
||||
#elif defined(__gfx1100__) // for GPU code
|
||||
#define CK_BUFFER_RESOURCE_3RD_DWORD 0x10020000
|
||||
#endif
|
||||
|
||||
// FMA instruction
|
||||
@@ -62,6 +64,13 @@
|
||||
#define CK_USE_AMD_MFMA_BF16_1K_OP
|
||||
#endif
|
||||
|
||||
// WMMA instruction
|
||||
#ifndef __HIP_DEVICE_COMPILE__ // for host code
|
||||
#define CK_USE_AMD_WMMA
|
||||
#elif defined(__gfx1100__) // for GPU code
|
||||
#define CK_USE_AMD_WMMA
|
||||
#endif
|
||||
|
||||
// buffer load
|
||||
#define CK_USE_AMD_BUFFER_LOAD 1
|
||||
|
||||
|
||||
Reference in New Issue
Block a user