mirror of
https://github.com/ROCm/composable_kernel.git
synced 2026-05-15 18:42:06 +00:00
Fix the DL kernel issues on Navi3x. (#998)
* Apply the patch for DL kernels on gfx11
* Build DL kernels on Navi32 CI
[ROCm/composable_kernel commit: f7331c603b]
This commit is contained in:
@@ -66,6 +66,10 @@
|
||||
#define CK_USE_AMD_V_FMAC_F32
|
||||
#define CK_USE_AMD_V_DOT2_F32_F16
|
||||
#define CK_USE_AMD_V_DOT4_I32_I8
|
||||
#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
|
||||
#define CK_USE_AMD_V_FMAC_F32
|
||||
#define CK_USE_AMD_V_DOT2_F32_F16
|
||||
#define CK_USE_AMD_V_DOT4_I32_I8_GFX11
|
||||
#endif
|
||||
|
||||
// MFMA instruction
|
||||
|
||||
@@ -192,6 +192,8 @@ inner_product<int8x4_t, int8x4_t, int32_t>(const int8x4_t& a, const int8x4_t& b,
|
||||
#else
|
||||
c = __builtin_amdgcn_sdot4(bit_cast<int32_t>(a), bit_cast<int32_t>(b), c, false);
|
||||
#endif
|
||||
#elif defined(CK_USE_AMD_V_DOT4_I32_I8_GFX11)
|
||||
c = __builtin_amdgcn_sudot4(true, bit_cast<int32_t>(a), true, bit_cast<int32_t>(b), c, false);
|
||||
#else
|
||||
const vector_type<int8_t, 4> a_vector{a};
|
||||
const vector_type<int8_t, 4> b_vector{b};
|
||||
|
||||
Reference in New Issue
Block a user