From 6d30e20d44d7beac792e451bc4dd105f8b62eeff Mon Sep 17 00:00:00 2001
From: Illia Silin <98187287+illsilin@users.noreply.github.com>
Date: Thu, 19 Oct 2023 09:34:39 -0700
Subject: [PATCH] Fix the DL kernel issues on Navi3x. (#998)

* apply the patch for dl kernels on gfx11

* build DL kernels on navi32 CI

[ROCm/composable_kernel commit: f7331c603b87b940419715360cb81600d3338b0e]
---
 Jenkinsfile                          | 4 ++--
 include/ck/ck.hpp                    | 4 ++++
 include/ck/utility/inner_product.hpp | 2 ++
 3 files changed, 8 insertions(+), 2 deletions(-)

diff --git a/Jenkinsfile b/Jenkinsfile
index 38bd3d7df4..021c19f150 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -790,8 +790,8 @@ pipeline {
                     }
                     agent{ label rocmnode("navi32") }
                     environment{
-                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" """
-                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1101" -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
+                        setup_args = """ -DCMAKE_INSTALL_PREFIX=../install -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON """
+                        execute_args = """ cd ../client_example && rm -rf build && mkdir build && cd build && cmake -D CMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" -DGPU_TARGETS="gfx1101" -DDL_KERNELS=ON -D CMAKE_CXX_COMPILER="${build_compiler()}" .. && make -j """
                     }
                     steps{
                         Build_CK_and_Reboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
diff --git a/include/ck/ck.hpp b/include/ck/ck.hpp
index 069ff7fc74..1e41404192 100644
--- a/include/ck/ck.hpp
+++ b/include/ck/ck.hpp
@@ -66,6 +66,10 @@
 #define CK_USE_AMD_V_FMAC_F32
 #define CK_USE_AMD_V_DOT2_F32_F16
 #define CK_USE_AMD_V_DOT4_I32_I8
+#elif defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__)
+#define CK_USE_AMD_V_FMAC_F32
+#define CK_USE_AMD_V_DOT2_F32_F16
+#define CK_USE_AMD_V_DOT4_I32_I8_GFX11
 #endif
 
 // MFMA instruction
diff --git a/include/ck/utility/inner_product.hpp b/include/ck/utility/inner_product.hpp
index 16c139c008..65efaf388a 100644
--- a/include/ck/utility/inner_product.hpp
+++ b/include/ck/utility/inner_product.hpp
@@ -192,6 +192,8 @@ inner_product(const int8x4_t& a, const int8x4_t& b,
 #else
     c = __builtin_amdgcn_sdot4(bit_cast(a), bit_cast(b), c, false);
 #endif
+#elif defined(CK_USE_AMD_V_DOT4_I32_I8_GFX11)
+    c = __builtin_amdgcn_sudot4(true, bit_cast(a), true, bit_cast(b), c, false);
 #else
     const vector_type a_vector{a};
     const vector_type b_vector{b};