From e53f1dbb46f2cca40abcd96905e5bbbe7083c1c3 Mon Sep 17 00:00:00 2001 From: Rostyslav Geyyer <46627076+geyyer@users.noreply.github.com> Date: Wed, 5 Jul 2023 18:29:56 -0500 Subject: [PATCH] Add the missing archs (#785) [ROCm/composable_kernel commit: 61dc9aa93277561bde816098ba8487d985b1b439] --- include/ck/utility/amd_xdlops.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/include/ck/utility/amd_xdlops.hpp b/include/ck/utility/amd_xdlops.hpp index ca38077cf5..ea7755036f 100644 --- a/include/ck/utility/amd_xdlops.hpp +++ b/include/ck/utility/amd_xdlops.hpp @@ -364,7 +364,7 @@ struct intrin_mfma_f32_32x32x16f8f8<32, 32> template __device__ static void Run(const f8x8_t& reg_a, const f8x8_t& reg_b, FloatC& reg_c) { -#if defined(__gfx940__) +#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_mfma_f32_32x32x16_fp8_fp8( bit_cast(reg_a), @@ -396,7 +396,7 @@ struct intrin_mfma_f32_16x16x32f8f8<16, 16> template __device__ static void Run(const f8x8_t& reg_a, const f8x8_t& reg_b, FloatC& reg_c) { -#if defined(__gfx940__) +#if defined(__gfx940__) || defined(__gfx941__) || defined(__gfx942__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_mfma_f32_16x16x32_fp8_fp8( bit_cast(reg_a), bit_cast(reg_b),