From 459469f66a06d8ad751cfe04b3269ac13b52ef76 Mon Sep 17 00:00:00 2001 From: Haocong WANG Date: Wed, 15 Mar 2023 22:44:13 +0800 Subject: [PATCH] Fix arch limitation bug (#639) [ROCm/composable_kernel commit: ea028ac65a60b7bbc0144e09dd5a97af15814e01] --- include/ck/utility/amd_wmma.hpp | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/ck/utility/amd_wmma.hpp b/include/ck/utility/amd_wmma.hpp index 7598fe667b..bf09142548 100644 --- a/include/ck/utility/amd_wmma.hpp +++ b/include/ck/utility/amd_wmma.hpp @@ -25,7 +25,7 @@ struct intrin_wmma_f32_16x16x16_f16_w32<16, 16> // delete them. // amd_assembly_wmma_f32_16x16x16_f16_w32( // reg_a, reg_b, reg_c.template AsType()(Number<0>{})); -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_f32_16x16x16_f16_w32( reg_a, reg_b, reg_c.template AsType()[Number<0>{}]); #else @@ -46,7 +46,7 @@ struct intrin_wmma_f32_16x16x16_bf16_w32<16, 16> template __device__ static void Run(const bhalf16_t& reg_a, const bhalf16_t& reg_b, FloatC& reg_c) { -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w32( reg_a, reg_b, reg_c.template AsType()[Number<0>{}]); @@ -71,7 +71,7 @@ struct intrin_wmma_f16_16x16x16_f16_w32<16, 16, Opsel> // opsel usage // false: D0.[0:15] = result // true : D0.[16:31]= result -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_f16_16x16x16_f16_w32( reg_a, reg_b, reg_c.template AsType()[Number<0>{}], Opsel); #else @@ -95,7 +95,7 @@ struct intrin_wmma_bf16_16x16x16_bf16_w32<16, 16, Opsel> // opsel usage // false: D0.[0:15] = result // true : D0.[16:31]= result -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w32( reg_a, reg_b, reg_c.template AsType()[Number<0>{}], Opsel); @@ -117,7 +117,7 @@ struct intrin_wmma_i32_16x16x16_iu8_w32<16, 16, neg_a, neg_b, clamp> template __device__ static void Run(const int8x16_t& reg_a, const int8x16_t& reg_b, FloatC& reg_c) { -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w32( neg_a, @@ -145,7 +145,7 @@ struct intrin_wmma_f32_16x16x16_f16_w64<16, 16> template __device__ static void Run(const half16_t& reg_a, const half16_t& reg_b, FloatC& reg_c) { -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_f32_16x16x16_f16_w64( reg_a, reg_b, reg_c.template AsType()[Number<0>{}]); #else @@ -166,7 +166,7 @@ struct intrin_wmma_f32_16x16x16_bf16_w64<16, 16> template __device__ static void Run(const bhalf16_t& reg_a, const bhalf16_t& reg_b, FloatC& reg_c) { -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_f32_16x16x16_bf16_w64( reg_a, reg_b, reg_c.template AsType()[Number<0>{}]); @@ -191,7 +191,7 @@ struct intrin_wmma_f16_16x16x16_f16_w64<16, 16, Opsel> // opsel usage // false: D0.[0:15] = result // true : D0.[16:31]= result -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_f16_16x16x16_f16_w64( reg_a, reg_b, reg_c.template AsType()[Number<0>{}], Opsel); #else @@ -215,7 +215,7 @@ struct intrin_wmma_bf16_16x16x16_bf16_w64<16, 16, Opsel> // opsel usage // false: D0.[0:15] = result // true : D0.[16:31]= result -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_bf16_16x16x16_bf16_w64( reg_a, reg_b, reg_c.template AsType()[Number<0>{}], Opsel); @@ -237,7 +237,7 @@ struct intrin_wmma_i32_16x16x16_iu8_w64<16, 16, neg_a, neg_b, clamp> template __device__ static void Run(const int8x16_t& reg_a, const int8x16_t& reg_b, FloatC& reg_c) { -#if defined(__gfx11__) +#if defined(__gfx1100__) || defined(__gfx1101__) || defined(__gfx1102__) reg_c.template AsType()(Number<0>{}) = __builtin_amdgcn_wmma_i32_16x16x16_iu8_w64( neg_a,