From f2acd4fd49615027b7861a628e773303f07dcb4e Mon Sep 17 00:00:00 2001 From: Hari Govind S Date: Thu, 1 Aug 2024 13:47:12 +0530 Subject: [PATCH] AOCL Dynamic for zen3 dcopy - Create separate AOCL Dynamic values for multithreading dcopy API for zen1, zen2 and zen3 AMD-Internal: [CPUPL-5238] Change-Id: I42f56393716edeeace8bfe71d7adab0ba7325b47 --- frame/base/bli_rntm.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/frame/base/bli_rntm.c b/frame/base/bli_rntm.c index 2c7d6019c..d5d86e9fb 100644 --- a/frame/base/bli_rntm.c +++ b/frame/base/bli_rntm.c @@ -2120,9 +2120,6 @@ BLIS_INLINE void aocl_dcopyv_dynamic break; case BLIS_ARCH_ZEN4: - case BLIS_ARCH_ZEN: - case BLIS_ARCH_ZEN2: - case BLIS_ARCH_ZEN3: if ( n_elem <= 17000 ) *nt_ideal = 1; @@ -2134,6 +2131,18 @@ BLIS_INLINE void aocl_dcopyv_dynamic *nt_ideal = 8; // dcopy does not scale with more than 8 threads break; + case BLIS_ARCH_ZEN: + case BLIS_ARCH_ZEN2: + case BLIS_ARCH_ZEN3: + + if ( n_elem <= 17000 ) + *nt_ideal = 1; + else if (n_elem <= 52200) + *nt_ideal = 4; + else + *nt_ideal = 8; + // dcopy does not scale with more than 8 threads + break; default: // Without this default condition, compiler will throw