Dynamic selection of AVX2 or AVX512 DNRM2 kernels

- Added kernel selection logic based on the input
  dimension (a runtime parameter) to choose between
  deploying the AVX2 or the AVX512 computational kernel
  for single-threaded execution.

- An empirical analysis was conducted to arrive at the
  thresholds for the ZEN4 and ZEN5 architectures.

- Updated the fast-path threshold for ZEN4 to be in line
  with the tipping points of its dynamic thread-setter (used
  when AOCL_DYNAMIC is enabled).

AMD-Internal: [CPUPL-5937]
Change-Id: I96d7f167658c9e25a0098c4c67e12e4ba673e228
This commit is contained in:
Vignesh Balasubramanian
2024-12-02 10:11:21 +05:30
committed by Vignesh Balasubramanian
parent baeebe75c9
commit da6e9defcb

View File

@@ -1062,11 +1062,34 @@ void bli_dnormfv_unb_var1
switch ( id )
{
case BLIS_ARCH_ZEN5:
#if defined(BLIS_KERNELS_ZEN4)
if( n <= 30 )
norm_fp = bli_dnorm2fv_unb_var1_avx2;
else
norm_fp = bli_dnorm2fv_unb_var1_avx512;
#ifdef __clang__
fast_path_thresh = 6000;
#else
fast_path_thresh = 4500;
#endif
#ifdef BLIS_ENABLE_OPENMP
simd_factor = 8;
#endif
break;
#endif
case BLIS_ARCH_ZEN4:
#if defined(BLIS_KERNELS_ZEN4)
norm_fp = bli_dnorm2fv_unb_var1_avx512;
fast_path_thresh = 4500;
if( n <= 250 )
norm_fp = bli_dnorm2fv_unb_var1_avx2;
else
norm_fp = bli_dnorm2fv_unb_var1_avx512;
fast_path_thresh = 4000;
#ifdef BLIS_ENABLE_OPENMP
simd_factor = 8;