mirror of
https://github.com/amd/blis.git
synced 2026-04-19 23:28:52 +00:00
Dynamic selection of AVX2 or AVX512 DNRM2 kernels
- Added kernel selection logic based on the input dimension (a runtime parameter) to choose between the AVX2 and AVX512 computational kernels for single-threaded execution. - An empirical analysis was conducted to arrive at the thresholds for the ZEN4 and ZEN5 architectures. - Updated the fast-path threshold for ZEN4 to be in line with the tipping points of its dynamic thread-setter (used when AOCL_DYNAMIC is enabled). AMD-Internal: [CPUPL-5937] Change-Id: I96d7f167658c9e25a0098c4c67e12e4ba673e228
This commit is contained in:
committed by
Vignesh Balasubramanian
parent
baeebe75c9
commit
da6e9defcb
@@ -1062,11 +1062,34 @@ void bli_dnormfv_unb_var1
|
||||
switch ( id )
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
|
||||
if( n <= 30 )
|
||||
norm_fp = bli_dnorm2fv_unb_var1_avx2;
|
||||
else
|
||||
norm_fp = bli_dnorm2fv_unb_var1_avx512;
|
||||
|
||||
#ifdef __clang__
|
||||
fast_path_thresh = 6000;
|
||||
#else
|
||||
fast_path_thresh = 4500;
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_ENABLE_OPENMP
|
||||
simd_factor = 8;
|
||||
#endif
|
||||
|
||||
break;
|
||||
#endif
|
||||
case BLIS_ARCH_ZEN4:
|
||||
#if defined(BLIS_KERNELS_ZEN4)
|
||||
|
||||
norm_fp = bli_dnorm2fv_unb_var1_avx512;
|
||||
fast_path_thresh = 4500;
|
||||
if( n <= 250 )
|
||||
norm_fp = bli_dnorm2fv_unb_var1_avx2;
|
||||
else
|
||||
norm_fp = bli_dnorm2fv_unb_var1_avx512;
|
||||
|
||||
fast_path_thresh = 4000;
|
||||
|
||||
#ifdef BLIS_ENABLE_OPENMP
|
||||
simd_factor = 8;
|
||||
|
||||
Reference in New Issue
Block a user