AOCL_ENABLE_INSTRUCTIONS improvements

Changes to how AOCL_ENABLE_INSTRUCTIONS handles requests
for different ISAs (i.e. BLIS sub-configurations):
- Add missing SSE and AVX options. These will all choose the
  generic option in amdzen builds.
- For unsupported ISAs (e.g. AVX512 on Milan), select the
  hardware's default sub-configuration instead of trying
  to step down through alternative choices.
- For invalid options, or options not implemented in the BLIS
  build (e.g. skx in amdzen build), select the hardware's
  default sub-configuration instead of aborting.

Currently BLIS_ARCH_TYPE behaviour is not affected by these
changes.

AMD-Internal: [CPUPL-5078]
Change-Id: Idbd00d2806b1679889a9249878c51981c8d23b3f
This commit is contained in:
Edward Smyth
2024-05-10 06:06:31 -04:00
parent 580282e655
commit ca7ba707e7
2 changed files with 207 additions and 118 deletions

View File

@@ -179,23 +179,41 @@ void bli_arch_set_id( void )
#ifndef BLIS_CONFIGURETIME_CPUID
if ( req_id != -1 )
{
// BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable.
// BLIS_ARCH_TYPE and/or AOCL_ENABLE_INSTRUCTIONS was set.
// Cautiously check whether its value is usable.
// If req_id was set to an invalid arch_t value (ie: outside the range
// [1,BLIS_NUM_ARCHS-1]), output an error message and abort.
// Test if req_id was set to an invalid arch_t value (ie: outside the range
// [1,BLIS_NUM_ARCHS-1]), and handle appropriately depending on how it was set.
if ( bli_error_checking_is_enabled() )
{
err_t e_val = bli_check_valid_arch_id( req_id );
bli_check_error_code( e_val );
if (aocl_e_i)
{
// AOCL_ENABLE_INSTRUCTIONS was used:
// If req_id is invalid, ignore user supplied
// value and reset to -1 so we'll use normal
// subconfig selection below.
if ( e_val != BLIS_SUCCESS )
req_id = -1;
}
else
{
// BLIS_ARCH_TYPE was used:
// Abort on invalid value.
bli_check_error_code( e_val );
}
}
}
if ( req_id != -1 )
{
// Checking that the context was actually initialized is deferred
// to bli_arch_check_id(), which is called later.
// For now, we can only be confident that req_id is in range.
arch_id = req_id;
}
else
}
else
#endif
#endif
@@ -359,6 +377,7 @@ void bli_arch_check_id( void )
{
bli_arch_set_id_once();
bool arch_not_in_build = FALSE;
bool arch_reset = FALSE;
arch_t orig_arch_id= req_id;
model_t orig_model_id = model_id;
@@ -379,22 +398,96 @@ void bli_arch_check_id( void )
#ifndef BLIS_CONFIGURETIME_CPUID
if ( req_id != -1 )
{
// BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable.
// In BLAS1 and BLAS2 routines, bli_init_auto() may not have been
// called, so ensure cntx has been initialized here.
bli_gks_init_once();
bool test_arch = TRUE;
while (test_arch)
{
// At this point, we know that req_id is in the valid range, but we
// don't yet know if it refers to a context that was actually
// initialized. Query the address of an internal context data structure
// corresponding to req_id. This pointer will be NULL if the associated
// subconfig is not available.
cntx_t** req_cntx = bli_gks_lookup_id( req_id );
// At this point, we know that req_id is in the valid range, but we
// don't yet know if it refers to a context that was actually
// initialized. Query the address of an internal context data structure
// corresponding to req_id. This pointer will be NULL if the associated
// subconfig is not available.
cntx_t** req_cntx = bli_gks_lookup_id( req_id );
if ( aocl_e_i )
{
// AOCL_ENABLE_INSTRUCTIONS was set. Cautiously check whether its value is usable.
// This function checks the context pointer and aborts with a useful
// error message if the pointer is found to be NULL.
if ( bli_error_checking_is_enabled() )
{
err_t e_val = bli_check_initialized_gks_cntx( req_cntx );
if ( e_val != BLIS_SUCCESS )
{
arch_not_in_build = TRUE;
arch_reset = TRUE;
req_id = actual_arch_id;
model_id = actual_model_id;
}
}
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
// If AVX2 test fails here we assume either:
// 1. Config was either zen, zen2, zen3, zen4, zen5, haswell or skx,
// so there is no fallback code path, hence error checking
// above will fail.
// 2. Config was amdzen, intel64 or x86_64, and will have
// generic code path.
if ( !bli_cpuid_is_avx2fma3_supported() )
{
switch (req_id)
{
case BLIS_ARCH_ZEN5:
case BLIS_ARCH_ZEN4:
case BLIS_ARCH_ZEN3:
case BLIS_ARCH_ZEN2:
case BLIS_ARCH_ZEN:
case BLIS_ARCH_EXCAVATOR:
case BLIS_ARCH_SKX:
case BLIS_ARCH_HASWELL:
arch_reset = TRUE;
req_id = actual_arch_id;
model_id = actual_model_id;
break;
}
}
// If AVX512 test fails here we assume either:
// 1. Config was either zen5, zen4 or skx, so there is
// no fallback code path, hence error checking
// above will fail.
// 2. Config was amdzen, intel64 or x86_64, and will have
// appropriate avx2 code path to try.
if ( !bli_cpuid_is_avx512_supported() )
{
switch (req_id)
{
case BLIS_ARCH_ZEN5:
case BLIS_ARCH_ZEN4:
case BLIS_ARCH_SKX:
arch_reset = TRUE;
req_id = actual_arch_id;
model_id = actual_model_id;
break;
}
}
// Note: Pre-AVX2 systems from AMD and Intel, and Intel KNL,
// have not been included in these tests, and thus could
// continue to give illegal instruction errors on other
// platforms, just as if BLIS_ARCH_TYPE was set to the
// same value.
#else
// Non-x86 platforms just accept value given for now.
// Similar logic to x86 if block could be implemented
// here if desired.
test_arch = FALSE;
#endif
}
else
{
// BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable.
// This function checks the context pointer and aborts with a useful
// error message if the pointer is found to be NULL.
@@ -403,89 +496,8 @@ void bli_arch_check_id( void )
err_t e_val = bli_check_initialized_gks_cntx( req_cntx );
bli_check_error_code( e_val );
}
// If BLIS_ARCH_TYPE (or renamed version of this environment variable)
// was set, we always use this value of req_id to set arch_id.
// However, if AOCL_ENABLE_INSTRUCTIONS was set instead, we check for
// ISA compatibility and switch to a supported option if necessary.
if ( aocl_e_i )
{
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
// If AVX2 test fails here we assume either:
// 1. Config was either zen, zen2, zen3, zen4, zen5, haswell or skx,
// so there is no fallback code path, hence error checking
// above will fail.
// 2. Config was amdzen, intel64 or x86_64, and will have
// generic code path.
if ( !bli_cpuid_is_avx2fma3_supported() )
{
switch (req_id)
{
case BLIS_ARCH_ZEN5:
case BLIS_ARCH_ZEN4:
case BLIS_ARCH_ZEN3:
case BLIS_ARCH_ZEN2:
case BLIS_ARCH_ZEN:
case BLIS_ARCH_EXCAVATOR:
case BLIS_ARCH_SKX:
case BLIS_ARCH_HASWELL:
arch_reset = TRUE;
req_id = BLIS_ARCH_GENERIC;
model_id = BLIS_MODEL_DEFAULT;
continue;
break;
}
}
// If AVX512 test fails here we assume either:
// 1. Config was either zen5, zen4 or skx, so there is
// no fallback code path, hence error checking
// above will fail.
// 2. Config was amdzen, intel64 or x86_64, and will have
// appropriate avx2 code path to try.
if ( !bli_cpuid_is_avx512_supported() )
{
switch (req_id)
{
case BLIS_ARCH_ZEN5:
arch_reset = TRUE;
req_id = BLIS_ARCH_ZEN3;
model_id = BLIS_MODEL_DEFAULT;
continue;
break;
case BLIS_ARCH_ZEN4:
arch_reset = TRUE;
req_id = BLIS_ARCH_ZEN3;
model_id = BLIS_MODEL_DEFAULT;
continue;
break;
case BLIS_ARCH_SKX:
arch_reset = TRUE;
req_id = BLIS_ARCH_HASWELL;
model_id = BLIS_MODEL_DEFAULT;
continue;
break;
}
}
// If both tests above pass, we accept req_id choice.
test_arch = FALSE;
// Note: Pre-AVX2 systems from AMD and Intel, and Intel KNL,
// have not been included in these tests, and thus could
// continue to give illegal instruction errors on other
// platforms, just as if BLIS_ARCH_TYPE was set to the
// same value.
#else
// Non-x86 platforms just accept value given for now.
// Similar logic to x86 if block could be implemented
// here if desired.
test_arch = FALSE;
#endif
}
else
{
test_arch = FALSE;
}
}
// Finally, we can be confident that req_id (1) is in range and (2)
@@ -498,16 +510,50 @@ void bli_arch_check_id( void )
if ( bli_arch_get_logging() )
{
if ( arch_reset )
if ( req_id == -1 && aocl_e_i)
{
// AOCL_ENABLE_INSTRUCTIONS was set to an invalid value, so the
// normal system arch_id was used instead.
if ( model_id == BLIS_MODEL_DEFAULT )
{
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
"libblis: Selecting system default sub-configuration '%s'.\n",
bli_arch_string( arch_id ) );
}
else
{
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
bli_arch_string( arch_id ), bli_model_string( model_id ) );
}
}
else if ( arch_not_in_build )
{
if ( orig_model_id == BLIS_MODEL_DEFAULT )
{
fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\nlibblis: Switching to sub-configuration '%s'.\n",
fprintf( stderr, "libblis: Sub-configuration '%s' is not implemented in this build.\n"
"libblis: Selecting system default sub-configuration '%s'.\n",
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
}
else
{
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\nlibblis: Switching to sub-configuration '%s', model '%s'.\n",
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not implemented in this build.\n"
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
}
}
else if ( arch_reset )
{
if ( orig_model_id == BLIS_MODEL_DEFAULT )
{
fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\n"
"libblis: Selecting system default sub-configuration '%s'.\n",
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
}
else
{
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\n"
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
}
}

View File

@@ -188,42 +188,85 @@ gint_t bli_env_get_var_arch_type( const char* env, gint_t fallback )
r_val = BLIS_ARCH_BULLDOZER;
}
// Some aliases for mapping AMD and Intel ISA
// names to a suitable sub-configuration.
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64) || defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) || defined(BLIS_FAMILY_ZEN3) || defined(BLIS_FAMILY_ZEN2) || defined(BLIS_FAMILY_ZEN)
// names to a suitable sub-configuration for each
// x86-64 processor family.
#if defined(BLIS_FAMILY_AMDZEN)
else if (strcmp(str, "avx512") == 0)
{
r_val = BLIS_ARCH_ZEN4;
}
#endif
#if defined(BLIS_FAMILY_INTEL64) || defined(BLIS_FAMILY_SKX) || defined(BLIS_FAMILY_HASWELL)
else if (strcmp(str, "avx512") == 0)
{
r_val = BLIS_ARCH_SKX;
}
#endif
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64) || defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) ||defined(BLIS_FAMILY_ZEN3)
else if (strcmp(str, "avx2") == 0)
{
r_val = BLIS_ARCH_ZEN3;
}
#endif
#if defined(BLIS_FAMILY_ZEN2)
else if (strcmp(str, "avx2") == 0)
else if (strcmp(str, "avx") == 0)
{
r_val = BLIS_ARCH_ZEN2;
r_val = BLIS_ARCH_GENERIC;
}
else if ((strcmp(str, "sse4_2") == 0) ||
(strcmp(str, "sse4.2") == 0) ||
(strcmp(str, "sse4_1") == 0) ||
(strcmp(str, "sse4.1") == 0) ||
(strcmp(str, "sse4a") == 0) ||
(strcmp(str, "sse4") == 0) ||
(strcmp(str, "ssse3") == 0) ||
(strcmp(str, "sse3") == 0) ||
(strcmp(str, "sse2") == 0))
{
r_val = BLIS_ARCH_GENERIC;
}
#endif
#if defined(BLIS_FAMILY_ZEN)
#if defined(BLIS_FAMILY_X86_64)
else if (strcmp(str, "avx512") == 0)
{
r_val = BLIS_ARCH_ZEN4;
}
else if (strcmp(str, "avx2") == 0)
{
r_val = BLIS_ARCH_ZEN;
r_val = BLIS_ARCH_ZEN3;
}
else if (strcmp(str, "avx") == 0)
{
r_val = BLIS_ARCH_SANDYBRIDGE;
}
else if ((strcmp(str, "sse4_2") == 0) ||
(strcmp(str, "sse4.2") == 0) ||
(strcmp(str, "sse4_1") == 0) ||
(strcmp(str, "sse4.1") == 0) ||
(strcmp(str, "sse4a") == 0) ||
(strcmp(str, "sse4") == 0) ||
(strcmp(str, "ssse3") == 0) ||
(strcmp(str, "sse3") == 0) ||
(strcmp(str, "sse2") == 0))
{
r_val = BLIS_ARCH_GENERIC;
}
#endif
#if defined(BLIS_FAMILY_INTEL64) || defined(BLIS_FAMILY_SKX) || defined(BLIS_FAMILY_HASWELL)
#if defined(BLIS_FAMILY_INTEL64)
else if (strcmp(str, "avx512") == 0)
{
r_val = BLIS_ARCH_SKX;
}
else if (strcmp(str, "avx2") == 0)
{
r_val = BLIS_ARCH_HASWELL;
}
else if (strcmp(str, "avx") == 0)
{
r_val = BLIS_ARCH_SANDYBRIDGE;
}
else if ((strcmp(str, "sse4_2") == 0) ||
(strcmp(str, "sse4.2") == 0) ||
(strcmp(str, "sse4_1") == 0) ||
(strcmp(str, "sse4.1") == 0) ||
(strcmp(str, "sse4a") == 0) ||
(strcmp(str, "sse4") == 0) ||
(strcmp(str, "ssse3") == 0) ||
(strcmp(str, "sse3") == 0) ||
(strcmp(str, "sse2") == 0))
{
r_val = BLIS_ARCH_GENERIC;
}
#endif
// ARM
else if (strcmp(str, "thunderx2") == 0)