mirror of
https://github.com/amd/blis.git
synced 2026-05-25 02:44:31 +00:00
AOCL_ENABLE_INSTRUCTIONS improvements
Changes to how AOCL_ENABLE_INSTRUCTIONS handles requests for different ISAs (i.e. BLIS sub-configurations): - Add missing SSE and AVX options. These will all chose the generic option in amdzen builds. - For unsupported ISAs (e.g. AVX512 on Milan), select the hardware's default sub-configuration instead of trying to step down through alternative choices. - For invalid options, or options not implemented in the BLIS build (e.g. skx in amdzen build), select the hardware's default sub-configuration instead of aborting. Currently BLIS_ARCH_TYPE behaviour is not affected by these changes. AMD-Internal: [CPUPL-5078] Change-Id: Idbd00d2806b1679889a9249878c51981c8d23b3f
This commit is contained in:
@@ -179,23 +179,41 @@ void bli_arch_set_id( void )
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
if ( req_id != -1 )
|
||||
{
|
||||
// BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable.
|
||||
// BLIS_ARCH_TYPE and/or AOCL_ENABLE_INSTRUCTIONS was set.
|
||||
// Cautiously check whether its value is usable.
|
||||
|
||||
// If req_id was set to an invalid arch_t value (ie: outside the range
|
||||
// [1,BLIS_NUM_ARCHS-1]), output an error message and abort.
|
||||
// Test if req_id was set to an invalid arch_t value (ie: outside the range
|
||||
// [1,BLIS_NUM_ARCHS-1]), and handle appropriately depending on how it was set.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
{
|
||||
err_t e_val = bli_check_valid_arch_id( req_id );
|
||||
bli_check_error_code( e_val );
|
||||
if (aocl_e_i)
|
||||
{
|
||||
// AOCL_ENABLE_INSTRUCTIONS was used:
|
||||
// If req_id is invalid, ignore user supplied
|
||||
// value and reset to -1 so we'll use normal
|
||||
// subconfig selection below.
|
||||
if ( e_val != BLIS_SUCCESS )
|
||||
req_id = -1;
|
||||
}
|
||||
else
|
||||
{
|
||||
// BLIS_ARCH_TYPE was used:
|
||||
// Abort on invalid value.
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ( req_id != -1 )
|
||||
{
|
||||
// Check again context actually initialized deferred to
|
||||
// bli_arch_check_id() called later.
|
||||
|
||||
// For now, we can only be confident that req_id is in range.
|
||||
arch_id = req_id;
|
||||
}
|
||||
else
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
#endif
|
||||
@@ -359,6 +377,7 @@ void bli_arch_check_id( void )
|
||||
{
|
||||
bli_arch_set_id_once();
|
||||
|
||||
bool arch_not_in_build = FALSE;
|
||||
bool arch_reset = FALSE;
|
||||
arch_t orig_arch_id= req_id;
|
||||
model_t orig_model_id = model_id;
|
||||
@@ -379,22 +398,96 @@ void bli_arch_check_id( void )
|
||||
#ifndef BLIS_CONFIGURETIME_CPUID
|
||||
if ( req_id != -1 )
|
||||
{
|
||||
// BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable.
|
||||
|
||||
// In BLAS1 and BLAS2 routines, bli_init_auto() may not have been
|
||||
// called, so ensure cntx has been initialized here.
|
||||
bli_gks_init_once();
|
||||
|
||||
bool test_arch = TRUE;
|
||||
while (test_arch)
|
||||
{
|
||||
// At this point, we know that req_id is in the valid range, but we
|
||||
// don't yet know if it refers to a context that was actually
|
||||
// initialized. Query the address of an internal context data structure
|
||||
// corresponding to req_id. This pointer will be NULL if the associated
|
||||
// subconfig is not available.
|
||||
cntx_t** req_cntx = bli_gks_lookup_id( req_id );
|
||||
|
||||
// At this point, we know that req_id is in the valid range, but we
|
||||
// don't yet know if it refers to a context that was actually
|
||||
// initialized. Query the address of an internal context data structure
|
||||
// corresponding to req_id. This pointer will be NULL if the associated
|
||||
// subconfig is not available.
|
||||
cntx_t** req_cntx = bli_gks_lookup_id( req_id );
|
||||
if ( aocl_e_i )
|
||||
{
|
||||
// AOCL_ENABLE_INSTRUCTIONS was set. Cautiously check whether its value is usable.
|
||||
|
||||
// This function checks the context pointer and aborts with a useful
|
||||
// error message if the pointer is found to be NULL.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
{
|
||||
err_t e_val = bli_check_initialized_gks_cntx( req_cntx );
|
||||
if ( e_val != BLIS_SUCCESS )
|
||||
{
|
||||
arch_not_in_build = TRUE;
|
||||
arch_reset = TRUE;
|
||||
req_id = actual_arch_id;
|
||||
model_id = actual_model_id;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
|
||||
|
||||
// If AVX2 test fails here we assume either:
|
||||
// 1. Config was either zen, zen2, zen3, zen4, zen5, haswell or skx,
|
||||
// so there is no fallback code path, hence error checking
|
||||
// above will fail.
|
||||
// 2. Config was amdzen, intel64 or x86_64, and will have
|
||||
// generic code path.
|
||||
if ( !bli_cpuid_is_avx2fma3_supported() )
|
||||
{
|
||||
switch (req_id)
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
case BLIS_ARCH_ZEN4:
|
||||
case BLIS_ARCH_ZEN3:
|
||||
case BLIS_ARCH_ZEN2:
|
||||
case BLIS_ARCH_ZEN:
|
||||
case BLIS_ARCH_EXCAVATOR:
|
||||
case BLIS_ARCH_SKX:
|
||||
case BLIS_ARCH_HASWELL:
|
||||
arch_reset = TRUE;
|
||||
req_id = actual_arch_id;
|
||||
model_id = actual_model_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If AVX512 test fails here we assume either:
|
||||
// 1. Config was either zen5, zen4 or skx, so there is
|
||||
// no fallback code path, hence error checking
|
||||
// above will fail.
|
||||
// 2. Config was amdzen, intel64 or x86_64, and will have
|
||||
// appropriate avx2 code path to try.
|
||||
if ( !bli_cpuid_is_avx512_supported() )
|
||||
{
|
||||
switch (req_id)
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
case BLIS_ARCH_ZEN4:
|
||||
case BLIS_ARCH_SKX:
|
||||
arch_reset = TRUE;
|
||||
req_id = actual_arch_id;
|
||||
model_id = actual_model_id;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Note: Pre-AVX2 systems from AMD and Intel, and Intel KNL,
|
||||
// have not been included in these tests, and thus could
|
||||
// continue to give illegal instruction errors on other
|
||||
// platforms, just as if BLIS_ARCH_TYPE was set to the
|
||||
// same value.
|
||||
#else
|
||||
// Non-x86 platforms just accept value given for now.
|
||||
// Similar logic to x86 if block could be implemented
|
||||
// here if desired.
|
||||
test_arch = FALSE;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
// BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable.
|
||||
|
||||
// This function checks the context pointer and aborts with a useful
|
||||
// error message if the pointer is found to be NULL.
|
||||
@@ -403,89 +496,8 @@ void bli_arch_check_id( void )
|
||||
err_t e_val = bli_check_initialized_gks_cntx( req_cntx );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
// If BLIS_ARCH_TYPE (or renamed version of this environment variable)
|
||||
// was set, we always use this value of req_id to set arch_id.
|
||||
// However, if AOCL_ENABLE_INSTRUCTIONS was set instead, we check for
|
||||
// ISA compatibility and switch to a supported option if necessary.
|
||||
if ( aocl_e_i )
|
||||
{
|
||||
#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86)
|
||||
|
||||
// If AVX2 test fails here we assume either:
|
||||
// 1. Config was either zen, zen2, zen3, zen4, zen5, haswell or skx,
|
||||
// so there is no fallback code path, hence error checking
|
||||
// above will fail.
|
||||
// 2. Config was amdzen, intel64 or x86_64, and will have
|
||||
// generic code path.
|
||||
if ( !bli_cpuid_is_avx2fma3_supported() )
|
||||
{
|
||||
switch (req_id)
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
case BLIS_ARCH_ZEN4:
|
||||
case BLIS_ARCH_ZEN3:
|
||||
case BLIS_ARCH_ZEN2:
|
||||
case BLIS_ARCH_ZEN:
|
||||
case BLIS_ARCH_EXCAVATOR:
|
||||
case BLIS_ARCH_SKX:
|
||||
case BLIS_ARCH_HASWELL:
|
||||
arch_reset = TRUE;
|
||||
req_id = BLIS_ARCH_GENERIC;
|
||||
model_id = BLIS_MODEL_DEFAULT;
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If AVX512 test fails here we assume either:
|
||||
// 1. Config was either zen5, zen4 or skx, so there is
|
||||
// no fallback code path, hence error checking
|
||||
// above will fail.
|
||||
// 2. Config was amdzen, intel64 or x86_64, and will have
|
||||
// appropriate avx2 code path to try.
|
||||
if ( !bli_cpuid_is_avx512_supported() )
|
||||
{
|
||||
switch (req_id)
|
||||
{
|
||||
case BLIS_ARCH_ZEN5:
|
||||
arch_reset = TRUE;
|
||||
req_id = BLIS_ARCH_ZEN3;
|
||||
model_id = BLIS_MODEL_DEFAULT;
|
||||
continue;
|
||||
break;
|
||||
case BLIS_ARCH_ZEN4:
|
||||
arch_reset = TRUE;
|
||||
req_id = BLIS_ARCH_ZEN3;
|
||||
model_id = BLIS_MODEL_DEFAULT;
|
||||
continue;
|
||||
break;
|
||||
case BLIS_ARCH_SKX:
|
||||
arch_reset = TRUE;
|
||||
req_id = BLIS_ARCH_HASWELL;
|
||||
model_id = BLIS_MODEL_DEFAULT;
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// If both tests above pass, we accept req_id choice.
|
||||
test_arch = FALSE;
|
||||
|
||||
// Note: Pre-AVX2 systems from AMD and Intel, and Intel KNL,
|
||||
// have not been included in these tests, and thus could
|
||||
// continue to give illegal instruction errors on other
|
||||
// platforms, just as if BLIS_ARCH_TYPE was set to the
|
||||
// same value.
|
||||
#else
|
||||
// Non-x86 platforms just accept value given for now.
|
||||
// Similar logic to x86 if block could be implemented
|
||||
// here if desired.
|
||||
test_arch = FALSE;
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
test_arch = FALSE;
|
||||
}
|
||||
}
|
||||
|
||||
// Finally, we can be confident that req_id (1) is in range and (2)
|
||||
@@ -498,16 +510,50 @@ void bli_arch_check_id( void )
|
||||
|
||||
if ( bli_arch_get_logging() )
|
||||
{
|
||||
if ( arch_reset )
|
||||
if ( req_id == -1 && aocl_e_i)
|
||||
{
|
||||
// AOCL_ENABLE_INSTRUCTIONS was set to an invalid value
|
||||
// normal system arch_id was used instead.
|
||||
if ( model_id == BLIS_MODEL_DEFAULT )
|
||||
{
|
||||
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s'.\n",
|
||||
bli_arch_string( arch_id ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
|
||||
bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
}
|
||||
}
|
||||
else if ( arch_not_in_build )
|
||||
{
|
||||
if ( orig_model_id == BLIS_MODEL_DEFAULT )
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\nlibblis: Switching to sub-configuration '%s'.\n",
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s' is not implemented in this build.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\nlibblis: Switching to sub-configuration '%s', model '%s'.\n",
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not implemented in this build.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
}
|
||||
}
|
||||
else if ( arch_reset )
|
||||
{
|
||||
if ( orig_model_id == BLIS_MODEL_DEFAULT )
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) );
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\n"
|
||||
"libblis: Selecting system default sub-configuration '%s', model '%s'.\n",
|
||||
bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -188,42 +188,85 @@ gint_t bli_env_get_var_arch_type( const char* env, gint_t fallback )
|
||||
r_val = BLIS_ARCH_BULLDOZER;
|
||||
}
|
||||
// Some aliases for mapping AMD and Intel ISA
|
||||
// names to a suitable sub-configuration.
|
||||
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64) || defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) || defined(BLIS_FAMILY_ZEN3) || defined(BLIS_FAMILY_ZEN2) || defined(BLIS_FAMILY_ZEN)
|
||||
// names to a suitable sub-configuration for each
|
||||
// x86-64 processor family.
|
||||
#if defined(BLIS_FAMILY_AMDZEN)
|
||||
else if (strcmp(str, "avx512") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_ZEN4;
|
||||
}
|
||||
#endif
|
||||
#if defined(BLIS_FAMILY_INTEL64) || defined(BLIS_FAMILY_SKX) || defined(BLIS_FAMILY_HASWELL)
|
||||
else if (strcmp(str, "avx512") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_SKX;
|
||||
}
|
||||
#endif
|
||||
#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64) || defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) ||defined(BLIS_FAMILY_ZEN3)
|
||||
else if (strcmp(str, "avx2") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_ZEN3;
|
||||
}
|
||||
#endif
|
||||
#if defined(BLIS_FAMILY_ZEN2)
|
||||
else if (strcmp(str, "avx2") == 0)
|
||||
else if (strcmp(str, "avx") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_ZEN2;
|
||||
r_val = BLIS_ARCH_GENERIC;
|
||||
}
|
||||
else if ((strcmp(str, "sse4_2") == 0) ||
|
||||
(strcmp(str, "sse4.2") == 0) ||
|
||||
(strcmp(str, "sse4_1") == 0) ||
|
||||
(strcmp(str, "sse4.1") == 0) ||
|
||||
(strcmp(str, "sse4a") == 0) ||
|
||||
(strcmp(str, "sse4") == 0) ||
|
||||
(strcmp(str, "ssse3") == 0) ||
|
||||
(strcmp(str, "sse3") == 0) ||
|
||||
(strcmp(str, "sse2") == 0))
|
||||
{
|
||||
r_val = BLIS_ARCH_GENERIC;
|
||||
}
|
||||
#endif
|
||||
#if defined(BLIS_FAMILY_ZEN)
|
||||
#if defined(BLIS_FAMILY_X86_64)
|
||||
else if (strcmp(str, "avx512") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_ZEN4;
|
||||
}
|
||||
else if (strcmp(str, "avx2") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_ZEN;
|
||||
r_val = BLIS_ARCH_ZEN3;
|
||||
}
|
||||
else if (strcmp(str, "avx") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_SANDYBRIDGE;
|
||||
}
|
||||
else if ((strcmp(str, "sse4_2") == 0) ||
|
||||
(strcmp(str, "sse4.2") == 0) ||
|
||||
(strcmp(str, "sse4_1") == 0) ||
|
||||
(strcmp(str, "sse4.1") == 0) ||
|
||||
(strcmp(str, "sse4a") == 0) ||
|
||||
(strcmp(str, "sse4") == 0) ||
|
||||
(strcmp(str, "ssse3") == 0) ||
|
||||
(strcmp(str, "sse3") == 0) ||
|
||||
(strcmp(str, "sse2") == 0))
|
||||
{
|
||||
r_val = BLIS_ARCH_GENERIC;
|
||||
}
|
||||
#endif
|
||||
#if defined(BLIS_FAMILY_INTEL64) || defined(BLIS_FAMILY_SKX) || defined(BLIS_FAMILY_HASWELL)
|
||||
#if defined(BLIS_FAMILY_INTEL64)
|
||||
else if (strcmp(str, "avx512") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_SKX;
|
||||
}
|
||||
else if (strcmp(str, "avx2") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_HASWELL;
|
||||
}
|
||||
else if (strcmp(str, "avx") == 0)
|
||||
{
|
||||
r_val = BLIS_ARCH_SANDYBRIDGE;
|
||||
}
|
||||
else if ((strcmp(str, "sse4_2") == 0) ||
|
||||
(strcmp(str, "sse4.2") == 0) ||
|
||||
(strcmp(str, "sse4_1") == 0) ||
|
||||
(strcmp(str, "sse4.1") == 0) ||
|
||||
(strcmp(str, "sse4a") == 0) ||
|
||||
(strcmp(str, "sse4") == 0) ||
|
||||
(strcmp(str, "ssse3") == 0) ||
|
||||
(strcmp(str, "sse3") == 0) ||
|
||||
(strcmp(str, "sse2") == 0))
|
||||
{
|
||||
r_val = BLIS_ARCH_GENERIC;
|
||||
}
|
||||
#endif
|
||||
// ARM
|
||||
else if (strcmp(str, "thunderx2") == 0)
|
||||
|
||||
Reference in New Issue
Block a user