From ca7ba707e7ec7a4121df8b4d345bfae583312395 Mon Sep 17 00:00:00 2001 From: Edward Smyth Date: Fri, 10 May 2024 06:06:31 -0400 Subject: [PATCH] AOCL_ENABLE_INSTRUCTIONS improvements Changes to how AOCL_ENABLE_INSTRUCTIONS handles requests for different ISAs (i.e. BLIS sub-configurations): - Add missing SSE and AVX options. These will all choose the generic option in amdzen builds. - For unsupported ISAs (e.g. AVX512 on Milan), select the hardware's default sub-configuration instead of trying to step down through alternative choices. - For invalid options, or options not implemented in the BLIS build (e.g. skx in amdzen build), select the hardware's default sub-configuration instead of aborting. Currently BLIS_ARCH_TYPE behaviour is not affected by these changes. AMD-Internal: [CPUPL-5078] Change-Id: Idbd00d2806b1679889a9249878c51981c8d23b3f --- frame/base/bli_arch.c | 248 +++++++++++++++++++++++++----------------- frame/base/bli_env.c | 77 ++++++++++--- 2 files changed, 207 insertions(+), 118 deletions(-) diff --git a/frame/base/bli_arch.c b/frame/base/bli_arch.c index d04e014b9..0deb09c33 100644 --- a/frame/base/bli_arch.c +++ b/frame/base/bli_arch.c @@ -179,23 +179,41 @@ void bli_arch_set_id( void ) #ifndef BLIS_CONFIGURETIME_CPUID if ( req_id != -1 ) { - // BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable. + // BLIS_ARCH_TYPE and/or AOCL_ENABLE_INSTRUCTIONS was set. + // Cautiously check whether its value is usable. - // If req_id was set to an invalid arch_t value (ie: outside the range - // [1,BLIS_NUM_ARCHS-1]), output an error message and abort. + // Test if req_id was set to an invalid arch_t value (ie: outside the range + // [1,BLIS_NUM_ARCHS-1]), and handle appropriately depending on how it was set. 
if ( bli_error_checking_is_enabled() ) { err_t e_val = bli_check_valid_arch_id( req_id ); - bli_check_error_code( e_val ); + if (aocl_e_i) + { + // AOCL_ENABLE_INSTRUCTIONS was used: + // If req_id is invalid, ignore user supplied + // value and reset to -1 so we'll use normal + // subconfig selection below. + if ( e_val != BLIS_SUCCESS ) + req_id = -1; + } + else + { + // BLIS_ARCH_TYPE was used: + // Abort on invalid value. + bli_check_error_code( e_val ); + } } + } + if ( req_id != -1 ) + { // Check again context actually initialized deferred to // bli_arch_check_id() called later. // For now, we can only be confident that req_id is in range. arch_id = req_id; - } - else + } + else #endif #endif @@ -359,6 +377,7 @@ void bli_arch_check_id( void ) { bli_arch_set_id_once(); + bool arch_not_in_build = FALSE; bool arch_reset = FALSE; arch_t orig_arch_id= req_id; model_t orig_model_id = model_id; @@ -379,22 +398,96 @@ void bli_arch_check_id( void ) #ifndef BLIS_CONFIGURETIME_CPUID if ( req_id != -1 ) { - // BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable. - // In BLAS1 and BLAS2 routines, bli_init_auto() may not have been // called, so ensure cntx has been initialized here. bli_gks_init_once(); - bool test_arch = TRUE; - while (test_arch) - { + // At this point, we know that req_id is in the valid range, but we + // don't yet know if it refers to a context that was actually + // initialized. Query the address of an internal context data structure + // corresponding to req_id. This pointer will be NULL if the associated + // subconfig is not available. + cntx_t** req_cntx = bli_gks_lookup_id( req_id ); - // At this point, we know that req_id is in the valid range, but we - // don't yet know if it refers to a context that was actually - // initialized. Query the address of an internal context data structure - // corresponding to req_id. This pointer will be NULL if the associated - // subconfig is not available. 
- cntx_t** req_cntx = bli_gks_lookup_id( req_id ); + if ( aocl_e_i ) + { + // AOCL_ENABLE_INSTRUCTIONS was set. Cautiously check whether its value is usable. + + // This function checks the context pointer and aborts with a useful + // error message if the pointer is found to be NULL. + if ( bli_error_checking_is_enabled() ) + { + err_t e_val = bli_check_initialized_gks_cntx( req_cntx ); + if ( e_val != BLIS_SUCCESS ) + { + arch_not_in_build = TRUE; + arch_reset = TRUE; + req_id = actual_arch_id; + model_id = actual_model_id; + } + } + +#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) + + // If AVX2 test fails here we assume either: + // 1. Config was either zen, zen2, zen3, zen4, zen5, haswell or skx, + // so there is no fallback code path, hence error checking + // above will fail. + // 2. Config was amdzen, intel64 or x86_64, and will have + // generic code path. + if ( !bli_cpuid_is_avx2fma3_supported() ) + { + switch (req_id) + { + case BLIS_ARCH_ZEN5: + case BLIS_ARCH_ZEN4: + case BLIS_ARCH_ZEN3: + case BLIS_ARCH_ZEN2: + case BLIS_ARCH_ZEN: + case BLIS_ARCH_EXCAVATOR: + case BLIS_ARCH_SKX: + case BLIS_ARCH_HASWELL: + arch_reset = TRUE; + req_id = actual_arch_id; + model_id = actual_model_id; + break; + } + } + // If AVX512 test fails here we assume either: + // 1. Config was either zen5, zen4 or skx, so there is + // no fallback code path, hence error checking + // above will fail. + // 2. Config was amdzen, intel64 or x86_64, and will have + // appropriate avx2 code path to try. 
+ if ( !bli_cpuid_is_avx512_supported() ) + { + switch (req_id) + { + case BLIS_ARCH_ZEN5: + case BLIS_ARCH_ZEN4: + case BLIS_ARCH_SKX: + arch_reset = TRUE; + req_id = actual_arch_id; + model_id = actual_model_id; + break; + } + } + + // Note: Pre-AVX2 systems from AMD and Intel, and Intel KNL, + // have not been included in these tests, and thus could + // continue to give illegal instruction errors on other + // platforms, just as if BLIS_ARCH_TYPE was set to the + // same value. +#else + // Non-x86 platforms just accept value given for now. + // Similar logic to x86 if block could be implemented + // here if desired. + test_arch = FALSE; +#endif + } + else + { + // BLIS_ARCH_TYPE was set. Cautiously check whether its value is usable. // This function checks the context pointer and aborts with a useful // error message if the pointer is found to be NULL. @@ -403,89 +496,8 @@ void bli_arch_check_id( void ) err_t e_val = bli_check_initialized_gks_cntx( req_cntx ); bli_check_error_code( e_val ); } - // If BLIS_ARCH_TYPE (or renamed version of this environment variable) // was set, we always use this value of req_id to set arch_id. - // However, if AOCL_ENABLE_INSTRUCTIONS was set instead, we check for - // ISA compatibility and switch to a supported option if necessary. - if ( aocl_e_i ) - { -#if defined(__x86_64__) || defined(_M_X64) || defined(__i386) || defined(_M_IX86) - - // If AVX2 test fails here we assume either: - // 1. Config was either zen, zen2, zen3, zen4, zen5, haswell or skx, - // so there is no fallback code path, hence error checking - // above will fail. - // 2. Config was amdzen, intel64 or x86_64, and will have - // generic code path. 
- if ( !bli_cpuid_is_avx2fma3_supported() ) - { - switch (req_id) - { - case BLIS_ARCH_ZEN5: - case BLIS_ARCH_ZEN4: - case BLIS_ARCH_ZEN3: - case BLIS_ARCH_ZEN2: - case BLIS_ARCH_ZEN: - case BLIS_ARCH_EXCAVATOR: - case BLIS_ARCH_SKX: - case BLIS_ARCH_HASWELL: - arch_reset = TRUE; - req_id = BLIS_ARCH_GENERIC; - model_id = BLIS_MODEL_DEFAULT; - continue; - break; - } - } - // If AVX512 test fails here we assume either: - // 1. Config was either zen5, zen4 or skx, so there is - // no fallback code path, hence error checking - // above will fail. - // 2. Config was amdzen, intel64 or x86_64, and will have - // appropriate avx2 code path to try. - if ( !bli_cpuid_is_avx512_supported() ) - { - switch (req_id) - { - case BLIS_ARCH_ZEN5: - arch_reset = TRUE; - req_id = BLIS_ARCH_ZEN3; - model_id = BLIS_MODEL_DEFAULT; - continue; - break; - case BLIS_ARCH_ZEN4: - arch_reset = TRUE; - req_id = BLIS_ARCH_ZEN3; - model_id = BLIS_MODEL_DEFAULT; - continue; - break; - case BLIS_ARCH_SKX: - arch_reset = TRUE; - req_id = BLIS_ARCH_HASWELL; - model_id = BLIS_MODEL_DEFAULT; - continue; - break; - } - } - // If both tests above pass, we accept req_id choice. - test_arch = FALSE; - - // Note: Pre-AVX2 systems from AMD and Intel, and Intel KNL, - // have not been included in these tests, and thus could - // continue to give illegal instruction errors on other - // platforms, just as if BLIS_ARCH_TYPE was set to the - // same value. -#else - // Non-x86 platforms just accept value given for now. - // Similar logic to x86 if block could be implemented - // here if desired. - test_arch = FALSE; -#endif - } - else - { - test_arch = FALSE; - } } // Finally, we can be confident that req_id (1) is in range and (2) @@ -498,16 +510,50 @@ void bli_arch_check_id( void ) if ( bli_arch_get_logging() ) { - if ( arch_reset ) + if ( req_id == -1 && aocl_e_i) + { + // AOCL_ENABLE_INSTRUCTIONS was set to an invalid value + // normal system arch_id was used instead. 
+ if ( model_id == BLIS_MODEL_DEFAULT ) + { + fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n" + "libblis: Selecting system default sub-configuration '%s'.\n", + bli_arch_string( arch_id ) ); + } + else + { + fprintf( stderr, "libblis: AOCL_ENABLE_INSTRUCTIONS env var was set to an invalid value.\n" + "libblis: Selecting system default sub-configuration '%s', model '%s'.\n", + bli_arch_string( arch_id ), bli_model_string( model_id ) ); + } + } + else if ( arch_not_in_build ) { if ( orig_model_id == BLIS_MODEL_DEFAULT ) { - fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\nlibblis: Switching to sub-configuration '%s'.\n", + fprintf( stderr, "libblis: Sub-configuration '%s' is not implemented in this build.\n" + "libblis: Selecting system default sub-configuration '%s'.\n", bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) ); } else { - fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\nlibblis: Switching to sub-configuration '%s', model '%s'.\n", + fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not implemented in this build.\n" + "libblis: Selecting system default sub-configuration '%s', model '%s'.\n", + bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), bli_model_string( model_id ) ); + } + } + else if ( arch_reset ) + { + if ( orig_model_id == BLIS_MODEL_DEFAULT ) + { + fprintf( stderr, "libblis: Sub-configuration '%s' is not supported on this system.\n" + "libblis: Selecting system default sub-configuration '%s'.\n", + bli_arch_string( orig_arch_id ), bli_arch_string( arch_id ) ); + } + else + { + fprintf( stderr, "libblis: Sub-configuration '%s', model '%s' is not supported on this system.\n" + "libblis: Selecting system default sub-configuration '%s', model '%s'.\n", bli_arch_string( orig_arch_id ), bli_model_string( orig_model_id ), bli_arch_string( arch_id ), 
bli_model_string( model_id ) ); } } diff --git a/frame/base/bli_env.c b/frame/base/bli_env.c index faa5fcd93..7e28f026c 100644 --- a/frame/base/bli_env.c +++ b/frame/base/bli_env.c @@ -188,42 +188,85 @@ gint_t bli_env_get_var_arch_type( const char* env, gint_t fallback ) r_val = BLIS_ARCH_BULLDOZER; } // Some aliases for mapping AMD and Intel ISA - // names to a suitable sub-configuration. -#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64) || defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) || defined(BLIS_FAMILY_ZEN3) || defined(BLIS_FAMILY_ZEN2) || defined(BLIS_FAMILY_ZEN) + // names to a suitable sub-configuration for each + // x86-64 processor family. +#if defined(BLIS_FAMILY_AMDZEN) else if (strcmp(str, "avx512") == 0) { r_val = BLIS_ARCH_ZEN4; } -#endif -#if defined(BLIS_FAMILY_INTEL64) || defined(BLIS_FAMILY_SKX) || defined(BLIS_FAMILY_HASWELL) - else if (strcmp(str, "avx512") == 0) - { - r_val = BLIS_ARCH_SKX; - } -#endif -#if defined(BLIS_FAMILY_AMDZEN) || defined(BLIS_FAMILY_X86_64) || defined(BLIS_FAMILY_ZEN5) || defined(BLIS_FAMILY_ZEN4) ||defined(BLIS_FAMILY_ZEN3) else if (strcmp(str, "avx2") == 0) { r_val = BLIS_ARCH_ZEN3; } -#endif -#if defined(BLIS_FAMILY_ZEN2) - else if (strcmp(str, "avx2") == 0) + else if (strcmp(str, "avx") == 0) { - r_val = BLIS_ARCH_ZEN2; + r_val = BLIS_ARCH_GENERIC; + } + else if ((strcmp(str, "sse4_2") == 0) || + (strcmp(str, "sse4.2") == 0) || + (strcmp(str, "sse4_1") == 0) || + (strcmp(str, "sse4.1") == 0) || + (strcmp(str, "sse4a") == 0) || + (strcmp(str, "sse4") == 0) || + (strcmp(str, "ssse3") == 0) || + (strcmp(str, "sse3") == 0) || + (strcmp(str, "sse2") == 0)) + { + r_val = BLIS_ARCH_GENERIC; } #endif -#if defined(BLIS_FAMILY_ZEN) +#if defined(BLIS_FAMILY_X86_64) + else if (strcmp(str, "avx512") == 0) + { + r_val = BLIS_ARCH_ZEN4; + } else if (strcmp(str, "avx2") == 0) { - r_val = BLIS_ARCH_ZEN; + r_val = BLIS_ARCH_ZEN3; + } + else if (strcmp(str, "avx") == 0) + { + r_val = BLIS_ARCH_SANDYBRIDGE; + 
} + else if ((strcmp(str, "sse4_2") == 0) || + (strcmp(str, "sse4.2") == 0) || + (strcmp(str, "sse4_1") == 0) || + (strcmp(str, "sse4.1") == 0) || + (strcmp(str, "sse4a") == 0) || + (strcmp(str, "sse4") == 0) || + (strcmp(str, "ssse3") == 0) || + (strcmp(str, "sse3") == 0) || + (strcmp(str, "sse2") == 0)) + { + r_val = BLIS_ARCH_GENERIC; } #endif -#if defined(BLIS_FAMILY_INTEL64) || defined(BLIS_FAMILY_SKX) || defined(BLIS_FAMILY_HASWELL) +#if defined(BLIS_FAMILY_INTEL64) + else if (strcmp(str, "avx512") == 0) + { + r_val = BLIS_ARCH_SKX; + } else if (strcmp(str, "avx2") == 0) { r_val = BLIS_ARCH_HASWELL; } + else if (strcmp(str, "avx") == 0) + { + r_val = BLIS_ARCH_SANDYBRIDGE; + } + else if ((strcmp(str, "sse4_2") == 0) || + (strcmp(str, "sse4.2") == 0) || + (strcmp(str, "sse4_1") == 0) || + (strcmp(str, "sse4.1") == 0) || + (strcmp(str, "sse4a") == 0) || + (strcmp(str, "sse4") == 0) || + (strcmp(str, "ssse3") == 0) || + (strcmp(str, "sse3") == 0) || + (strcmp(str, "sse2") == 0)) + { + r_val = BLIS_ARCH_GENERIC; + } #endif // ARM else if (strcmp(str, "thunderx2") == 0)