diff --git a/frame/base/bli_init.c b/frame/base/bli_init.c index db598cede..90bc51fa0 100644 --- a/frame/base/bli_init.c +++ b/frame/base/bli_init.c @@ -40,8 +40,10 @@ pthread_mutex_t initialize_mutex = PTHREAD_MUTEX_INITIALIZER; static bool_t bli_is_init = FALSE; - -err_t bli_init( void ) +// If BLIS is built using a compiler that supports __attribute__((constructor)), +// then bli_init() will be executed before the application enters main(). +// In that case there is no need to call bli_init() in the application code. +BLIS_ATTRIB_CTOR err_t bli_init( void ) { err_t r_val = BLIS_FAILURE; @@ -105,7 +107,10 @@ err_t bli_init( void ) return r_val; } -err_t bli_finalize( void ) +// If BLIS is built using a compiler that supports __attribute__((destructor)), +// then bli_finalize() will be executed after the application exits main(). +// In that case there is no need to call bli_finalize() in the application code. +BLIS_ATTRIB_DTOR err_t bli_finalize( void ) { err_t r_val = BLIS_FAILURE; diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index d99be2345..1162a7e1e 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -64,6 +64,48 @@ #endif +// -- BLIS Thread Local Storage Keyword -- + +// __thread for TLS is supported by GCC, CLANG, ICC, and IBMC. +// There is a small risk here as __GNUC__ can also be defined by some other +// compiler (other than ICC and CLANG which we know define it) that +// doesn't support __thread, as __GNUC__ is not quite unique to GCC. +// But the possibility of someone using such a non-mainstream compiler +// for building BLIS is low. +#if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__IBMC__) + #define BLIS_THREAD_LOCAL __thread +#else + #define BLIS_THREAD_LOCAL +#endif + + +// -- BLIS constructor/destructor function attribute -- + +// __attribute__((constructor/destructor)) is a GCC extension that clang also supports. 
+// There is a small risk here as __GNUC__ can also be defined by some other +// compiler (other than ICC and CLANG which we know define it) that +// doesn't support this, as __GNUC__ is not quite unique to GCC. +// But the possibility of someone using such a non-mainstream compiler +// for building BLIS is low. + +#if defined(__ICC) || defined(__INTEL_COMPILER) + // ICC defines __GNUC__ but doesn't support this + #define BLIS_ATTRIB_CTOR + #define BLIS_ATTRIB_DTOR +#elif defined(__clang__) + // CLANG supports __attribute__, but its documentation doesn't + // mention support for constructor/destructor. Compiling with + // clang and testing shows that it does support them. + #define BLIS_ATTRIB_CTOR __attribute__((constructor)) + #define BLIS_ATTRIB_DTOR __attribute__((destructor)) +#elif defined(__GNUC__) + #define BLIS_ATTRIB_CTOR __attribute__((constructor)) + #define BLIS_ATTRIB_DTOR __attribute__((destructor)) +#else + #define BLIS_ATTRIB_CTOR + #define BLIS_ATTRIB_DTOR +#endif + // -- Boolean values -- #ifndef TRUE diff --git a/frame/ind/bli_l3_ind.c b/frame/ind/bli_l3_ind.c index e694f5384..cedf40d10 100644 --- a/frame/ind/bli_l3_ind.c +++ b/frame/ind/bli_l3_ind.c @@ -60,7 +60,11 @@ static void* bli_l3_ind_oper_fp[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS] = // // NOTE: "2" is used instead of BLIS_NUM_FP_TYPES/2. // -static bool_t bli_l3_ind_oper_st[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS][2] = +// BLIS provides APIs to modify this state during runtime. So, one application thread +// can modify the state, before another starts the corresponding BLIS operation. +// This is solved by making the induced method status array local to threads. + +static BLIS_THREAD_LOCAL bool_t bli_l3_ind_oper_st[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS][2] = { /* gemm hemm herk her2k symm syrk, syr2k trmm3 trmm trsm */ /* c z */