From 0f5ce26fc597cda6e8ae93a7526f52eb8cba01e9 Mon Sep 17 00:00:00 2001 From: Nisanth M P Date: Mon, 16 Oct 2017 21:07:50 +0530 Subject: [PATCH 1/3] Thread safety: Make the global induced method status array local to thread BLIS retains a global status array for induced methods, and provides APIs to modify this state during runtime. So, one application thread can modify the state, before another starts the corresponding BLIS operation. This patch solves this issue by making the induced method status array local to threads. Change-Id: Iff59b6f473771344054c010b4eda51b7aa4317fe --- frame/include/bli_macro_defs.h | 14 ++++++++++++++ frame/ind/bli_l3_ind.c | 6 +++++- 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index d99be2345..a332554cc 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -64,6 +64,20 @@ #endif +// -- BLIS Thread Local Storage Keyword -- + +// __thread for TLS is supported by GCC, CLANG, ICC, and IBMC. +// There is a small risk here as __GNUC__ can also be defined by some other +// compiler (other than ICC and CLANG which we know define it) that +// doesn't support __thread, as __GNUC__ is not quite unique to GCC. +// But the possibility of someone using such non-main-stream compiler +// for building BLIS is low. +#if defined(__GNUC__) || defined(__clang__) || defined(__ICC) || defined(__IBMC__) + #define BLIS_THREAD_LOCAL __thread +#else + #define BLIS_THREAD_LOCAL +#endif + // -- Boolean values -- #ifndef TRUE diff --git a/frame/ind/bli_l3_ind.c b/frame/ind/bli_l3_ind.c index e694f5384..cedf40d10 100644 --- a/frame/ind/bli_l3_ind.c +++ b/frame/ind/bli_l3_ind.c @@ -60,7 +60,11 @@ static void* bli_l3_ind_oper_fp[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS] = // // NOTE: "2" is used instead of BLIS_NUM_FP_TYPES/2. // -static bool_t bli_l3_ind_oper_st[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS][2] = +// BLIS provides APIs to modify this state during runtime. So, one application thread +// can modify the state, before another starts the corresponding BLIS operation. +// This is solved by making the induced method status array local to threads. + +static BLIS_THREAD_LOCAL bool_t bli_l3_ind_oper_st[BLIS_NUM_IND_METHODS][BLIS_NUM_LEVEL3_OPS][2] = { /* gemm hemm herk her2k symm syrk, syr2k trmm3 trmm trsm */ /* c z */ From 4607aac297e55ad540cbe5fffbe02e6b1889c181 Mon Sep 17 00:00:00 2001 From: Nisanth M P Date: Mon, 16 Oct 2017 22:06:57 +0530 Subject: [PATCH 2/3] Thread Safety: Move bli_init() before and bli_finalize() after main() BLIS provides APIs to initialize and finalize its global context. One application thread can finalize BLIS, while other threads in the application are stil using BLIS. This issue can be solved by removing bli_finalize() from API. One way to do this is by getting bli_finalize() to execute by default after application exits from main(). GCC supports this behaviour with the help of __attribute__((destructor)) added to the function that need to be executed after main exits. Similarly bli_init() can be made to run before application enters main() so that application need not call it. Change-Id: I7ce6cfa28b384e92c0bdf772f3baea373fd9feac --- frame/base/bli_init.c | 11 ++++++++--- frame/include/bli_macro_defs.h | 28 ++++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 3 deletions(-) diff --git a/frame/base/bli_init.c b/frame/base/bli_init.c index db598cede..90bc51fa0 100644 --- a/frame/base/bli_init.c +++ b/frame/base/bli_init.c @@ -40,8 +40,10 @@ pthread_mutex_t initialize_mutex = PTHREAD_MUTEX_INITIALIZER; static bool_t bli_is_init = FALSE; - -err_t bli_init( void ) +// If BLIS is built using a compiler that supports __attribute__((constructor)), +// then bli_init() will be executed before the application enters main(). +// In that case there is no need to call bli_init() in the application code. +BLIS_ATTRIB_CTOR err_t bli_init( void ) { err_t r_val = BLIS_FAILURE; @@ -105,7 +107,10 @@ err_t bli_init( void ) return r_val; } -err_t bli_finalize( void ) +// If BLIS is built using a compiler that supports __attribute__((destrutor)), +// then bli_finalize() will be executed after the application exits main(). +// In that case there is no need to call bli_finalize() in the application code. +BLIS_ATTRIB_DTOR err_t bli_finalize( void ) { err_t r_val = BLIS_FAILURE; diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index a332554cc..f5abe7902 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -78,6 +78,34 @@ #define BLIS_THREAD_LOCAL #endif + +// -- BLIS constructor/destructor function attribute -- + +// __attribute__((constructor/destructor)) is supported by GCC only. +// There is a small risk here as __GNUC__ can also be defined by some other +// compiler (other than ICC and CLANG which we know define it) that +// doesn't support this, as __GNUC__ is not quite unique to GCC. +// But the possibility of someone using such non-main-stream compiler +// for building BLIS is low. + +#if defined(__ICC) || defined(__INTEL_COMPILER) + // ICC defines __GNUC__ but doesn't support this + #define BLIS_ATTRIB_CTOR + #define BLIS_ATTRIB_DTOR +#elif defined(__clang__) + // CLANG supports __attribute__, but doesn't mention support for + // constructor/destructor. If we can confirm that CLANG supports + // this attribute, modify it to proper definition + #define BLIS_ATTRIB_CTOR + #define BLIS_ATTRIB_DTOR +#elif defined(__GNUC__) + #define BLIS_ATTRIB_CTOR __attribute__((constructor)) + #define BLIS_ATTRIB_DTOR __attribute__((destructor)) +#else + #define BLIS_ATTRIB_CTOR + #define BLIS_ATTRIB_DTOR +#endif + // -- Boolean values -- #ifndef TRUE From 3eb44f67618b91ae5f5f0aaaba67e38f16042ee4 Mon Sep 17 00:00:00 2001 From: Nisanth M P Date: Tue, 24 Oct 2017 16:36:36 +0530 Subject: [PATCH 3/3] Adding __attribute__((constructor/destructor)) for CLANG case. CLANG supports __attribute__, but its documentation doesn't mention support for constructor/destructor. Compiling with clang and testing shows that it does support this. Change-Id: Ie115b20634c26bda475cc09c20960d687fb7050b --- frame/include/bli_macro_defs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/frame/include/bli_macro_defs.h b/frame/include/bli_macro_defs.h index f5abe7902..1162a7e1e 100644 --- a/frame/include/bli_macro_defs.h +++ b/frame/include/bli_macro_defs.h @@ -93,11 +93,11 @@ #define BLIS_ATTRIB_CTOR #define BLIS_ATTRIB_DTOR #elif defined(__clang__) - // CLANG supports __attribute__, but doesn't mention support for - // constructor/destructor. If we can confirm that CLANG supports - // this attribute, modify it to proper definition - #define BLIS_ATTRIB_CTOR - #define BLIS_ATTRIB_DTOR + // CLANG supports __attribute__, but its documentation doesn't + // mention support for constructor/destructor. Compiling with + // clang and testing shows that it does support. + #define BLIS_ATTRIB_CTOR __attribute__((constructor)) + #define BLIS_ATTRIB_DTOR __attribute__((destructor)) #elif defined(__GNUC__) #define BLIS_ATTRIB_CTOR __attribute__((constructor)) #define BLIS_ATTRIB_DTOR __attribute__((destructor))