From 45957cc7745e9bb1698408d72f53ef192e960820 Mon Sep 17 00:00:00 2001 From: Tyler Smith Date: Tue, 13 May 2014 17:14:46 -0500 Subject: [PATCH] Allowed threading to be turned off No longer requires OpenMP to compile Define the following in bli_config.h in order to enable multithreading: BLIS_ENABLE_MULTITHREADING BLIS_ENABLE_OPENMP Also fixes a bug with bli_get_range_weighted --- frame/3/gemm/bli_gemm_threading.c | 12 +++++- frame/3/herk/bli_herk_threading.c | 9 +++++ frame/3/trmm/bli_trmm_threading.c | 8 ++++ frame/base/bli_mem.c | 11 ++++-- frame/base/bli_threading.c | 64 ++++++++++++++++++++++++++++++- 5 files changed, 98 insertions(+), 6 deletions(-) diff --git a/frame/3/gemm/bli_gemm_threading.c b/frame/3/gemm/bli_gemm_threading.c index 93c146ef9..3056f019f 100644 --- a/frame/3/gemm/bli_gemm_threading.c +++ b/frame/3/gemm/bli_gemm_threading.c @@ -86,7 +86,7 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ void bli_gemm_thrinfo_free( gemm_thrinfo_t* thread) { - if( thread == NULL ) return; + if( thread == NULL || thread == &BLIS_GEMM_SINGLE_THREADED ) return; // Free Communicators if( thread_am_ochief( thread ) ) @@ -109,12 +109,22 @@ void bli_gemm_thrinfo_free_paths( gemm_thrinfo_t** threads, dim_t num ) gemm_thrinfo_t** bli_create_gemm_thrinfo_paths( ) { + +#ifdef BLIS_ENABLE_MULTITHREADING dim_t jc_way = bli_read_nway_from_env( "BLIS_JC_NT" ); // dim_t kc_way = bli_read_nway_from_env( "BLIS_KC_NT" ); dim_t kc_way = 1; dim_t ic_way = bli_read_nway_from_env( "BLIS_IC_NT" ); dim_t jr_way = bli_read_nway_from_env( "BLIS_JR_NT" ); dim_t ir_way = bli_read_nway_from_env( "BLIS_IR_NT" ); +#else + dim_t jc_way = 1; + dim_t kc_way = 1; + dim_t ic_way = 1; + dim_t jr_way = 1; + dim_t ir_way = 1; +#endif + dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way; assert( global_num_threads != 0 ); diff --git a/frame/3/herk/bli_herk_threading.c b/frame/3/herk/bli_herk_threading.c index 00141ac68..b0def6f3f 100644 --- 
a/frame/3/herk/bli_herk_threading.c +++ b/frame/3/herk/bli_herk_threading.c @@ -109,12 +109,21 @@ void bli_herk_thrinfo_free_paths( herk_thrinfo_t** threads, dim_t num ) herk_thrinfo_t** bli_create_herk_thrinfo_paths( ) { + +#ifdef BLIS_ENABLE_MULTITHREADING dim_t jc_way = bli_read_nway_from_env( "BLIS_JC_NT" ); // dim_t kc_way = bli_read_nway_from_env( "BLIS_KC_NT" ); dim_t kc_way = 1; dim_t ic_way = bli_read_nway_from_env( "BLIS_IC_NT" ); dim_t jr_way = bli_read_nway_from_env( "BLIS_JR_NT" ); dim_t ir_way = bli_read_nway_from_env( "BLIS_IR_NT" ); +#else + dim_t jc_way = 1; + dim_t kc_way = 1; + dim_t ic_way = 1; + dim_t jr_way = 1; + dim_t ir_way = 1; +#endif dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way; assert( global_num_threads != 0 ); diff --git a/frame/3/trmm/bli_trmm_threading.c b/frame/3/trmm/bli_trmm_threading.c index 7d6d89cff..7c675bbb7 100644 --- a/frame/3/trmm/bli_trmm_threading.c +++ b/frame/3/trmm/bli_trmm_threading.c @@ -110,12 +110,20 @@ void bli_trmm_thrinfo_free_paths( trmm_thrinfo_t** threads, dim_t num ) trmm_thrinfo_t** bli_create_trmm_thrinfo_paths( ) { +#ifdef BLIS_ENABLE_MULTITHREADING dim_t jc_way = bli_read_nway_from_env( "BLIS_JC_NT" ); // dim_t kc_way = bli_read_nway_from_env( "BLIS_KC_NT" ); dim_t kc_way = 1; dim_t ic_way = bli_read_nway_from_env( "BLIS_IC_NT" ); dim_t jr_way = bli_read_nway_from_env( "BLIS_JR_NT" ); dim_t ir_way = bli_read_nway_from_env( "BLIS_IR_NT" ); +#else + dim_t jc_way = 1; + dim_t kc_way = 1; + dim_t ic_way = 1; + dim_t jr_way = 1; + dim_t ir_way = 1; +#endif dim_t global_num_threads = jc_way * kc_way * ic_way * jr_way * ir_way; assert( global_num_threads != 0 ); diff --git a/frame/base/bli_mem.c b/frame/base/bli_mem.c index 6df0361d4..06688cacf 100644 --- a/frame/base/bli_mem.c +++ b/frame/base/bli_mem.c @@ -127,7 +127,10 @@ void bli_mem_acquire_m( siz_t req_size, // BEGIN CRITICAL SECTION - _Pragma( "omp critical (mem)" ){ +#ifdef BLIS_ENABLE_OPENMP + _Pragma( "omp critical (mem)" ) 
+#endif + { // Query the index of the contiguous memory block that resides at the // "top" of the pool. @@ -199,8 +202,10 @@ void bli_mem_release( mem_t* mem ) // BEGIN CRITICAL SECTION - _Pragma( "omp critical (mem)" ){ - +#ifdef BLIS_ENABLE_OPENMP + _Pragma( "omp critical (mem)" ) +#endif + { // Increment the top of the memory pool. bli_pool_inc_top_index( pool ); diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index b2c9ae29c..5f16b34fe 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -105,6 +105,7 @@ void bli_setup_communicator( thread_comm_t* communicator, dim_t n_threads) void tree_barrier( barrier_t* barack ) { +#ifdef BLIS_ENABLE_OPENMP int my_signal = barack->signal; int my_count; @@ -122,6 +123,9 @@ void tree_barrier( barrier_t* barack ) volatile int* listener = &barack->signal; while( *listener == my_signal ) {} } +#else + return; /* built without OpenMP: barrier is a no-op */ +#endif } void bli_barrier( thread_comm_t* comm, dim_t t_id ) @@ -130,6 +134,7 @@ void bli_barrier( thread_comm_t* comm, dim_t t_id ) } #else + void bli_cleanup_communicator( thread_comm_t* communicator ) { if( communicator == NULL ) return; @@ -145,6 +150,7 @@ void bli_setup_communicator( thread_comm_t* communicator, dim_t n_threads) //barrier routine taken from art of multicore programming or something void bli_barrier( thread_comm_t* communicator, dim_t t_id ) { +#ifdef BLIS_ENABLE_OPENMP if(communicator == NULL || communicator->n_threads == 1) return; bool_t my_sense = communicator->barrier_sense; @@ -161,6 +167,9 @@ void bli_barrier( thread_comm_t* communicator, dim_t t_id ) volatile bool_t* listener = &communicator->barrier_sense; while( *listener == my_sense ) {} } +#else + return; +#endif } #endif @@ -226,6 +235,45 @@ void bli_get_range( void* thr, dim_t all_start, dim_t all_end, dim_t block_facto *end = bli_min( *start + n_pt, size + all_start ); } +void bli_get_range_weighted( void* thr, dim_t all_start, dim_t all_end, dim_t block_factor, bool_t forward, dim_t* out_start, 
dim_t* out_end) +{ + //bli_get_range( thr, all_start, all_end, block_factor, out_start, out_end ); + //return; + + thrinfo_t* thread = (thrinfo_t*) thr; + dim_t n_way = thread->n_way; + dim_t work_id = thread->work_id; + + dim_t size = all_end - all_start; + dim_t start = all_start; + dim_t end = all_end; + + if( !forward ) { + work_id = n_way - work_id - 1; + } + + dim_t curr_caucus = n_way - 1; + dim_t len = 0; + dim_t num = size*size / n_way; // 2xArea per thread? + while(1){ + dim_t width = sqrt( len*len + num ) - len; // The width of the current caucus + width = (width % block_factor == 0) ? width : width + block_factor - (width % block_factor); + if( curr_caucus == work_id ) { + if( end > width ) + start = bli_max(end - width, start); + break; + } + else{ + end -= width; + len += width; + curr_caucus--; + } + } + + *out_start = start; + *out_end = end; +} +/* void bli_get_range_weighted( void* thr, dim_t all_start, dim_t all_end, dim_t block_factor, bool_t forward, dim_t* start, dim_t* end) { thrinfo_t* thread = (thrinfo_t*) thr; @@ -257,11 +305,12 @@ void bli_get_range_weighted( void* thr, dim_t all_start, dim_t all_end, dim_t bl } else{ dim_t len = *end - *start; - dim_t num = len * len / n_way; + dim_t num = size*size / n_way; while(1){ dim_t width = sqrt(*start * *start + num) - *start; width = (width % block_factor == 0) ? 
width : width + block_factor - (width % block_factor); - if(!work_id) { + + if( work_id == 0 ) { *end = bli_min( *start + width, *end ); return; } @@ -272,6 +321,7 @@ void bli_get_range_weighted( void* thr, dim_t all_start, dim_t all_end, dim_t bl } } } +*/ void bli_level3_thread_decorator( dim_t n_threads, level3_int_t func, @@ -283,6 +333,7 @@ void bli_level3_thread_decorator( dim_t n_threads, void* cntl, void** thread ) { +#ifdef BLIS_ENABLE_OPENMP _Pragma( "omp parallel num_threads(n_threads)" ) { dim_t omp_id = omp_get_thread_num(); @@ -295,6 +346,15 @@ void bli_level3_thread_decorator( dim_t n_threads, cntl, thread[omp_id] ); } +#else + func( alpha, + a, + b, + beta, + c, + cntl, + thread[0] ); +#endif } dim_t bli_read_nway_from_env( char* env )