diff --git a/frame/1m/packm/bli_packm_threading.c b/frame/1m/packm/bli_packm_threading.c index 0fa6b0bf2..098475c5e 100644 --- a/frame/1m/packm/bli_packm_threading.c +++ b/frame/1m/packm/bli_packm_threading.c @@ -34,13 +34,20 @@ #include "blis.h" +void bli_packm_thrinfo_free( packm_thrinfo_t* thread ) +{ + //Assume that the ocomm and the icomm are freed by something else and don't need to be freed. + bli_free(thread); +} + packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id, dim_t n_way, dim_t work_id ) { return (packm_thrinfo_t*) bli_create_thread_info( ocomm, ocomm_id, icomm, icomm_id, n_way, work_id ); } -void bli_setup_packm_thread_info( packm_thrinfo_t* thread, thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id, +void bli_setup_packm_thread_info( packm_thrinfo_t* thread, thread_comm_t* ocomm, dim_t ocomm_id, + thread_comm_t* icomm, dim_t icomm_id, dim_t n_way, dim_t work_id ) { bli_setup_thread_info( (thrinfo_t*) thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id ); diff --git a/frame/1m/packm/bli_packm_threading.h b/frame/1m/packm/bli_packm_threading.h index 0d6fce2e4..7b4dc0f22 100644 --- a/frame/1m/packm/bli_packm_threading.h +++ b/frame/1m/packm/bli_packm_threading.h @@ -46,6 +46,7 @@ typedef struct packm_thrinfo_s packm_thrinfo_t; #define packm_thread_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) +void bli_packm_thrinfo_free( packm_thrinfo_t* thread ); packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id, dim_t n_way, dim_t work_id ); void bli_setup_packm_thread_info( packm_thrinfo_t* thread, thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id, diff --git a/frame/3/gemm/bli_gemm_front.c b/frame/3/gemm/bli_gemm_front.c index 2211625a5..01b8eaab7 100644 --- a/frame/3/gemm/bli_gemm_front.c +++ b/frame/3/gemm/bli_gemm_front.c @@ 
-88,6 +88,6 @@ void bli_gemm_front( obj_t* alpha, (void*) cntl, (void**) infos ); - bli_gemm_thrinfo_free_paths( infos ); + bli_gemm_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/gemm/bli_gemm_threading.c b/frame/3/gemm/bli_gemm_threading.c index 047b083cf..2b2277b33 100644 --- a/frame/3/gemm/bli_gemm_threading.c +++ b/frame/3/gemm/bli_gemm_threading.c @@ -84,8 +84,29 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ return thread; } -void bli_gemm_thrinfo_free_paths( gemm_thrinfo_t** threads ) +void bli_gemm_thrinfo_free( gemm_thrinfo_t* thread) { + if( thread == NULL ) return; + + // Free Communicators + if( thread_am_ochief( thread ) ) + bli_free_communicator( thread->ocomm ); +// if( thread_am_ichief( thread ) ) +// bli_cleanup_communicator( thread->icomm ); + + // Free Sub Thrinfos (members of this node) + bli_packm_thrinfo_free( thread->opackm ); + bli_packm_thrinfo_free( thread->ipackm ); + bli_gemm_thrinfo_free( thread->sub_gemm ); + bli_free( thread ); + + return; +} +void bli_gemm_thrinfo_free_paths( gemm_thrinfo_t** threads, dim_t num ) +{ + for( dim_t i = 0; i < num; i++) + bli_gemm_thrinfo_free( threads[i] ); + bli_free( threads ); } gemm_thrinfo_t** bli_create_gemm_thrinfo_paths( ) diff --git a/frame/3/hemm/bli_hemm_front.c b/frame/3/hemm/bli_hemm_front.c index c3a708211..7848e1117 100644 --- a/frame/3/hemm/bli_hemm_front.c +++ b/frame/3/hemm/bli_hemm_front.c @@ -94,6 +94,6 @@ void bli_hemm_front( side_t side, (void*) cntl, (void**) infos ); - bli_gemm_thrinfo_free_paths( infos ); + bli_gemm_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/her2k/bli_her2k_front.c b/frame/3/her2k/bli_her2k_front.c index b8329cf5b..01afc70dc 100644 --- a/frame/3/her2k/bli_her2k_front.c +++ b/frame/3/her2k/bli_her2k_front.c @@ -135,7 +135,7 @@ void bli_her2k_front( obj_t* alpha, (void*) cntl, (void**) infos ); - bli_herk_thrinfo_free_paths( infos ); + bli_herk_thrinfo_free_paths( infos, n_threads ); #endif } diff --git a/frame/3/herk/bli_herk_blk_var1f.c 
b/frame/3/herk/bli_herk_blk_var1f.c index 7d4fa4375..6ef80bad2 100644 --- a/frame/3/herk/bli_herk_blk_var1f.c +++ b/frame/3/herk/bli_herk_blk_var1f.c @@ -82,7 +82,7 @@ void bli_herk_blk_var1f( obj_t* a, // Query dimension in partitioning direction. m_trans = bli_obj_length_after_trans( *c ); dim_t start, end; - bli_get_range( thread, 0, m_trans, 8, &start, &end ); + bli_get_range_weighted( thread, 0, m_trans, 8, bli_obj_is_upper( *c ), &start, &end ); // Partition along the m dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/herk/bli_herk_blk_var2f.c b/frame/3/herk/bli_herk_blk_var2f.c index 5fcb56001..95215e906 100644 --- a/frame/3/herk/bli_herk_blk_var2f.c +++ b/frame/3/herk/bli_herk_blk_var2f.c @@ -91,7 +91,7 @@ void bli_herk_blk_var2f( obj_t* a, // Needs to be replaced with a weighted range because triangle //bli_get_range( thread, 0, n_trans, 8, &start, &end ); - bli_get_range_weighted( thread, 0, n_trans, 8, 1, &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, 8, bli_obj_is_lower( *c ), &start, &end ); // Partition along the n dimension. 
for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/herk/bli_herk_front.c b/frame/3/herk/bli_herk_front.c index 6139478ea..6fb092460 100644 --- a/frame/3/herk/bli_herk_front.c +++ b/frame/3/herk/bli_herk_front.c @@ -91,6 +91,6 @@ void bli_herk_front( obj_t* alpha, (void*) cntl, (void**) infos ); - bli_herk_thrinfo_free_paths( infos ); + bli_herk_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/herk/bli_herk_threading.c b/frame/3/herk/bli_herk_threading.c index 2b291a924..091b74ff6 100644 --- a/frame/3/herk/bli_herk_threading.c +++ b/frame/3/herk/bli_herk_threading.c @@ -84,8 +84,27 @@ herk_thrinfo_t* bli_create_herk_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ return thread; } -void bli_herk_thrinfo_free_paths( herk_thrinfo_t** threads ) +void bli_herk_thrinfo_free( herk_thrinfo_t* thread) { + if( thread == NULL ) return; + + // Free Communicators + if( thread_am_ochief( thread ) ) + bli_free_communicator( thread->ocomm ); + + // Free Sub Thrinfos (members of this node) + bli_packm_thrinfo_free( thread->opackm ); + bli_packm_thrinfo_free( thread->ipackm ); + bli_herk_thrinfo_free( thread->sub_herk ); + bli_free( thread ); + + return; +} +void bli_herk_thrinfo_free_paths( herk_thrinfo_t** threads, dim_t num ) +{ + for( dim_t i = 0; i < num; i++) + bli_herk_thrinfo_free( threads[i] ); + bli_free( threads ); } herk_thrinfo_t** bli_create_herk_thrinfo_paths( ) diff --git a/frame/3/symm/bli_symm_front.c b/frame/3/symm/bli_symm_front.c index ed0c44664..796ad5196 100644 --- a/frame/3/symm/bli_symm_front.c +++ b/frame/3/symm/bli_symm_front.c @@ -93,6 +93,6 @@ void bli_symm_front( side_t side, (void*) cntl, (void**) infos ); - bli_gemm_thrinfo_free_paths( infos ); + bli_gemm_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/syr2k/bli_syr2k_front.c b/frame/3/syr2k/bli_syr2k_front.c index f1ce3e279..eceaf1913 100644 --- a/frame/3/syr2k/bli_syr2k_front.c +++ b/frame/3/syr2k/bli_syr2k_front.c @@ -117,7 +117,7 @@ void bli_syr2k_front( obj_t* alpha, (void*) cntl, (void**) infos ); - 
bli_herk_thrinfo_free_paths( infos ); + bli_herk_thrinfo_free_paths( infos, n_threads ); #endif } diff --git a/frame/3/syrk/bli_syrk_front.c b/frame/3/syrk/bli_syrk_front.c index c5ac22797..977a91cd8 100644 --- a/frame/3/syrk/bli_syrk_front.c +++ b/frame/3/syrk/bli_syrk_front.c @@ -87,6 +87,6 @@ void bli_syrk_front( obj_t* alpha, (void*) cntl, (void**) infos ); - bli_herk_thrinfo_free_paths( infos ); + bli_herk_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/trmm/bli_trmm_front.c b/frame/3/trmm/bli_trmm_front.c index 644f27d4b..d8caba7dc 100644 --- a/frame/3/trmm/bli_trmm_front.c +++ b/frame/3/trmm/bli_trmm_front.c @@ -139,6 +139,6 @@ void bli_trmm_front( side_t side, (void*) cntl, (void**) infos ); - bli_trmm_thrinfo_free_paths( infos ); + bli_trmm_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/trmm/bli_trmm_threading.c b/frame/3/trmm/bli_trmm_threading.c index 3a6a7c0b4..144f4a64b 100644 --- a/frame/3/trmm/bli_trmm_threading.c +++ b/frame/3/trmm/bli_trmm_threading.c @@ -84,8 +84,28 @@ trmm_thrinfo_t* bli_create_trmm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ return thread; } -void bli_trmm_thrinfo_free_paths( trmm_thrinfo_t** threads ) +void bli_trmm_thrinfo_free( trmm_thrinfo_t* thread) { + if( thread == NULL ) return; + + // Free Communicators + if( thread_am_ochief( thread ) ) + bli_free_communicator( thread->ocomm ); + + // Free Sub Thrinfos (members of this node) + bli_packm_thrinfo_free( thread->opackm ); + bli_packm_thrinfo_free( thread->ipackm ); + bli_trmm_thrinfo_free( thread->sub_trmm ); + bli_free( thread ); + + return; +} + +void bli_trmm_thrinfo_free_paths( trmm_thrinfo_t** threads, dim_t num ) +{ + for( dim_t i = 0; i < num; i++) + bli_trmm_thrinfo_free( threads[i] ); + bli_free( threads ); } trmm_thrinfo_t** bli_create_trmm_thrinfo_paths( ) diff --git a/frame/3/trmm3/bli_trmm3_front.c b/frame/3/trmm3/bli_trmm3_front.c index 080b9a399..6f8757faa 100644 --- a/frame/3/trmm3/bli_trmm3_front.c +++ b/frame/3/trmm3/bli_trmm3_front.c @@ -141,6 +141,6 @@ void 
bli_trmm3_front( side_t side, (void*) cntl, (void**) infos ); - bli_trmm_thrinfo_free_paths( infos ); + bli_trmm_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/trsm/bli_trsm_front.c b/frame/3/trsm/bli_trsm_front.c index e7cae7d51..2c42c24f9 100644 --- a/frame/3/trsm/bli_trsm_front.c +++ b/frame/3/trsm/bli_trsm_front.c @@ -139,6 +139,6 @@ void bli_trsm_front( side_t side, (void*) cntl, (void**) infos ); - bli_trsm_thrinfo_free_paths( infos ); + bli_trsm_thrinfo_free_paths( infos, n_threads ); } diff --git a/frame/3/trsm/bli_trsm_threading.c b/frame/3/trsm/bli_trsm_threading.c index 08c915b15..139e090de 100644 --- a/frame/3/trsm/bli_trsm_threading.c +++ b/frame/3/trsm/bli_trsm_threading.c @@ -84,8 +84,27 @@ trsm_thrinfo_t* bli_create_trsm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ return thread; } -void bli_trsm_thrinfo_free_paths( trsm_thrinfo_t** threads ) +void bli_trsm_thrinfo_free( trsm_thrinfo_t* thread) { + if( thread == NULL ) return; + + // Free Communicators + if( thread_am_ochief( thread ) ) + bli_free_communicator( thread->ocomm ); + + // Free Sub Thrinfos (members of this node) + bli_packm_thrinfo_free( thread->opackm ); + bli_packm_thrinfo_free( thread->ipackm ); + bli_trsm_thrinfo_free( thread->sub_trsm ); + bli_free( thread ); + + return; +} +void bli_trsm_thrinfo_free_paths( trsm_thrinfo_t** threads, dim_t num ) +{ + for( dim_t i = 0; i < num; i++) + bli_trsm_thrinfo_free( threads[i] ); + bli_free( threads ); } trsm_thrinfo_t** bli_create_trsm_thrinfo_paths( ) diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index 0b9ec30bd..df903af4f 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -39,6 +39,12 @@ void bli_cleanup_communicator( thread_comm_t* communicator ) if( communicator == NULL ) return; bli_destroy_lock( &communicator->barrier_lock ); } +void bli_free_communicator( thread_comm_t* communicator ) +{ + if( communicator == NULL ) return; + bli_cleanup_communicator( communicator ); + bli_free( communicator ); +} void 
bli_setup_communicator( thread_comm_t* communicator, dim_t n_threads) { diff --git a/frame/base/bli_threading.h b/frame/base/bli_threading.h index f09da42c3..ca790192f 100644 --- a/frame/base/bli_threading.h +++ b/frame/base/bli_threading.h @@ -49,7 +49,9 @@ struct thread_comm_s typedef struct thread_comm_s thread_comm_t; void bli_setup_communicator( thread_comm_t* communicator, dim_t n_threads ); +void bli_cleanup_communicator( thread_comm_t* communicator ); thread_comm_t* bli_create_communicator( dim_t n_threads ); +void bli_free_communicator( thread_comm_t* communicator ); void* bli_broadcast_structure( thread_comm_t* communicator, dim_t inside_id, void* to_send );