From 9dcd6f05c4c3ff2ce7cd87a9951a96ebef22681e Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 24 May 2016 13:15:32 -0500 Subject: [PATCH] Implemented developer-configurable malloc()/free(). Details: - Replaced all instances of bli_malloc() and bli_free() with one of: - bli_malloc_pool()/bli_free_pool() - bli_malloc_user()/bli_free_user() - bli_malloc_intl()/bli_free_intl() each of which can be configured to call malloc()/free() substitutes, so long as the substitute functions have the same function type signatures as malloc() and free() defined by C's stdlib.h. The _pool() function is called when allocating blocks for the memory pools (used for packing buffers, primarily), the _user() function is called when obj_t's are created (via bli_obj_create() and friends), and the _intl() function is called for internal use by BLIS, such as when creating control tree nodes or temporary buffers for manipulating internal data structures. Substitutes for any of the three types of bli_malloc() may be specified by #defining the following pairs of cpp macros in bli_kernel.h: - BLIS_MALLOC_POOL/BLIS_FREE_POOL - BLIS_MALLOC_USER/BLIS_FREE_USER - BLIS_MALLOC_INTL/BLIS_FREE_INTL to be the name of the substitute functions. (Obviously, the object code that contains these functions must be provided at link-time.) These macros default to malloc() and free(). Substitute functions are also automatically prototyped by BLIS (in bli_malloc_prototypes.h). - Removed definitions for bli_malloc() and bli_free(). - Note that bli_malloc_pool() and bli_malloc_user() are now defined in terms of a new function, bli_malloc_align(), which aligns memory to an arbitrary (power of two) alignment boundary, but does so manually, whereas before alignment was performed behind the scenes by posix_memalign(). Currently, bli_malloc_intl() is defined in terms of bli_malloc_noalign(), which serves as a simple wrapper to the designated function that is passed in (e.g. BLIS_MALLOC_INTL). 
Similarly, there are bli_free_align() and bli_free_noalign(), which are used in concert with their bli_malloc_*() counterparts. --- frame/1/packv/bli_packv_cntl.c | 2 +- frame/1/scalv/bli_scalv_cntl.c | 2 +- frame/1/unpackv/bli_unpackv_cntl.c | 2 +- frame/1m/packm/bli_packm_cntl.c | 2 +- frame/1m/packm/bli_packm_threading.c | 2 +- frame/1m/scalm/bli_scalm_cntl.c | 2 +- frame/1m/unpackm/bli_unpackm_cntl.c | 2 +- frame/2/gemv/bli_gemv_cntl.c | 2 +- frame/2/ger/bli_ger_cntl.c | 2 +- frame/2/hemv/bli_hemv_cntl.c | 2 +- frame/2/her/bli_her_cntl.c | 2 +- frame/2/her2/bli_her2_cntl.c | 2 +- frame/2/trmv/bli_trmv_cntl.c | 2 +- frame/2/trsv/bli_trsv_cntl.c | 2 +- frame/3/gemm/bli_gemm_cntl.c | 2 +- frame/3/gemm/bli_gemm_threading.c | 8 +- frame/3/herk/bli_herk_threading.c | 8 +- frame/3/trmm/bli_trmm_threading.c | 8 +- frame/3/trsm/bli_trsm_cntl.c | 2 +- frame/3/trsm/bli_trsm_threading.c | 8 +- frame/base/bli_blksz.c | 4 +- frame/base/bli_check.c | 28 ++++ frame/base/bli_check.h | 2 + frame/base/bli_cntx.c | 16 +-- frame/base/bli_error.c | 4 + frame/base/bli_func.c | 4 +- frame/base/bli_gks.c | 28 ++-- frame/base/bli_malloc.c | 180 ++++++++++++++++++++++---- frame/base/bli_malloc.h | 27 +++- frame/base/bli_mbool.c | 4 +- frame/base/bli_mem.c | 10 +- frame/base/bli_obj.c | 15 ++- frame/base/bli_obj.h | 1 + frame/base/bli_pool.c | 12 +- frame/base/bli_threading.c | 6 +- frame/base/bli_threading_omp.c | 10 +- frame/base/bli_threading_pthreads.c | 12 +- frame/cntl/bli_cntl.c | 2 +- frame/include/bli_kernel_macro_defs.h | 56 ++++++-- frame/include/bli_malloc_prototypes.h | 50 +++++++ frame/include/bli_param_macro_defs.h | 2 +- frame/include/bli_type_defs.h | 2 + frame/include/blis.h | 2 + 43 files changed, 419 insertions(+), 122 deletions(-) create mode 100644 frame/include/bli_malloc_prototypes.h diff --git a/frame/1/packv/bli_packv_cntl.c b/frame/1/packv/bli_packv_cntl.c index 0d2a5ccab..ac068ce71 100644 --- a/frame/1/packv/bli_packv_cntl.c +++ b/frame/1/packv/bli_packv_cntl.c 
@@ -56,7 +56,7 @@ packv_t* bli_packv_cntl_obj_create( impl_t impl_type, { packv_t* cntl; - cntl = ( packv_t* ) bli_malloc( sizeof(packv_t) ); + cntl = ( packv_t* ) bli_malloc_intl( sizeof(packv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1/scalv/bli_scalv_cntl.c b/frame/1/scalv/bli_scalv_cntl.c index 8470805a7..9edb6162c 100644 --- a/frame/1/scalv/bli_scalv_cntl.c +++ b/frame/1/scalv/bli_scalv_cntl.c @@ -53,7 +53,7 @@ scalv_t* bli_scalv_cntl_obj_create( impl_t impl_type, { scalv_t* cntl; - cntl = ( scalv_t* ) bli_malloc( sizeof(scalv_t) ); + cntl = ( scalv_t* ) bli_malloc_intl( sizeof(scalv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1/unpackv/bli_unpackv_cntl.c b/frame/1/unpackv/bli_unpackv_cntl.c index cfa0be8ff..1e1ab93fb 100644 --- a/frame/1/unpackv/bli_unpackv_cntl.c +++ b/frame/1/unpackv/bli_unpackv_cntl.c @@ -52,7 +52,7 @@ unpackv_t* bli_unpackv_cntl_obj_create( impl_t impl_type, { unpackv_t* cntl; - cntl = ( unpackv_t* ) bli_malloc( sizeof(unpackv_t) ); + cntl = ( unpackv_t* ) bli_malloc_intl( sizeof(unpackv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1m/packm/bli_packm_cntl.c b/frame/1m/packm/bli_packm_cntl.c index ae800e99c..f0f674615 100644 --- a/frame/1m/packm/bli_packm_cntl.c +++ b/frame/1m/packm/bli_packm_cntl.c @@ -103,7 +103,7 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type, { packm_t* cntl; - cntl = ( packm_t* ) bli_malloc( sizeof(packm_t) ); + cntl = ( packm_t* ) bli_malloc_intl( sizeof(packm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1m/packm/bli_packm_threading.c b/frame/1m/packm/bli_packm_threading.c index 7c026a413..c4b6af649 100644 --- a/frame/1m/packm/bli_packm_threading.c +++ b/frame/1m/packm/bli_packm_threading.c @@ -37,7 +37,7 @@ void bli_packm_thrinfo_free( packm_thrinfo_t* thread ) { if( thread != NULL && thread != &BLIS_PACKM_SINGLE_THREADED) - bli_free(thread); + bli_free_intl(thread); } 
packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id, diff --git a/frame/1m/scalm/bli_scalm_cntl.c b/frame/1m/scalm/bli_scalm_cntl.c index 1f26635ca..4a965b3fa 100644 --- a/frame/1m/scalm/bli_scalm_cntl.c +++ b/frame/1m/scalm/bli_scalm_cntl.c @@ -53,7 +53,7 @@ scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type, { scalm_t* cntl; - cntl = ( scalm_t* ) bli_malloc( sizeof(scalm_t) ); + cntl = ( scalm_t* ) bli_malloc_intl( sizeof(scalm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1m/unpackm/bli_unpackm_cntl.c b/frame/1m/unpackm/bli_unpackm_cntl.c index 7d88c1318..0e99bb741 100644 --- a/frame/1m/unpackm/bli_unpackm_cntl.c +++ b/frame/1m/unpackm/bli_unpackm_cntl.c @@ -54,7 +54,7 @@ unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type, { unpackm_t* cntl; - cntl = ( unpackm_t* ) bli_malloc( sizeof(unpackm_t) ); + cntl = ( unpackm_t* ) bli_malloc_intl( sizeof(unpackm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/gemv/bli_gemv_cntl.c b/frame/2/gemv/bli_gemv_cntl.c index 05ae42ef4..ecedeaca4 100644 --- a/frame/2/gemv/bli_gemv_cntl.c +++ b/frame/2/gemv/bli_gemv_cntl.c @@ -178,7 +178,7 @@ gemv_t* bli_gemv_cntl_obj_create( impl_t impl_type, { gemv_t* cntl; - cntl = ( gemv_t* ) bli_malloc( sizeof(gemv_t) ); + cntl = ( gemv_t* ) bli_malloc_intl( sizeof(gemv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/ger/bli_ger_cntl.c b/frame/2/ger/bli_ger_cntl.c index f3f20e3bb..16565ef02 100644 --- a/frame/2/ger/bli_ger_cntl.c +++ b/frame/2/ger/bli_ger_cntl.c @@ -170,7 +170,7 @@ ger_t* bli_ger_cntl_obj_create( impl_t impl_type, { ger_t* cntl; - cntl = ( ger_t* ) bli_malloc( sizeof(ger_t) ); + cntl = ( ger_t* ) bli_malloc_intl( sizeof(ger_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/hemv/bli_hemv_cntl.c b/frame/2/hemv/bli_hemv_cntl.c index e245ab689..8505f615c 100644 --- 
a/frame/2/hemv/bli_hemv_cntl.c +++ b/frame/2/hemv/bli_hemv_cntl.c @@ -131,7 +131,7 @@ hemv_t* bli_hemv_cntl_obj_create( impl_t impl_type, { hemv_t* cntl; - cntl = ( hemv_t* ) bli_malloc( sizeof(hemv_t) ); + cntl = ( hemv_t* ) bli_malloc_intl( sizeof(hemv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/her/bli_her_cntl.c b/frame/2/her/bli_her_cntl.c index 6d5d35a2b..932306c21 100644 --- a/frame/2/her/bli_her_cntl.c +++ b/frame/2/her/bli_her_cntl.c @@ -115,7 +115,7 @@ her_t* bli_her_cntl_obj_create( impl_t impl_type, { her_t* cntl; - cntl = ( her_t* ) bli_malloc( sizeof(her_t) ); + cntl = ( her_t* ) bli_malloc_intl( sizeof(her_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/her2/bli_her2_cntl.c b/frame/2/her2/bli_her2_cntl.c index 51b909b49..4a0f5d0f8 100644 --- a/frame/2/her2/bli_her2_cntl.c +++ b/frame/2/her2/bli_her2_cntl.c @@ -121,7 +121,7 @@ her2_t* bli_her2_cntl_obj_create( impl_t impl_type, { her2_t* cntl; - cntl = ( her2_t* ) bli_malloc( sizeof(her2_t) ); + cntl = ( her2_t* ) bli_malloc_intl( sizeof(her2_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/trmv/bli_trmv_cntl.c b/frame/2/trmv/bli_trmv_cntl.c index 59c417291..5fbf872aa 100644 --- a/frame/2/trmv/bli_trmv_cntl.c +++ b/frame/2/trmv/bli_trmv_cntl.c @@ -117,7 +117,7 @@ trmv_t* bli_trmv_cntl_obj_create( impl_t impl_type, { trmv_t* cntl; - cntl = ( trmv_t* ) bli_malloc( sizeof(trmv_t) ); + cntl = ( trmv_t* ) bli_malloc_intl( sizeof(trmv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/trsv/bli_trsv_cntl.c b/frame/2/trsv/bli_trsv_cntl.c index a90df2c2e..71de48d3c 100644 --- a/frame/2/trsv/bli_trsv_cntl.c +++ b/frame/2/trsv/bli_trsv_cntl.c @@ -121,7 +121,7 @@ trsv_t* bli_trsv_cntl_obj_create( impl_t impl_type, { trsv_t* cntl; - cntl = ( trsv_t* ) bli_malloc( sizeof(trsv_t) ); + cntl = ( trsv_t* ) bli_malloc_intl( sizeof(trsv_t) ); cntl->impl_type = impl_type; cntl->var_num = 
var_num; diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index fd00be419..382b82bbd 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -156,7 +156,7 @@ gemm_t* bli_gemm_cntl_obj_create( impl_t impl_type, { gemm_t* cntl; - cntl = ( gemm_t* ) bli_malloc( sizeof(gemm_t) ); + cntl = ( gemm_t* ) bli_malloc_intl( sizeof(gemm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/3/gemm/bli_gemm_threading.c b/frame/3/gemm/bli_gemm_threading.c index 58b244e77..910c267c7 100644 --- a/frame/3/gemm/bli_gemm_threading.c +++ b/frame/3/gemm/bli_gemm_threading.c @@ -74,7 +74,7 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, gemm_thrinfo_t* sub_gemm ) { - gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc( sizeof( gemm_thrinfo_t ) ); + gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc_intl( sizeof( gemm_thrinfo_t ) ); bli_setup_gemm_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_gemm_thrinfo_free( gemm_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_gemm_thrinfo_free( thread->sub_gemm ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -106,7 +106,7 @@ void bli_gemm_thrinfo_free_paths( gemm_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_gemm_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } gemm_thrinfo_t** bli_create_gemm_thrinfo_paths( ) @@ -138,7 +138,7 @@ gemm_thrinfo_t** bli_create_gemm_thrinfo_paths( ) dim_t ir_nt = 1; - gemm_thrinfo_t** paths = (gemm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( gemm_thrinfo_t* ) ); + gemm_thrinfo_t** paths = (gemm_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( gemm_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git 
a/frame/3/herk/bli_herk_threading.c b/frame/3/herk/bli_herk_threading.c index 089ce278d..cecebcfcc 100644 --- a/frame/3/herk/bli_herk_threading.c +++ b/frame/3/herk/bli_herk_threading.c @@ -74,7 +74,7 @@ herk_thrinfo_t* bli_create_herk_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, herk_thrinfo_t* sub_herk ) { - herk_thrinfo_t* thread = ( herk_thrinfo_t* ) bli_malloc( sizeof( herk_thrinfo_t ) ); + herk_thrinfo_t* thread = ( herk_thrinfo_t* ) bli_malloc_intl( sizeof( herk_thrinfo_t ) ); bli_setup_herk_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_herk_thrinfo_free( herk_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_herk_thrinfo_free( thread->sub_herk ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -106,7 +106,7 @@ void bli_herk_thrinfo_free_paths( herk_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_herk_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } herk_thrinfo_t** bli_create_herk_thrinfo_paths( ) @@ -137,7 +137,7 @@ herk_thrinfo_t** bli_create_herk_thrinfo_paths( ) dim_t ir_nt = 1; - herk_thrinfo_t** paths = (herk_thrinfo_t**) bli_malloc( global_num_threads * sizeof( herk_thrinfo_t* ) ); + herk_thrinfo_t** paths = (herk_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( herk_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git a/frame/3/trmm/bli_trmm_threading.c b/frame/3/trmm/bli_trmm_threading.c index 173910d55..583389e39 100644 --- a/frame/3/trmm/bli_trmm_threading.c +++ b/frame/3/trmm/bli_trmm_threading.c @@ -74,7 +74,7 @@ trmm_thrinfo_t* bli_create_trmm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, trmm_thrinfo_t* sub_trmm ) { - trmm_thrinfo_t* thread = ( trmm_thrinfo_t* ) bli_malloc( sizeof( trmm_thrinfo_t ) ); + trmm_thrinfo_t* 
thread = ( trmm_thrinfo_t* ) bli_malloc_intl( sizeof( trmm_thrinfo_t ) ); bli_setup_trmm_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_trmm_thrinfo_free( trmm_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_trmm_thrinfo_free( thread->sub_trmm ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -107,7 +107,7 @@ void bli_trmm_thrinfo_free_paths( trmm_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_trmm_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } trmm_thrinfo_t** bli_create_trmm_thrinfo_paths( bool_t jc_dependency ) @@ -142,7 +142,7 @@ trmm_thrinfo_t** bli_create_trmm_thrinfo_paths( bool_t jc_dependency ) dim_t ir_nt = 1; - trmm_thrinfo_t** paths = (trmm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( trmm_thrinfo_t* ) ); + trmm_thrinfo_t** paths = (trmm_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( trmm_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git a/frame/3/trsm/bli_trsm_cntl.c b/frame/3/trsm/bli_trsm_cntl.c index ea2602eb6..3a83faafd 100644 --- a/frame/3/trsm/bli_trsm_cntl.c +++ b/frame/3/trsm/bli_trsm_cntl.c @@ -250,7 +250,7 @@ trsm_t* bli_trsm_cntl_obj_create( impl_t impl_type, { trsm_t* cntl; - cntl = ( trsm_t* ) bli_malloc( sizeof(trsm_t) ); + cntl = ( trsm_t* ) bli_malloc_intl( sizeof(trsm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/3/trsm/bli_trsm_threading.c b/frame/3/trsm/bli_trsm_threading.c index 862fd07a7..510778805 100644 --- a/frame/3/trsm/bli_trsm_threading.c +++ b/frame/3/trsm/bli_trsm_threading.c @@ -74,7 +74,7 @@ trsm_thrinfo_t* bli_create_trsm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, trsm_thrinfo_t* sub_trsm ) { - trsm_thrinfo_t* thread = ( trsm_thrinfo_t* ) bli_malloc( sizeof( trsm_thrinfo_t 
) ); + trsm_thrinfo_t* thread = ( trsm_thrinfo_t* ) bli_malloc_intl( sizeof( trsm_thrinfo_t ) ); bli_setup_trsm_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_trsm_thrinfo_free( trsm_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_trsm_thrinfo_free( thread->sub_trsm ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -106,7 +106,7 @@ void bli_trsm_thrinfo_free_paths( trsm_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_trsm_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } trsm_thrinfo_t** bli_create_trsm_thrinfo_paths( bool_t right_sided ) @@ -144,7 +144,7 @@ trsm_thrinfo_t** bli_create_trsm_thrinfo_paths( bool_t right_sided ) dim_t ir_nt = 1; - trsm_thrinfo_t** paths = (trsm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( trsm_thrinfo_t* ) ); + trsm_thrinfo_t** paths = (trsm_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( trsm_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git a/frame/base/bli_blksz.c b/frame/base/bli_blksz.c index 388ee11a5..e7bd0be2a 100644 --- a/frame/base/bli_blksz.c +++ b/frame/base/bli_blksz.c @@ -42,7 +42,7 @@ blksz_t* bli_blksz_obj_create( dim_t b_s, dim_t be_s, { blksz_t* b; - b = ( blksz_t* ) bli_malloc( sizeof(blksz_t) ); + b = ( blksz_t* ) bli_malloc_intl( sizeof(blksz_t) ); bli_blksz_obj_init( b, b_s, be_s, @@ -71,7 +71,7 @@ void bli_blksz_obj_init( blksz_t* b, void bli_blksz_obj_free( blksz_t* b ) { - bli_free( b ); + bli_free_intl( b ); } // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index f6b5ae47f..2c63aeb81 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -797,6 +797,34 @@ err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx ) 
return e_val; } +err_t bli_check_alignment_is_power_of_two( size_t align_size ) +{ + err_t e_val = BLIS_SUCCESS; + + // This function returns an error code if align_size is zero or not + // a power of two. + + if ( align_size == 0 ) + e_val = BLIS_ALIGNMENT_NOT_POWER_OF_TWO; + else if ( ( align_size & ( align_size - 1 ) ) ) + e_val = BLIS_ALIGNMENT_NOT_POWER_OF_TWO; + + return e_val; +} + +err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ) +{ + err_t e_val = BLIS_SUCCESS; + + // This function returns an error code if align_size is not a whole + // multiple of the size of a pointer. + + if ( align_size % sizeof( void* ) != 0 ) + e_val = BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE; + + return e_val; +} + // -- Object-related errors ---------------------------------------------------- err_t bli_check_object_alias_of( obj_t* a, obj_t* b ) diff --git a/frame/base/bli_check.h b/frame/base/bli_check.h index e614861a8..76a396b07 100644 --- a/frame/base/bli_check.h +++ b/frame/base/bli_check.h @@ -100,6 +100,8 @@ err_t bli_check_valid_packbuf( packbuf_t buf_type ); err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool ); err_t bli_check_if_exhausted_pool( pool_t* pool ); err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx ); +err_t bli_check_alignment_is_power_of_two( size_t align_size ); +err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ); err_t bli_check_object_alias_of( obj_t* a, obj_t* b ); diff --git a/frame/base/bli_cntx.c b/frame/base/bli_cntx.c index a2fedb755..d06167a07 100644 --- a/frame/base/bli_cntx.c +++ b/frame/base/bli_cntx.c @@ -392,10 +392,10 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) // Allocate some temporary local arrays. 
- bszids = bli_malloc( n_bs * sizeof( bszid_t ) ); - blkszs = bli_malloc( n_bs * sizeof( blksz_t* ) ); - bmults = bli_malloc( n_bs * sizeof( bszid_t ) ); - scalrs = bli_malloc( n_bs * sizeof( dim_t ) ); + bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) ); + bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + scalrs = bli_malloc_intl( n_bs * sizeof( dim_t ) ); // -- Begin variable argument section -- @@ -541,10 +541,10 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) } // Free the temporary local arrays. - bli_free( blkszs ); - bli_free( bszids ); - bli_free( bmults ); - bli_free( scalrs ); + bli_free_intl( blkszs ); + bli_free_intl( bszids ); + bli_free_intl( bmults ); + bli_free_intl( scalrs ); } #endif diff --git a/frame/base/bli_error.c b/frame/base/bli_error.c index e58d9d02d..2284d84c6 100644 --- a/frame/base/bli_error.c +++ b/frame/base/bli_error.c @@ -170,6 +170,10 @@ void bli_error_init_msgs( void ) "Attempted to allocate more memory from contiguous pool than is available." ); sprintf( bli_error_string_for_code(BLIS_INSUFFICIENT_STACK_BUF_SIZE), "Configured maximum stack buffer size is insufficient for register blocksizes currently in use." ); + sprintf( bli_error_string_for_code(BLIS_ALIGNMENT_NOT_POWER_OF_TWO), + "Encountered memory alignment value that is either zero or not a power of two." ); + sprintf( bli_error_string_for_code(BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE), + "Encountered memory alignment value that is not a multiple of sizeof(void*)." ); sprintf( bli_error_string_for_code(BLIS_EXPECTED_OBJECT_ALIAS), "Expected object to be alias." 
); diff --git a/frame/base/bli_func.c b/frame/base/bli_func.c index 8ee41d4a4..75be26085 100644 --- a/frame/base/bli_func.c +++ b/frame/base/bli_func.c @@ -42,7 +42,7 @@ func_t* bli_func_obj_create( void* ptr_s, { func_t* f; - f = ( func_t* ) bli_malloc( sizeof(func_t) ); + f = ( func_t* ) bli_malloc_intl( sizeof(func_t) ); bli_func_obj_init( f, ptr_s, @@ -67,7 +67,7 @@ void bli_func_obj_init( func_t* f, void bli_func_obj_free( func_t* f ) { - bli_free( f ); + bli_free_intl( f ); } // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 74e9dde11..6ae0f461e 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -133,9 +133,9 @@ void bli_gks_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) double scalr; // Allocate some temporary local arrays. - bszids = bli_malloc( n_bs * sizeof( bszid_t ) ); - bmults = bli_malloc( n_bs * sizeof( bszid_t ) ); - scalrs = bli_malloc( n_bs * sizeof( double ) ); + bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + scalrs = bli_malloc_intl( n_bs * sizeof( double ) ); // -- Begin variable argument section -- @@ -274,9 +274,9 @@ void bli_gks_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) } // Free the temporary local arrays. - bli_free( bszids ); - bli_free( bmults ); - bli_free( scalrs ); + bli_free_intl( bszids ); + bli_free_intl( bmults ); + bli_free_intl( scalrs ); } @@ -415,7 +415,7 @@ void bli_gks_cntx_set_l3_nat_ukrs( dim_t n_uk, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l3_ukrs = bli_malloc( n_uk * sizeof( l3ukr_t ) ); + l3_ukrs = bli_malloc_intl( n_uk * sizeof( l3ukr_t ) ); // -- Begin variable argument section -- @@ -452,7 +452,7 @@ void bli_gks_cntx_set_l3_nat_ukrs( dim_t n_uk, ... ) } // Free the temporary local array. 
- bli_free( l3_ukrs ); + bli_free_intl( l3_ukrs ); } void bli_gks_cntx_set_l3_vir_ukr( ind_t method, @@ -485,7 +485,7 @@ void bli_gks_cntx_set_l3_vir_ukrs( ind_t method, dim_t n_uk, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l3_ukrs = bli_malloc( n_uk * sizeof( l3ukr_t ) ); + l3_ukrs = bli_malloc_intl( n_uk * sizeof( l3ukr_t ) ); // -- Begin variable argument section -- @@ -522,7 +522,7 @@ void bli_gks_cntx_set_l3_vir_ukrs( ind_t method, dim_t n_uk, ... ) } // Free the temporary local array. - bli_free( l3_ukrs ); + bli_free_intl( l3_ukrs ); } @@ -703,7 +703,7 @@ void bli_gks_cntx_set_l1f_kers( dim_t n_kr, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l1f_kers = bli_malloc( n_kr * sizeof( l1fkr_t ) ); + l1f_kers = bli_malloc_intl( n_kr * sizeof( l1fkr_t ) ); // -- Begin variable argument section -- @@ -740,7 +740,7 @@ void bli_gks_cntx_set_l1f_kers( dim_t n_kr, ... ) } // Free the temporary local array. - bli_free( l1f_kers ); + bli_free_intl( l1f_kers ); } @@ -879,7 +879,7 @@ void bli_gks_cntx_set_l1v_kers( dim_t n_kr, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l1v_kers = bli_malloc( n_kr * sizeof( l1vkr_t ) ); + l1v_kers = bli_malloc_intl( n_kr * sizeof( l1vkr_t ) ); // -- Begin variable argument section -- @@ -916,7 +916,7 @@ void bli_gks_cntx_set_l1v_kers( dim_t n_kr, ... ) } // Free the temporary local array. 
- bli_free( l1v_kers ); + bli_free_intl( l1v_kers ); } diff --git a/frame/base/bli_malloc.c b/frame/base/bli_malloc.c index 8afa3fb91..191db4834 100644 --- a/frame/base/bli_malloc.c +++ b/frame/base/bli_malloc.c @@ -34,37 +34,171 @@ #include "blis.h" -void* bli_malloc( siz_t size ) -{ - void* p = NULL; - int r_val; +// ----------------------------------------------------------------------------- +void* bli_malloc_pool( size_t size ) +{ + const malloc_ft malloc_fp = BLIS_MALLOC_POOL; + const size_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE; + + return bli_malloc_align( malloc_fp, size, align_size ); +} + +void bli_free_pool( void* p ) +{ + bli_free_align( BLIS_FREE_POOL, p ); +} + +// ----------------------------------------------------------------------------- + +void* bli_malloc_user( size_t size ) +{ + const malloc_ft malloc_fp = BLIS_MALLOC_USER; + const size_t align_size = BLIS_HEAP_ADDR_ALIGN_SIZE; + + return bli_malloc_align( malloc_fp, size, align_size ); +} + +void bli_free_user( void* p ) +{ + bli_free_align( BLIS_FREE_USER, p ); +} + +// ----------------------------------------------------------------------------- + +void* bli_malloc_intl( size_t size ) +{ + const malloc_ft malloc_fp = BLIS_MALLOC_INTL; + + return bli_malloc_noalign( malloc_fp, size ); +} + +void bli_free_intl( void* p ) +{ + bli_free_noalign( BLIS_FREE_INTL, p ); +} + +// ----------------------------------------------------------------------------- + +void* bli_malloc_align + ( + malloc_ft f, + size_t size, + size_t align_size + ) +{ + const size_t ptr_size = sizeof( void* ); + size_t align_offset = 0; + void* p_orig; + int8_t* p_byte; + void** p_addr; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_malloc_align_check( f, size, align_size ); + + // Return early if zero bytes were requested. 
if ( size == 0 ) return NULL; -#if BLIS_HEAP_ADDR_ALIGN_SIZE == 1 - p = malloc( ( size_t )size ); -#elif defined(_WIN32) - p = _aligned_malloc( ( size_t )size, - ( size_t )BLIS_HEAP_ADDR_ALIGN_SIZE ); -#else - r_val = posix_memalign( &p, - ( size_t )BLIS_HEAP_ADDR_ALIGN_SIZE, - ( size_t )size ); + // Add the alignment size and the size of a pointer to the number + // of bytes to allocate. + size += align_size + ptr_size; - if ( r_val != 0 ) bli_abort(); -#endif + // Call the allocation function. + p_orig = f( size ); - if ( p == NULL ) bli_abort(); + // If NULL was returned, something is probably very wrong. + if ( p_orig == NULL ) bli_abort(); - return p; + // Advance the pointer by one pointer element. + p_byte = p_orig; + p_byte += ptr_size; + + // Compute the offset to the desired alignment. + if ( bli_is_unaligned_to( p_byte, align_size ) ) + { + align_offset = align_size - + bli_offset_past_alignment( p_byte, align_size ); + } + + // Advance the pointer using the difference between the alignment + // size and the alignment offset. + p_byte += align_offset; + + // Compute the address of the pointer element just before the start + // of the aligned address, and store the original address there. + p_addr = ( void** )(p_byte - ptr_size); + *p_addr = p_orig; + + // Return the aligned pointer. + return p_byte; } -void bli_free( void* p ) +void bli_free_align + ( + free_ft f, + void* p + ) { -#if BLIS_HEAP_ADDR_ALIGN_SIZE == 1 || !defined(_WIN32) - free( p ); -#else - _aligned_free( p ); -#endif + const size_t ptr_size = sizeof( void* ); + void* p_orig; + int8_t* p_byte; + void** p_addr; + + // Since the bli_malloc_pool() function returned the aligned pointer, + // we have to first recover the original pointer before we can free + // the memory. + + // Start by casting the pointer to a byte pointer. + p_byte = p; + + // Compute the address of the pointer element just before the start + // of the aligned address, and recover the original address. 
+ p_addr = ( void** )( p_byte - ptr_size ); + p_orig = *p_addr; + + // Free the original pointer. + f( p_orig ); } +// ----------------------------------------------------------------------------- + +void* bli_malloc_noalign + ( + malloc_ft f, + size_t size + ) +{ + return f( size ); +} + +void bli_free_noalign + ( + free_ft f, + void* p + ) +{ + f( p ); +} + +// ----------------------------------------------------------------------------- + +void bli_malloc_align_check + ( + malloc_ft f, + size_t size, + size_t align_size + ) +{ + err_t e_val; + + // Check for valid alignment. + + e_val = bli_check_alignment_is_power_of_two( align_size ); + bli_check_error_code( e_val ); + + e_val = bli_check_alignment_is_mult_of_ptr_size( align_size ); + bli_check_error_code( e_val ); +} + + diff --git a/frame/base/bli_malloc.h b/frame/base/bli_malloc.h index f5a05c0e4..bd2a8cc9a 100644 --- a/frame/base/bli_malloc.h +++ b/frame/base/bli_malloc.h @@ -32,5 +32,28 @@ */ -void* bli_malloc( siz_t size ); -void bli_free( void* p ); +// Typedef function pointer types for malloc() and free() substitutes. 
+typedef void* (*malloc_ft) ( size_t size ); +typedef void (*free_ft) ( void* p ); + +// ----------------------------------------------------------------------------- + +void* bli_malloc_pool( size_t size ); +void bli_free_pool( void* p ); + +void* bli_malloc_intl( size_t size ); +void bli_free_intl( void* p ); + +void* bli_malloc_user( size_t size ); +void bli_free_user( void* p ); + +// ----------------------------------------------------------------------------- + +void* bli_malloc_align( malloc_ft f, size_t size, size_t align_size ); +void bli_free_align( free_ft f, void* p ); + +void* bli_malloc_noalign( malloc_ft f, size_t size ); +void bli_free_noalign( free_ft f, void* p ); + +void bli_malloc_align_check( malloc_ft f, size_t size, size_t align_size ); + diff --git a/frame/base/bli_mbool.c b/frame/base/bli_mbool.c index 9bea1cd2e..46ba531bc 100644 --- a/frame/base/bli_mbool.c +++ b/frame/base/bli_mbool.c @@ -42,7 +42,7 @@ mbool_t* bli_mbool_obj_create( bool_t b_s, { mbool_t* b; - b = ( mbool_t* ) bli_malloc( sizeof(mbool_t) ); + b = ( mbool_t* ) bli_malloc_intl( sizeof(mbool_t) ); bli_mbool_obj_init( b, b_s, @@ -67,6 +67,6 @@ void bli_mbool_obj_init( mbool_t* b, void bli_mbool_obj_free( mbool_t* b ) { - bli_free( b ); + bli_free_intl( b ); } diff --git a/frame/base/bli_mem.c b/frame/base/bli_mem.c index a1991304e..25530b1ed 100644 --- a/frame/base/bli_mem.c +++ b/frame/base/bli_mem.c @@ -61,8 +61,8 @@ void bli_mem_acquire_m( siz_t req_size, if ( buf_type == BLIS_BUFFER_FOR_GEN_USE ) { // For general-use buffer requests, such as those used by level-2 - // operations, using bli_malloc() is sufficient. - void* buf_sys = bli_malloc( req_size ); + // operations, dynamically allocating memory is sufficient. 
+ void* buf_sys = bli_malloc_pool( req_size ); // Initialize the mem_t object with: // - the address of the memory block, @@ -163,9 +163,9 @@ void bli_mem_release( mem_t* mem ) { void* buf_sys = bli_mem_buf_sys( mem ); - // For general-use buffers, we allocate with bli_malloc(), and so - // here we need to call bli_free(). - bli_free( buf_sys ); + // For general-use buffers, we dynamically allocate memory, and so + // here we need to free. + bli_free_pool( buf_sys ); } else { diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index d8d152003..226b0747a 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -166,7 +166,7 @@ void bli_obj_alloc_buffer( inc_t rs, buffer_size = ( siz_t )n_elem * elem_size; // Allocate the buffer. - p = bli_malloc( buffer_size ); + p = bli_malloc_user( buffer_size ); // Set individual fields. bli_obj_set_buffer( p, *obj ); @@ -221,7 +221,7 @@ void bli_obj_free( obj_t* obj ) // is a detached scalar (ie: if the buffer pointer refers to the // address of the internal scalar buffer). 
if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) ) - bli_free( bli_obj_buffer( *obj ) ); + bli_free_user( bli_obj_buffer( *obj ) ); } } @@ -436,6 +436,17 @@ dim_t bli_align_dim_to_size( dim_t dim, siz_t elem_size, siz_t align_size ) return dim; } +dim_t bli_align_ptr_to_size( void* p, size_t align_size ) +{ + dim_t dim; + + dim = ( ( ( uintptr_t )p + align_size - 1 ) / + align_size + ) * align_size; + + return dim; +} + static num_t type_union[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = { // s c d z diff --git a/frame/base/bli_obj.h b/frame/base/bli_obj.h index 92e9b1d87..fffb183aa 100644 --- a/frame/base/bli_obj.h +++ b/frame/base/bli_obj.h @@ -89,6 +89,7 @@ siz_t bli_datatype_size( num_t dt ); dim_t bli_align_dim_to_mult( dim_t dim, dim_t dim_mult ); dim_t bli_align_dim_to_size( dim_t dim, siz_t elem_size, siz_t align_size ); +dim_t bli_align_ptr_to_size( void* p, size_t align_size ); num_t bli_datatype_union( num_t dt1, num_t dt2 ); diff --git a/frame/base/bli_pool.c b/frame/base/bli_pool.c index 23090ca39..a9660a058 100644 --- a/frame/base/bli_pool.c +++ b/frame/base/bli_pool.c @@ -43,7 +43,7 @@ void bli_pool_init( dim_t num_blocks, dim_t i; // Allocate the block_ptrs array. - block_ptrs = bli_malloc( num_blocks * sizeof( pblk_t ) ); + block_ptrs = bli_malloc_intl( num_blocks * sizeof( pblk_t ) ); // Allocate and initialize each entry in the block_ptrs array. for ( i = 0; i < num_blocks; ++i ) @@ -88,7 +88,7 @@ void bli_pool_finalize( pool_t* pool ) } // Free the block_ptrs array. - bli_free( block_ptrs ); + bli_free_intl( block_ptrs ); // Clear the contents of the pool_t struct. bli_pool_set_block_ptrs( NULL, pool ); @@ -235,7 +235,7 @@ void bli_pool_grow( dim_t num_blocks_add, pool_t* pool ) block_ptrs_cur = bli_pool_block_ptrs( pool ); // Allocate a new block_ptrs array of length num_blocks_new. 
- block_ptrs_new = bli_malloc( num_blocks_new * sizeof( pblk_t ) ); + block_ptrs_new = bli_malloc_intl( num_blocks_new * sizeof( pblk_t ) ); // Query the top_index of the pool. top_index = bli_pool_top_index( pool ); @@ -251,7 +251,7 @@ void bli_pool_grow( dim_t num_blocks_add, pool_t* pool ) //printf( "bli_pool_grow: bp_cur: %p\n", block_ptrs_cur ); // Free the old block_ptrs array. - bli_free( block_ptrs_cur ); + bli_free_intl( block_ptrs_cur ); // Update the pool_t struct with the new block_ptrs array and // record its allocated length. @@ -343,7 +343,7 @@ void bli_pool_alloc_block( siz_t block_size, // Allocate the block. We add the alignment size to ensure we will // have enough usable space after alignment. - buf_sys = bli_malloc( block_size + align_size ); + buf_sys = bli_malloc_pool( block_size + align_size ); buf_align = buf_sys; // Advance the pointer to achieve the necessary alignment, if it is not @@ -378,7 +378,7 @@ void bli_pool_free_block( pblk_t* block ) buf_sys = bli_pblk_buf_sys( block ); // Free the block. 
- bli_free( buf_sys ); + bli_free_pool( buf_sys ); } void bli_pool_print( pool_t* pool ) diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index c0d88cd31..316f3d5cd 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -96,7 +96,7 @@ void bli_level3_thread_decorator //Constructors and destructors for constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { - thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) ); + thread_comm_t* comm = (thread_comm_t*) bli_malloc_intl( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; } @@ -114,7 +114,7 @@ void bli_free_communicator( thread_comm_t* communicator ) { if( communicator == NULL ) return; bli_cleanup_communicator( communicator ); - bli_free( communicator ); + bli_free_intl( communicator ); } void bli_cleanup_communicator( thread_comm_t* communicator ) @@ -129,7 +129,7 @@ thrinfo_t* bli_create_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_ dim_t n_way, dim_t work_id ) { - thrinfo_t* thr = (thrinfo_t*) bli_malloc( sizeof(thrinfo_t) ); + thrinfo_t* thr = (thrinfo_t*) bli_malloc_intl( sizeof(thrinfo_t) ); bli_setup_thread_info( thr, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id ); return thr; } diff --git a/frame/base/bli_threading_omp.c b/frame/base/bli_threading_omp.c index 8cd714da1..01a43c840 100644 --- a/frame/base/bli_threading_omp.c +++ b/frame/base/bli_threading_omp.c @@ -39,7 +39,7 @@ //Constructors and destructors for constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { - thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) ); + thread_comm_t* comm = (thread_comm_t*) bli_malloc_intl( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; } @@ -48,7 +48,7 @@ void bli_free_communicator( thread_comm_t* communicator ) { if( communicator == NULL ) return; bli_cleanup_communicator( communicator ); - bli_free( communicator ); + 
bli_free_intl( communicator ); } void bli_level3_thread_decorator @@ -129,7 +129,7 @@ void bli_free_barrier_tree( barrier_t* barrier ) if( barrier->count == 0 ) { bli_free_barrier_tree( barrier->dad ); - bli_free( barrier ); + bli_free_intl( barrier ); } return; } @@ -178,7 +178,7 @@ void bli_cleanup_communicator( thread_comm_t* communicator ) { bli_free_barrier_tree( communicator->barriers[i] ); } - bli_free( communicator->barriers ); + bli_free_intl( communicator->barriers ); } @@ -187,7 +187,7 @@ void bli_setup_communicator( thread_comm_t* communicator, dim_t n_threads) if( communicator == NULL ) return; communicator->sent_object = NULL; communicator->n_threads = n_threads; - communicator->barriers = ( barrier_t** ) bli_malloc( sizeof( barrier_t* ) * n_threads ); + communicator->barriers = ( barrier_t** ) bli_malloc_intl( sizeof( barrier_t* ) * n_threads ); bli_create_tree_barrier( n_threads, BLIS_TREE_BARRIER_ARITY, communicator->barriers, 0 ); } diff --git a/frame/base/bli_threading_pthreads.c b/frame/base/bli_threading_pthreads.c index e3f3f6387..f0de26198 100644 --- a/frame/base/bli_threading_pthreads.c +++ b/frame/base/bli_threading_pthreads.c @@ -125,8 +125,8 @@ void bli_level3_thread_decorator void** thread ) { - pthread_t* pthreads = (pthread_t*) bli_malloc(sizeof(pthread_t) * n_threads); - thread_data_t* datas = (thread_data_t*) bli_malloc(sizeof(thread_data_t) * n_threads); + pthread_t* pthreads = (pthread_t*) bli_malloc_intl(sizeof(pthread_t) * n_threads); + thread_data_t* datas = (thread_data_t*) bli_malloc_intl(sizeof(thread_data_t) * n_threads); for( int i = 1; i < n_threads; i++ ) { @@ -151,8 +151,8 @@ void bli_level3_thread_decorator pthread_join( pthreads[i], NULL ); } - bli_free( pthreads ); - bli_free( datas ); + bli_free_intl( pthreads ); + bli_free_intl( datas ); } //barrier routine taken from art of multicore programming @@ -164,7 +164,7 @@ void bli_barrier( thread_comm_t* communicator, dim_t t_id ) //Constructors and destructors for 
constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { - thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) ); + thread_comm_t* comm = (thread_comm_t*) bli_malloc_intl( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; } @@ -181,7 +181,7 @@ void bli_free_communicator( thread_comm_t* communicator ) { if( communicator == NULL ) return; bli_cleanup_communicator( communicator ); - bli_free( communicator ); + bli_free_intl( communicator ); } void bli_cleanup_communicator( thread_comm_t* communicator ) diff --git a/frame/cntl/bli_cntl.c b/frame/cntl/bli_cntl.c index a31fc2dd5..ffd6120c8 100644 --- a/frame/cntl/bli_cntl.c +++ b/frame/cntl/bli_cntl.c @@ -36,5 +36,5 @@ void bli_cntl_obj_free( void* cntl ) { - bli_free( cntl ); + bli_free_intl( cntl ); } diff --git a/frame/include/bli_kernel_macro_defs.h b/frame/include/bli_kernel_macro_defs.h index b1f869a9c..00a2aa4b9 100644 --- a/frame/include/bli_kernel_macro_defs.h +++ b/frame/include/bli_kernel_macro_defs.h @@ -38,8 +38,49 @@ // -- MEMORY ALLOCATION -------------------------------------------------------- -// Size of a virtual memory page. This is used to align certain memory -// buffers which are allocated and used internally. +// Memory allocation functions. These macros define the three types of +// malloc()-style functions, and their free() counterparts: one for each +// type of memory to be allocated. +// NOTE: ANY ALTERNATIVE TO malloc()/free() USED FOR ANY OF THE FOLLOWING +// THREE PAIRS OF MACROS MUST USE THE SAME FUNCTION PROTOTYPE AS malloc() +// and free(): +// +// void* malloc( size_t size ); +// void free( void* p ); +// + +// This allocation function is called to allocate memory for blocks within +// BLIS's internal memory pools. 
+#ifndef BLIS_MALLOC_POOL +#define BLIS_MALLOC_POOL malloc +#endif + +#ifndef BLIS_FREE_POOL +#define BLIS_FREE_POOL free +#endif + +// This allocation function is called to allocate memory for internally- +// used objects and structures, such as control tree nodes. +#ifndef BLIS_MALLOC_INTL +#define BLIS_MALLOC_INTL malloc +#endif + +#ifndef BLIS_FREE_INTL +#define BLIS_FREE_INTL free +#endif + +// This allocation function is called to allocate memory for objects +// created by user-level API functions, such as bli_obj_create(). +#ifndef BLIS_MALLOC_USER +#define BLIS_MALLOC_USER malloc +#endif + +#ifndef BLIS_FREE_USER +#define BLIS_FREE_USER free +#endif + +// Size of a virtual memory page. This is used to align blocks within the +// memory pools. #ifndef BLIS_PAGE_SIZE #define BLIS_PAGE_SIZE 4096 #endif @@ -76,17 +117,16 @@ // functions. #define BLIS_STACK_BUF_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE -// Alignment size used when allocating memory dynamically from the operating -// system (eg: posix_memalign()). To disable heap alignment and just use -// malloc() instead, set this to 1. +// Alignment size used when allocating memory via BLIS_MALLOC_USER. +// To disable heap alignment, set this to 1. #define BLIS_HEAP_ADDR_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE -// Alignment size used when sizing leading dimensions of dynamically -// allocated memory. +// Alignment size used when sizing leading dimensions of memory allocated +// via BLIS_MALLOC_USER. #define BLIS_HEAP_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE // Alignment size used when allocating blocks to the internal memory -// pool (for packing buffers). +// pool, via BLIS_MALLOC_POOL. 
#define BLIS_POOL_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE diff --git a/frame/include/bli_malloc_prototypes.h b/frame/include/bli_malloc_prototypes.h new file mode 100644 index 000000000..e828f99aa --- /dev/null +++ b/frame/include/bli_malloc_prototypes.h @@ -0,0 +1,50 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_MALLOC_PROTOTYPES_H +#define BLIS_MALLOC_PROTOTYPES_H + +// Generate prototypes for each of the malloc() and free() functions +// defined in BLIS + +void* BLIS_MALLOC_POOL( size_t size ); +void BLIS_FREE_POOL( void* p ); + +void* BLIS_MALLOC_INTL( size_t size ); +void BLIS_FREE_INTL( void* p ); + +void* BLIS_MALLOC_USER( size_t size ); +void BLIS_FREE_USER( void* p ); + +#endif diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 54cba702e..8869cea17 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -48,7 +48,7 @@ \ ( ( uintptr_t )(p) % ( uintptr_t )(size) != 0 ) -#define bli_offset_from_alignment( p, size ) \ +#define bli_offset_past_alignment( p, size ) \ \ ( ( uintptr_t )(p) % ( uintptr_t )(size) ) diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index ffdcba56b..7274ce5a6 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1000,6 +1000,8 @@ typedef enum BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG = (-121), BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-122), BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-123), + BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-124), + BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-125), // Object-related errors BLIS_EXPECTED_OBJECT_ALIAS = (-130), diff --git a/frame/include/blis.h b/frame/include/blis.h index e20fc5f73..06463dbed 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -90,6 +90,8 @@ extern "C" { #include "bli_kernel_prototypes.h" +#include "bli_malloc_prototypes.h" + // -- Base operation prototypes --