diff --git a/frame/1/packv/bli_packv_cntl.c b/frame/1/packv/bli_packv_cntl.c index 0d2a5ccab..ac068ce71 100644 --- a/frame/1/packv/bli_packv_cntl.c +++ b/frame/1/packv/bli_packv_cntl.c @@ -56,7 +56,7 @@ packv_t* bli_packv_cntl_obj_create( impl_t impl_type, { packv_t* cntl; - cntl = ( packv_t* ) bli_malloc( sizeof(packv_t) ); + cntl = ( packv_t* ) bli_malloc_intl( sizeof(packv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1/scalv/bli_scalv_cntl.c b/frame/1/scalv/bli_scalv_cntl.c index 8470805a7..9edb6162c 100644 --- a/frame/1/scalv/bli_scalv_cntl.c +++ b/frame/1/scalv/bli_scalv_cntl.c @@ -53,7 +53,7 @@ scalv_t* bli_scalv_cntl_obj_create( impl_t impl_type, { scalv_t* cntl; - cntl = ( scalv_t* ) bli_malloc( sizeof(scalv_t) ); + cntl = ( scalv_t* ) bli_malloc_intl( sizeof(scalv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1/unpackv/bli_unpackv_cntl.c b/frame/1/unpackv/bli_unpackv_cntl.c index cfa0be8ff..1e1ab93fb 100644 --- a/frame/1/unpackv/bli_unpackv_cntl.c +++ b/frame/1/unpackv/bli_unpackv_cntl.c @@ -52,7 +52,7 @@ unpackv_t* bli_unpackv_cntl_obj_create( impl_t impl_type, { unpackv_t* cntl; - cntl = ( unpackv_t* ) bli_malloc( sizeof(unpackv_t) ); + cntl = ( unpackv_t* ) bli_malloc_intl( sizeof(unpackv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1m/packm/bli_packm_cntl.c b/frame/1m/packm/bli_packm_cntl.c index ae800e99c..f0f674615 100644 --- a/frame/1m/packm/bli_packm_cntl.c +++ b/frame/1m/packm/bli_packm_cntl.c @@ -103,7 +103,7 @@ packm_t* bli_packm_cntl_obj_create( impl_t impl_type, { packm_t* cntl; - cntl = ( packm_t* ) bli_malloc( sizeof(packm_t) ); + cntl = ( packm_t* ) bli_malloc_intl( sizeof(packm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1m/packm/bli_packm_threading.c b/frame/1m/packm/bli_packm_threading.c index 7c026a413..c4b6af649 100644 --- a/frame/1m/packm/bli_packm_threading.c +++ 
b/frame/1m/packm/bli_packm_threading.c @@ -37,7 +37,7 @@ void bli_packm_thrinfo_free( packm_thrinfo_t* thread ) { if( thread != NULL && thread != &BLIS_PACKM_SINGLE_THREADED) - bli_free(thread); + bli_free_intl(thread); } packm_thrinfo_t* bli_create_packm_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_comm_t* icomm, dim_t icomm_id, diff --git a/frame/1m/scalm/bli_scalm_cntl.c b/frame/1m/scalm/bli_scalm_cntl.c index 1f26635ca..4a965b3fa 100644 --- a/frame/1m/scalm/bli_scalm_cntl.c +++ b/frame/1m/scalm/bli_scalm_cntl.c @@ -53,7 +53,7 @@ scalm_t* bli_scalm_cntl_obj_create( impl_t impl_type, { scalm_t* cntl; - cntl = ( scalm_t* ) bli_malloc( sizeof(scalm_t) ); + cntl = ( scalm_t* ) bli_malloc_intl( sizeof(scalm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/1m/unpackm/bli_unpackm_cntl.c b/frame/1m/unpackm/bli_unpackm_cntl.c index 7d88c1318..0e99bb741 100644 --- a/frame/1m/unpackm/bli_unpackm_cntl.c +++ b/frame/1m/unpackm/bli_unpackm_cntl.c @@ -54,7 +54,7 @@ unpackm_t* bli_unpackm_cntl_obj_create( impl_t impl_type, { unpackm_t* cntl; - cntl = ( unpackm_t* ) bli_malloc( sizeof(unpackm_t) ); + cntl = ( unpackm_t* ) bli_malloc_intl( sizeof(unpackm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/gemv/bli_gemv_cntl.c b/frame/2/gemv/bli_gemv_cntl.c index 05ae42ef4..ecedeaca4 100644 --- a/frame/2/gemv/bli_gemv_cntl.c +++ b/frame/2/gemv/bli_gemv_cntl.c @@ -178,7 +178,7 @@ gemv_t* bli_gemv_cntl_obj_create( impl_t impl_type, { gemv_t* cntl; - cntl = ( gemv_t* ) bli_malloc( sizeof(gemv_t) ); + cntl = ( gemv_t* ) bli_malloc_intl( sizeof(gemv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/ger/bli_ger_cntl.c b/frame/2/ger/bli_ger_cntl.c index f3f20e3bb..16565ef02 100644 --- a/frame/2/ger/bli_ger_cntl.c +++ b/frame/2/ger/bli_ger_cntl.c @@ -170,7 +170,7 @@ ger_t* bli_ger_cntl_obj_create( impl_t impl_type, { ger_t* cntl; - cntl = ( ger_t* ) bli_malloc( sizeof(ger_t) ); + cntl = 
( ger_t* ) bli_malloc_intl( sizeof(ger_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/hemv/bli_hemv_cntl.c b/frame/2/hemv/bli_hemv_cntl.c index e245ab689..8505f615c 100644 --- a/frame/2/hemv/bli_hemv_cntl.c +++ b/frame/2/hemv/bli_hemv_cntl.c @@ -131,7 +131,7 @@ hemv_t* bli_hemv_cntl_obj_create( impl_t impl_type, { hemv_t* cntl; - cntl = ( hemv_t* ) bli_malloc( sizeof(hemv_t) ); + cntl = ( hemv_t* ) bli_malloc_intl( sizeof(hemv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/her/bli_her_cntl.c b/frame/2/her/bli_her_cntl.c index 6d5d35a2b..932306c21 100644 --- a/frame/2/her/bli_her_cntl.c +++ b/frame/2/her/bli_her_cntl.c @@ -115,7 +115,7 @@ her_t* bli_her_cntl_obj_create( impl_t impl_type, { her_t* cntl; - cntl = ( her_t* ) bli_malloc( sizeof(her_t) ); + cntl = ( her_t* ) bli_malloc_intl( sizeof(her_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/her2/bli_her2_cntl.c b/frame/2/her2/bli_her2_cntl.c index 51b909b49..4a0f5d0f8 100644 --- a/frame/2/her2/bli_her2_cntl.c +++ b/frame/2/her2/bli_her2_cntl.c @@ -121,7 +121,7 @@ her2_t* bli_her2_cntl_obj_create( impl_t impl_type, { her2_t* cntl; - cntl = ( her2_t* ) bli_malloc( sizeof(her2_t) ); + cntl = ( her2_t* ) bli_malloc_intl( sizeof(her2_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/trmv/bli_trmv_cntl.c b/frame/2/trmv/bli_trmv_cntl.c index 59c417291..5fbf872aa 100644 --- a/frame/2/trmv/bli_trmv_cntl.c +++ b/frame/2/trmv/bli_trmv_cntl.c @@ -117,7 +117,7 @@ trmv_t* bli_trmv_cntl_obj_create( impl_t impl_type, { trmv_t* cntl; - cntl = ( trmv_t* ) bli_malloc( sizeof(trmv_t) ); + cntl = ( trmv_t* ) bli_malloc_intl( sizeof(trmv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/2/trsv/bli_trsv_cntl.c b/frame/2/trsv/bli_trsv_cntl.c index a90df2c2e..71de48d3c 100644 --- a/frame/2/trsv/bli_trsv_cntl.c +++ b/frame/2/trsv/bli_trsv_cntl.c @@ -121,7 +121,7 @@ trsv_t* 
bli_trsv_cntl_obj_create( impl_t impl_type, { trsv_t* cntl; - cntl = ( trsv_t* ) bli_malloc( sizeof(trsv_t) ); + cntl = ( trsv_t* ) bli_malloc_intl( sizeof(trsv_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index fd00be419..382b82bbd 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -156,7 +156,7 @@ gemm_t* bli_gemm_cntl_obj_create( impl_t impl_type, { gemm_t* cntl; - cntl = ( gemm_t* ) bli_malloc( sizeof(gemm_t) ); + cntl = ( gemm_t* ) bli_malloc_intl( sizeof(gemm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/3/gemm/bli_gemm_threading.c b/frame/3/gemm/bli_gemm_threading.c index 58b244e77..910c267c7 100644 --- a/frame/3/gemm/bli_gemm_threading.c +++ b/frame/3/gemm/bli_gemm_threading.c @@ -74,7 +74,7 @@ gemm_thrinfo_t* bli_create_gemm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, gemm_thrinfo_t* sub_gemm ) { - gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc( sizeof( gemm_thrinfo_t ) ); + gemm_thrinfo_t* thread = ( gemm_thrinfo_t* ) bli_malloc_intl( sizeof( gemm_thrinfo_t ) ); bli_setup_gemm_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_gemm_thrinfo_free( gemm_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_gemm_thrinfo_free( thread->sub_gemm ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -106,7 +106,7 @@ void bli_gemm_thrinfo_free_paths( gemm_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_gemm_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } gemm_thrinfo_t** bli_create_gemm_thrinfo_paths( ) @@ -138,7 +138,7 @@ gemm_thrinfo_t** bli_create_gemm_thrinfo_paths( ) dim_t ir_nt = 1; - gemm_thrinfo_t** paths = (gemm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( gemm_thrinfo_t* ) ); + gemm_thrinfo_t** 
paths = (gemm_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( gemm_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git a/frame/3/herk/bli_herk_threading.c b/frame/3/herk/bli_herk_threading.c index 089ce278d..cecebcfcc 100644 --- a/frame/3/herk/bli_herk_threading.c +++ b/frame/3/herk/bli_herk_threading.c @@ -74,7 +74,7 @@ herk_thrinfo_t* bli_create_herk_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, herk_thrinfo_t* sub_herk ) { - herk_thrinfo_t* thread = ( herk_thrinfo_t* ) bli_malloc( sizeof( herk_thrinfo_t ) ); + herk_thrinfo_t* thread = ( herk_thrinfo_t* ) bli_malloc_intl( sizeof( herk_thrinfo_t ) ); bli_setup_herk_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_herk_thrinfo_free( herk_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_herk_thrinfo_free( thread->sub_herk ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -106,7 +106,7 @@ void bli_herk_thrinfo_free_paths( herk_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_herk_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } herk_thrinfo_t** bli_create_herk_thrinfo_paths( ) @@ -137,7 +137,7 @@ herk_thrinfo_t** bli_create_herk_thrinfo_paths( ) dim_t ir_nt = 1; - herk_thrinfo_t** paths = (herk_thrinfo_t**) bli_malloc( global_num_threads * sizeof( herk_thrinfo_t* ) ); + herk_thrinfo_t** paths = (herk_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( herk_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git a/frame/3/trmm/bli_trmm_threading.c b/frame/3/trmm/bli_trmm_threading.c index 173910d55..583389e39 100644 --- a/frame/3/trmm/bli_trmm_threading.c +++ b/frame/3/trmm/bli_trmm_threading.c @@ -74,7 +74,7 @@ trmm_thrinfo_t* 
bli_create_trmm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, trmm_thrinfo_t* sub_trmm ) { - trmm_thrinfo_t* thread = ( trmm_thrinfo_t* ) bli_malloc( sizeof( trmm_thrinfo_t ) ); + trmm_thrinfo_t* thread = ( trmm_thrinfo_t* ) bli_malloc_intl( sizeof( trmm_thrinfo_t ) ); bli_setup_trmm_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_trmm_thrinfo_free( trmm_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_trmm_thrinfo_free( thread->sub_trmm ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -107,7 +107,7 @@ void bli_trmm_thrinfo_free_paths( trmm_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_trmm_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } trmm_thrinfo_t** bli_create_trmm_thrinfo_paths( bool_t jc_dependency ) @@ -142,7 +142,7 @@ trmm_thrinfo_t** bli_create_trmm_thrinfo_paths( bool_t jc_dependency ) dim_t ir_nt = 1; - trmm_thrinfo_t** paths = (trmm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( trmm_thrinfo_t* ) ); + trmm_thrinfo_t** paths = (trmm_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( trmm_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git a/frame/3/trsm/bli_trsm_cntl.c b/frame/3/trsm/bli_trsm_cntl.c index ea2602eb6..3a83faafd 100644 --- a/frame/3/trsm/bli_trsm_cntl.c +++ b/frame/3/trsm/bli_trsm_cntl.c @@ -250,7 +250,7 @@ trsm_t* bli_trsm_cntl_obj_create( impl_t impl_type, { trsm_t* cntl; - cntl = ( trsm_t* ) bli_malloc( sizeof(trsm_t) ); + cntl = ( trsm_t* ) bli_malloc_intl( sizeof(trsm_t) ); cntl->impl_type = impl_type; cntl->var_num = var_num; diff --git a/frame/3/trsm/bli_trsm_threading.c b/frame/3/trsm/bli_trsm_threading.c index 862fd07a7..510778805 100644 --- a/frame/3/trsm/bli_trsm_threading.c +++ b/frame/3/trsm/bli_trsm_threading.c @@ -74,7 
+74,7 @@ trsm_thrinfo_t* bli_create_trsm_thrinfo_node( thread_comm_t* ocomm, dim_t ocomm_ packm_thrinfo_t* ipackm, trsm_thrinfo_t* sub_trsm ) { - trsm_thrinfo_t* thread = ( trsm_thrinfo_t* ) bli_malloc( sizeof( trsm_thrinfo_t ) ); + trsm_thrinfo_t* thread = ( trsm_thrinfo_t* ) bli_malloc_intl( sizeof( trsm_thrinfo_t ) ); bli_setup_trsm_thrinfo_node( thread, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id, @@ -98,7 +98,7 @@ void bli_trsm_thrinfo_free( trsm_thrinfo_t* thread) bli_packm_thrinfo_free( thread->opackm ); bli_packm_thrinfo_free( thread->ipackm ); bli_trsm_thrinfo_free( thread->sub_trsm ); - bli_free( thread ); + bli_free_intl( thread ); return; } @@ -106,7 +106,7 @@ void bli_trsm_thrinfo_free_paths( trsm_thrinfo_t** threads, dim_t num ) { for( int i = 0; i < num; i++) bli_trsm_thrinfo_free( threads[i] ); - bli_free( threads ); + bli_free_intl( threads ); } trsm_thrinfo_t** bli_create_trsm_thrinfo_paths( bool_t right_sided ) @@ -144,7 +144,7 @@ trsm_thrinfo_t** bli_create_trsm_thrinfo_paths( bool_t right_sided ) dim_t ir_nt = 1; - trsm_thrinfo_t** paths = (trsm_thrinfo_t**) bli_malloc( global_num_threads * sizeof( trsm_thrinfo_t* ) ); + trsm_thrinfo_t** paths = (trsm_thrinfo_t**) bli_malloc_intl( global_num_threads * sizeof( trsm_thrinfo_t* ) ); thread_comm_t* global_comm = bli_create_communicator( global_num_threads ); for( int a = 0; a < jc_way; a++ ) diff --git a/frame/base/bli_blksz.c b/frame/base/bli_blksz.c index 388ee11a5..e7bd0be2a 100644 --- a/frame/base/bli_blksz.c +++ b/frame/base/bli_blksz.c @@ -42,7 +42,7 @@ blksz_t* bli_blksz_obj_create( dim_t b_s, dim_t be_s, { blksz_t* b; - b = ( blksz_t* ) bli_malloc( sizeof(blksz_t) ); + b = ( blksz_t* ) bli_malloc_intl( sizeof(blksz_t) ); bli_blksz_obj_init( b, b_s, be_s, @@ -71,7 +71,7 @@ void bli_blksz_obj_init( blksz_t* b, void bli_blksz_obj_free( blksz_t* b ) { - bli_free( b ); + bli_free_intl( b ); } // ----------------------------------------------------------------------------- diff --git 
a/frame/base/bli_check.c b/frame/base/bli_check.c index f6b5ae47f..2c63aeb81 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -797,6 +797,34 @@ err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx ) return e_val; } +err_t bli_check_alignment_is_power_of_two( size_t align_size ) +{ + err_t e_val = BLIS_SUCCESS; + + // This function returns an error code if align_size is zero or not + // a power of two. + + if ( align_size == 0 ) + e_val = BLIS_ALIGNMENT_NOT_POWER_OF_TWO; + else if ( ( align_size & ( align_size - 1 ) ) ) + e_val = BLIS_ALIGNMENT_NOT_POWER_OF_TWO; + + return e_val; +} + +err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ) +{ + err_t e_val = BLIS_SUCCESS; + + // This function returns an error code if align_size is not a whole + // multiple of the size of a pointer. + + if ( align_size % sizeof( void* ) != 0 ) + e_val = BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE; + + return e_val; +} + // -- Object-related errors ---------------------------------------------------- err_t bli_check_object_alias_of( obj_t* a, obj_t* b ) diff --git a/frame/base/bli_check.h b/frame/base/bli_check.h index e614861a8..76a396b07 100644 --- a/frame/base/bli_check.h +++ b/frame/base/bli_check.h @@ -100,6 +100,8 @@ err_t bli_check_valid_packbuf( packbuf_t buf_type ); err_t bli_check_requested_block_size_for_pool( siz_t req_size, pool_t* pool ); err_t bli_check_if_exhausted_pool( pool_t* pool ); err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx ); +err_t bli_check_alignment_is_power_of_two( size_t align_size ); +err_t bli_check_alignment_is_mult_of_ptr_size( size_t align_size ); err_t bli_check_object_alias_of( obj_t* a, obj_t* b ); diff --git a/frame/base/bli_cntx.c b/frame/base/bli_cntx.c index a2fedb755..d06167a07 100644 --- a/frame/base/bli_cntx.c +++ b/frame/base/bli_cntx.c @@ -392,10 +392,10 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) // Allocate some temporary local arrays. 
- bszids = bli_malloc( n_bs * sizeof( bszid_t ) ); - blkszs = bli_malloc( n_bs * sizeof( blksz_t* ) ); - bmults = bli_malloc( n_bs * sizeof( bszid_t ) ); - scalrs = bli_malloc( n_bs * sizeof( dim_t ) ); + bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) ); + bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + scalrs = bli_malloc_intl( n_bs * sizeof( dim_t ) ); // -- Begin variable argument section -- @@ -541,10 +541,10 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) } // Free the temporary local arrays. - bli_free( blkszs ); - bli_free( bszids ); - bli_free( bmults ); - bli_free( scalrs ); + bli_free_intl( blkszs ); + bli_free_intl( bszids ); + bli_free_intl( bmults ); + bli_free_intl( scalrs ); } #endif diff --git a/frame/base/bli_error.c b/frame/base/bli_error.c index e58d9d02d..2284d84c6 100644 --- a/frame/base/bli_error.c +++ b/frame/base/bli_error.c @@ -170,6 +170,10 @@ void bli_error_init_msgs( void ) "Attempted to allocate more memory from contiguous pool than is available." ); sprintf( bli_error_string_for_code(BLIS_INSUFFICIENT_STACK_BUF_SIZE), "Configured maximum stack buffer size is insufficient for register blocksizes currently in use." ); + sprintf( bli_error_string_for_code(BLIS_ALIGNMENT_NOT_POWER_OF_TWO), + "Encountered memory alignment value that is either zero or not a power of two." ); + sprintf( bli_error_string_for_code(BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE), + "Encountered memory alignment value that is not a multiple of sizeof(void*)." ); sprintf( bli_error_string_for_code(BLIS_EXPECTED_OBJECT_ALIAS), "Expected object to be alias." 
); diff --git a/frame/base/bli_func.c b/frame/base/bli_func.c index 8ee41d4a4..75be26085 100644 --- a/frame/base/bli_func.c +++ b/frame/base/bli_func.c @@ -42,7 +42,7 @@ func_t* bli_func_obj_create( void* ptr_s, { func_t* f; - f = ( func_t* ) bli_malloc( sizeof(func_t) ); + f = ( func_t* ) bli_malloc_intl( sizeof(func_t) ); bli_func_obj_init( f, ptr_s, @@ -67,7 +67,7 @@ void bli_func_obj_init( func_t* f, void bli_func_obj_free( func_t* f ) { - bli_free( f ); + bli_free_intl( f ); } // ----------------------------------------------------------------------------- diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 74e9dde11..6ae0f461e 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -133,9 +133,9 @@ void bli_gks_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) double scalr; // Allocate some temporary local arrays. - bszids = bli_malloc( n_bs * sizeof( bszid_t ) ); - bmults = bli_malloc( n_bs * sizeof( bszid_t ) ); - scalrs = bli_malloc( n_bs * sizeof( double ) ); + bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) ); + scalrs = bli_malloc_intl( n_bs * sizeof( double ) ); // -- Begin variable argument section -- @@ -274,9 +274,9 @@ void bli_gks_cntx_set_blkszs( ind_t method, dim_t n_bs, ... ) } // Free the temporary local arrays. - bli_free( bszids ); - bli_free( bmults ); - bli_free( scalrs ); + bli_free_intl( bszids ); + bli_free_intl( bmults ); + bli_free_intl( scalrs ); } @@ -415,7 +415,7 @@ void bli_gks_cntx_set_l3_nat_ukrs( dim_t n_uk, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l3_ukrs = bli_malloc( n_uk * sizeof( l3ukr_t ) ); + l3_ukrs = bli_malloc_intl( n_uk * sizeof( l3ukr_t ) ); // -- Begin variable argument section -- @@ -452,7 +452,7 @@ void bli_gks_cntx_set_l3_nat_ukrs( dim_t n_uk, ... ) } // Free the temporary local array. 
- bli_free( l3_ukrs ); + bli_free_intl( l3_ukrs ); } void bli_gks_cntx_set_l3_vir_ukr( ind_t method, @@ -485,7 +485,7 @@ void bli_gks_cntx_set_l3_vir_ukrs( ind_t method, dim_t n_uk, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l3_ukrs = bli_malloc( n_uk * sizeof( l3ukr_t ) ); + l3_ukrs = bli_malloc_intl( n_uk * sizeof( l3ukr_t ) ); // -- Begin variable argument section -- @@ -522,7 +522,7 @@ void bli_gks_cntx_set_l3_vir_ukrs( ind_t method, dim_t n_uk, ... ) } // Free the temporary local array. - bli_free( l3_ukrs ); + bli_free_intl( l3_ukrs ); } @@ -703,7 +703,7 @@ void bli_gks_cntx_set_l1f_kers( dim_t n_kr, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l1f_kers = bli_malloc( n_kr * sizeof( l1fkr_t ) ); + l1f_kers = bli_malloc_intl( n_kr * sizeof( l1fkr_t ) ); // -- Begin variable argument section -- @@ -740,7 +740,7 @@ void bli_gks_cntx_set_l1f_kers( dim_t n_kr, ... ) } // Free the temporary local array. - bli_free( l1f_kers ); + bli_free_intl( l1f_kers ); } @@ -879,7 +879,7 @@ void bli_gks_cntx_set_l1v_kers( dim_t n_kr, ... ) cntx_t* cntx; // Allocate some temporary local arrays. - l1v_kers = bli_malloc( n_kr * sizeof( l1vkr_t ) ); + l1v_kers = bli_malloc_intl( n_kr * sizeof( l1vkr_t ) ); // -- Begin variable argument section -- @@ -916,7 +916,7 @@ void bli_gks_cntx_set_l1v_kers( dim_t n_kr, ... ) } // Free the temporary local array. 
- bli_free( l1v_kers ); + bli_free_intl( l1v_kers ); } diff --git a/frame/base/bli_malloc.c b/frame/base/bli_malloc.c index 8afa3fb91..191db4834 100644 --- a/frame/base/bli_malloc.c +++ b/frame/base/bli_malloc.c @@ -34,37 +34,183 @@ #include "blis.h" -void* bli_malloc( siz_t size ) -{ - void* p = NULL; - int r_val; +// ----------------------------------------------------------------------------- +void* bli_malloc_pool( size_t size ) +{ + const malloc_ft malloc_fp = BLIS_MALLOC_POOL; + const size_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE; + + return bli_malloc_align( malloc_fp, size, align_size ); +} + +void bli_free_pool( void* p ) +{ + bli_free_align( BLIS_FREE_POOL, p ); +} + +// ----------------------------------------------------------------------------- + +void* bli_malloc_user( size_t size ) +{ + const malloc_ft malloc_fp = BLIS_MALLOC_USER; + const size_t align_size = BLIS_HEAP_ADDR_ALIGN_SIZE; + + return bli_malloc_align( malloc_fp, size, align_size ); +} + +void bli_free_user( void* p ) +{ + bli_free_align( BLIS_FREE_USER, p ); +} + +// ----------------------------------------------------------------------------- + +void* bli_malloc_intl( size_t size ) +{ + const malloc_ft malloc_fp = BLIS_MALLOC_INTL; + + return bli_malloc_noalign( malloc_fp, size ); +} + +void bli_free_intl( void* p ) +{ + bli_free_noalign( BLIS_FREE_INTL, p ); +} + +// ----------------------------------------------------------------------------- + +void* bli_malloc_align + ( + malloc_ft f, + size_t size, + size_t align_size + ) +{ + const size_t ptr_size = sizeof( void* ); + size_t align_offset = 0; + void* p_orig; + int8_t* p_byte; + void** p_addr; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_malloc_align_check( f, size, align_size ); + + // Return early if zero bytes were requested. 
if ( size == 0 ) return NULL; -#if BLIS_HEAP_ADDR_ALIGN_SIZE == 1 - p = malloc( ( size_t )size ); -#elif defined(_WIN32) - p = _aligned_malloc( ( size_t )size, - ( size_t )BLIS_HEAP_ADDR_ALIGN_SIZE ); -#else - r_val = posix_memalign( &p, - ( size_t )BLIS_HEAP_ADDR_ALIGN_SIZE, - ( size_t )size ); + // Add the alignment size and the size of a pointer to the number + // of bytes to allocate. + size += align_size + ptr_size; - if ( r_val != 0 ) bli_abort(); -#endif + // Call the allocation function. + p_orig = f( size ); - if ( p == NULL ) bli_abort(); + // If NULL was returned, something is probably very wrong. + if ( p_orig == NULL ) bli_abort(); - return p; + // Advance the pointer by one pointer element. + p_byte = p_orig; + p_byte += ptr_size; + + // Compute the offset to the desired alignment. + if ( bli_is_unaligned_to( p_byte, align_size ) ) + { + align_offset = align_size - + bli_offset_past_alignment( p_byte, align_size ); + } + + // Advance the pointer using the difference between the alignment + // size and the alignment offset. + p_byte += align_offset; + + // Compute the address of the pointer element just before the start + // of the aligned address, and store the original address there. + p_addr = ( void** )(p_byte - ptr_size); + *p_addr = p_orig; + + // Return the aligned pointer. + return p_byte; } -void bli_free( void* p ) +void bli_free_align + ( + free_ft f, + void* p + ) { -#if BLIS_HEAP_ADDR_ALIGN_SIZE == 1 || !defined(_WIN32) - free( p ); -#else - _aligned_free( p ); -#endif + const size_t ptr_size = sizeof( void* ); + void* p_orig; + int8_t* p_byte; + void** p_addr; + + // A NULL pointer is what bli_malloc_align() returns for a zero-size + // request; no original address was stored in front of it, so there + // is nothing to recover or free. 
+ if ( p == NULL ) return; + + // Since the bli_malloc_align() function returned the aligned pointer, + // we have to first recover the original pointer before we can free + // the memory. + + // Start by casting the pointer to a byte pointer. + p_byte = p; + + // Compute the address of the pointer element just before the start + // of the aligned address, and recover the original address. 
+ p_addr = ( void** )( p_byte - ptr_size ); + p_orig = *p_addr; + + // Free the original pointer. + f( p_orig ); } +// ----------------------------------------------------------------------------- + +void* bli_malloc_noalign + ( + malloc_ft f, + size_t size + ) +{ + void* p = f( size ); + + // Callers dereference the result without checking it, so treat an + // allocation failure as fatal, as bli_malloc_align() does. A NULL + // result for a zero-byte request is not a failure. + if ( p == NULL && size != 0 ) bli_abort(); + + return p; +} + +void bli_free_noalign + ( + free_ft f, + void* p + ) +{ + f( p ); +} + +// ----------------------------------------------------------------------------- + +void bli_malloc_align_check + ( + malloc_ft f, + size_t size, + size_t align_size + ) +{ + err_t e_val; + + // Check for valid alignment. + + e_val = bli_check_alignment_is_power_of_two( align_size ); + bli_check_error_code( e_val ); + + e_val = bli_check_alignment_is_mult_of_ptr_size( align_size ); + bli_check_error_code( e_val ); +} + + diff --git a/frame/base/bli_malloc.h b/frame/base/bli_malloc.h index f5a05c0e4..bd2a8cc9a 100644 --- a/frame/base/bli_malloc.h +++ b/frame/base/bli_malloc.h @@ -32,5 +32,28 @@ */ -void* bli_malloc( siz_t size ); -void bli_free( void* p ); +// Typedef function pointer types for malloc() and free() substitutes. 
+typedef void* (*malloc_ft) ( size_t size ); +typedef void (*free_ft) ( void* p ); + +// ----------------------------------------------------------------------------- + +void* bli_malloc_pool( size_t size ); +void bli_free_pool( void* p ); + +void* bli_malloc_intl( size_t size ); +void bli_free_intl( void* p ); + +void* bli_malloc_user( size_t size ); +void bli_free_user( void* p ); + +// ----------------------------------------------------------------------------- + +void* bli_malloc_align( malloc_ft f, size_t size, size_t align_size ); +void bli_free_align( free_ft f, void* p ); + +void* bli_malloc_noalign( malloc_ft f, size_t size ); +void bli_free_noalign( free_ft f, void* p ); + +void bli_malloc_align_check( malloc_ft f, size_t size, size_t align_size ); + diff --git a/frame/base/bli_mbool.c b/frame/base/bli_mbool.c index 9bea1cd2e..46ba531bc 100644 --- a/frame/base/bli_mbool.c +++ b/frame/base/bli_mbool.c @@ -42,7 +42,7 @@ mbool_t* bli_mbool_obj_create( bool_t b_s, { mbool_t* b; - b = ( mbool_t* ) bli_malloc( sizeof(mbool_t) ); + b = ( mbool_t* ) bli_malloc_intl( sizeof(mbool_t) ); bli_mbool_obj_init( b, b_s, @@ -67,6 +67,6 @@ void bli_mbool_obj_init( mbool_t* b, void bli_mbool_obj_free( mbool_t* b ) { - bli_free( b ); + bli_free_intl( b ); } diff --git a/frame/base/bli_mem.c b/frame/base/bli_mem.c index a1991304e..25530b1ed 100644 --- a/frame/base/bli_mem.c +++ b/frame/base/bli_mem.c @@ -61,8 +61,8 @@ void bli_mem_acquire_m( siz_t req_size, if ( buf_type == BLIS_BUFFER_FOR_GEN_USE ) { // For general-use buffer requests, such as those used by level-2 - // operations, using bli_malloc() is sufficient. - void* buf_sys = bli_malloc( req_size ); + // operations, dynamically allocating memory is sufficient. 
+ void* buf_sys = bli_malloc_pool( req_size ); // Initialize the mem_t object with: // - the address of the memory block, @@ -163,9 +163,9 @@ void bli_mem_release( mem_t* mem ) { void* buf_sys = bli_mem_buf_sys( mem ); - // For general-use buffers, we allocate with bli_malloc(), and so - // here we need to call bli_free(). - bli_free( buf_sys ); + // For general-use buffers, we dynamically allocate memory, and so + // here we need to free. + bli_free_pool( buf_sys ); } else { diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index d8d152003..226b0747a 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -166,7 +166,7 @@ void bli_obj_alloc_buffer( inc_t rs, buffer_size = ( siz_t )n_elem * elem_size; // Allocate the buffer. - p = bli_malloc( buffer_size ); + p = bli_malloc_user( buffer_size ); // Set individual fields. bli_obj_set_buffer( p, *obj ); @@ -221,7 +221,7 @@ void bli_obj_free( obj_t* obj ) // is a detached scalar (ie: if the buffer pointer refers to the // address of the internal scalar buffer). 
if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) ) - bli_free( bli_obj_buffer( *obj ) ); + bli_free_user( bli_obj_buffer( *obj ) ); } } @@ -436,6 +436,17 @@ dim_t bli_align_dim_to_size( dim_t dim, siz_t elem_size, siz_t align_size ) return dim; } +dim_t bli_align_ptr_to_size( void* p, size_t align_size ) +{ + dim_t dim; + + dim = ( ( ( uintptr_t )p + align_size - 1 ) / + align_size + ) * align_size; + + return dim; +} + static num_t type_union[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] = { // s c d z diff --git a/frame/base/bli_obj.h b/frame/base/bli_obj.h index 92e9b1d87..fffb183aa 100644 --- a/frame/base/bli_obj.h +++ b/frame/base/bli_obj.h @@ -89,6 +89,7 @@ siz_t bli_datatype_size( num_t dt ); dim_t bli_align_dim_to_mult( dim_t dim, dim_t dim_mult ); dim_t bli_align_dim_to_size( dim_t dim, siz_t elem_size, siz_t align_size ); +dim_t bli_align_ptr_to_size( void* p, size_t align_size ); num_t bli_datatype_union( num_t dt1, num_t dt2 ); diff --git a/frame/base/bli_pool.c b/frame/base/bli_pool.c index 23090ca39..a9660a058 100644 --- a/frame/base/bli_pool.c +++ b/frame/base/bli_pool.c @@ -43,7 +43,7 @@ void bli_pool_init( dim_t num_blocks, dim_t i; // Allocate the block_ptrs array. - block_ptrs = bli_malloc( num_blocks * sizeof( pblk_t ) ); + block_ptrs = bli_malloc_intl( num_blocks * sizeof( pblk_t ) ); // Allocate and initialize each entry in the block_ptrs array. for ( i = 0; i < num_blocks; ++i ) @@ -88,7 +88,7 @@ void bli_pool_finalize( pool_t* pool ) } // Free the block_ptrs array. - bli_free( block_ptrs ); + bli_free_intl( block_ptrs ); // Clear the contents of the pool_t struct. bli_pool_set_block_ptrs( NULL, pool ); @@ -235,7 +235,7 @@ void bli_pool_grow( dim_t num_blocks_add, pool_t* pool ) block_ptrs_cur = bli_pool_block_ptrs( pool ); // Allocate a new block_ptrs array of length num_blocks_new. 
- block_ptrs_new = bli_malloc( num_blocks_new * sizeof( pblk_t ) ); + block_ptrs_new = bli_malloc_intl( num_blocks_new * sizeof( pblk_t ) ); // Query the top_index of the pool. top_index = bli_pool_top_index( pool ); @@ -251,7 +251,7 @@ void bli_pool_grow( dim_t num_blocks_add, pool_t* pool ) //printf( "bli_pool_grow: bp_cur: %p\n", block_ptrs_cur ); // Free the old block_ptrs array. - bli_free( block_ptrs_cur ); + bli_free_intl( block_ptrs_cur ); // Update the pool_t struct with the new block_ptrs array and // record its allocated length. @@ -343,7 +343,7 @@ void bli_pool_alloc_block( siz_t block_size, // Allocate the block. We add the alignment size to ensure we will // have enough usable space after alignment. - buf_sys = bli_malloc( block_size + align_size ); + buf_sys = bli_malloc_pool( block_size + align_size ); buf_align = buf_sys; // Advance the pointer to achieve the necessary alignment, if it is not @@ -378,7 +378,7 @@ void bli_pool_free_block( pblk_t* block ) buf_sys = bli_pblk_buf_sys( block ); // Free the block. 
- bli_free( buf_sys ); + bli_free_pool( buf_sys ); } void bli_pool_print( pool_t* pool ) diff --git a/frame/base/bli_threading.c b/frame/base/bli_threading.c index c0d88cd31..316f3d5cd 100644 --- a/frame/base/bli_threading.c +++ b/frame/base/bli_threading.c @@ -96,7 +96,7 @@ void bli_level3_thread_decorator //Constructors and destructors for constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { - thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) ); + thread_comm_t* comm = (thread_comm_t*) bli_malloc_intl( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; } @@ -114,7 +114,7 @@ void bli_free_communicator( thread_comm_t* communicator ) { if( communicator == NULL ) return; bli_cleanup_communicator( communicator ); - bli_free( communicator ); + bli_free_intl( communicator ); } void bli_cleanup_communicator( thread_comm_t* communicator ) @@ -129,7 +129,7 @@ thrinfo_t* bli_create_thread_info( thread_comm_t* ocomm, dim_t ocomm_id, thread_ dim_t n_way, dim_t work_id ) { - thrinfo_t* thr = (thrinfo_t*) bli_malloc( sizeof(thrinfo_t) ); + thrinfo_t* thr = (thrinfo_t*) bli_malloc_intl( sizeof(thrinfo_t) ); bli_setup_thread_info( thr, ocomm, ocomm_id, icomm, icomm_id, n_way, work_id ); return thr; } diff --git a/frame/base/bli_threading_omp.c b/frame/base/bli_threading_omp.c index 8cd714da1..01a43c840 100644 --- a/frame/base/bli_threading_omp.c +++ b/frame/base/bli_threading_omp.c @@ -39,7 +39,7 @@ //Constructors and destructors for constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { - thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) ); + thread_comm_t* comm = (thread_comm_t*) bli_malloc_intl( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; } @@ -48,7 +48,7 @@ void bli_free_communicator( thread_comm_t* communicator ) { if( communicator == NULL ) return; bli_cleanup_communicator( communicator ); - bli_free( communicator ); + 
bli_free_intl( communicator ); } void bli_level3_thread_decorator @@ -129,7 +129,7 @@ void bli_free_barrier_tree( barrier_t* barrier ) if( barrier->count == 0 ) { bli_free_barrier_tree( barrier->dad ); - bli_free( barrier ); + bli_free_intl( barrier ); } return; } @@ -178,7 +178,7 @@ void bli_cleanup_communicator( thread_comm_t* communicator ) { bli_free_barrier_tree( communicator->barriers[i] ); } - bli_free( communicator->barriers ); + bli_free_intl( communicator->barriers ); } @@ -187,7 +187,7 @@ void bli_setup_communicator( thread_comm_t* communicator, dim_t n_threads) if( communicator == NULL ) return; communicator->sent_object = NULL; communicator->n_threads = n_threads; - communicator->barriers = ( barrier_t** ) bli_malloc( sizeof( barrier_t* ) * n_threads ); + communicator->barriers = ( barrier_t** ) bli_malloc_intl( sizeof( barrier_t* ) * n_threads ); bli_create_tree_barrier( n_threads, BLIS_TREE_BARRIER_ARITY, communicator->barriers, 0 ); } diff --git a/frame/base/bli_threading_pthreads.c b/frame/base/bli_threading_pthreads.c index e3f3f6387..f0de26198 100644 --- a/frame/base/bli_threading_pthreads.c +++ b/frame/base/bli_threading_pthreads.c @@ -125,8 +125,8 @@ void bli_level3_thread_decorator void** thread ) { - pthread_t* pthreads = (pthread_t*) bli_malloc(sizeof(pthread_t) * n_threads); - thread_data_t* datas = (thread_data_t*) bli_malloc(sizeof(thread_data_t) * n_threads); + pthread_t* pthreads = (pthread_t*) bli_malloc_intl(sizeof(pthread_t) * n_threads); + thread_data_t* datas = (thread_data_t*) bli_malloc_intl(sizeof(thread_data_t) * n_threads); for( int i = 1; i < n_threads; i++ ) { @@ -151,8 +151,8 @@ void bli_level3_thread_decorator pthread_join( pthreads[i], NULL ); } - bli_free( pthreads ); - bli_free( datas ); + bli_free_intl( pthreads ); + bli_free_intl( datas ); } //barrier routine taken from art of multicore programming @@ -164,7 +164,7 @@ void bli_barrier( thread_comm_t* communicator, dim_t t_id ) //Constructors and destructors for 
constructors thread_comm_t* bli_create_communicator( dim_t n_threads ) { - thread_comm_t* comm = (thread_comm_t*) bli_malloc( sizeof(thread_comm_t) ); + thread_comm_t* comm = (thread_comm_t*) bli_malloc_intl( sizeof(thread_comm_t) ); bli_setup_communicator( comm, n_threads ); return comm; } @@ -181,7 +181,7 @@ void bli_free_communicator( thread_comm_t* communicator ) { if( communicator == NULL ) return; bli_cleanup_communicator( communicator ); - bli_free( communicator ); + bli_free_intl( communicator ); } void bli_cleanup_communicator( thread_comm_t* communicator ) diff --git a/frame/cntl/bli_cntl.c b/frame/cntl/bli_cntl.c index a31fc2dd5..ffd6120c8 100644 --- a/frame/cntl/bli_cntl.c +++ b/frame/cntl/bli_cntl.c @@ -36,5 +36,5 @@ void bli_cntl_obj_free( void* cntl ) { - bli_free( cntl ); + bli_free_intl( cntl ); } diff --git a/frame/include/bli_kernel_macro_defs.h b/frame/include/bli_kernel_macro_defs.h index b1f869a9c..00a2aa4b9 100644 --- a/frame/include/bli_kernel_macro_defs.h +++ b/frame/include/bli_kernel_macro_defs.h @@ -38,8 +38,49 @@ // -- MEMORY ALLOCATION -------------------------------------------------------- -// Size of a virtual memory page. This is used to align certain memory -// buffers which are allocated and used internally. +// Memory allocation functions. These macros define the three types of +// malloc()-style functions, and their free() counterparts: one for each +// type of memory to be allocated. +// NOTE: ANY ALTERNATIVE TO malloc()/free() USED FOR ANY OF THE FOLLOWING +// THREE PAIRS OF MACROS MUST USE THE SAME FUNCTION PROTOTYPE AS malloc() +// and free(): +// +// void* malloc( size_t size ); +// void free( void* p ); +// + +// This allocation function is called to allocate memory for blocks within +// BLIS's internal memory pools. 
+#ifndef BLIS_MALLOC_POOL +#define BLIS_MALLOC_POOL malloc +#endif + +#ifndef BLIS_FREE_POOL +#define BLIS_FREE_POOL free +#endif + +// This allocation function is called to allocate memory for internally- +// used objects and structures, such as control tree nodes. +#ifndef BLIS_MALLOC_INTL +#define BLIS_MALLOC_INTL malloc +#endif + +#ifndef BLIS_FREE_INTL +#define BLIS_FREE_INTL free +#endif + +// This allocation function is called to allocate memory for objects +// created by user-level API functions, such as bli_obj_create(). +#ifndef BLIS_MALLOC_USER +#define BLIS_MALLOC_USER malloc +#endif + +#ifndef BLIS_FREE_USER +#define BLIS_FREE_USER free +#endif + +// Size of a virtual memory page. This is used to align blocks within the +// memory pools. #ifndef BLIS_PAGE_SIZE #define BLIS_PAGE_SIZE 4096 #endif @@ -76,17 +117,16 @@ // functions. #define BLIS_STACK_BUF_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE -// Alignment size used when allocating memory dynamically from the operating -// system (eg: posix_memalign()). To disable heap alignment and just use -// malloc() instead, set this to 1. +// Alignment size used when allocating memory via BLIS_MALLOC_USER. +// To disable heap alignment, set this to 1. #define BLIS_HEAP_ADDR_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE -// Alignment size used when sizing leading dimensions of dynamically -// allocated memory. +// Alignment size used when sizing leading dimensions of memory allocated +// via BLIS_MALLOC_USER. #define BLIS_HEAP_STRIDE_ALIGN_SIZE BLIS_SIMD_ALIGN_SIZE // Alignment size used when allocating blocks to the internal memory -// pool (for packing buffers). +// pool, via BLIS_MALLOC_POOL. 
#define BLIS_POOL_ADDR_ALIGN_SIZE BLIS_PAGE_SIZE diff --git a/frame/include/bli_malloc_prototypes.h b/frame/include/bli_malloc_prototypes.h new file mode 100644 index 000000000..e828f99aa --- /dev/null +++ b/frame/include/bli_malloc_prototypes.h @@ -0,0 +1,50 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_MALLOC_PROTOTYPES_H +#define BLIS_MALLOC_PROTOTYPES_H + +// Generate prototypes for each of the malloc() and free() functions +// defined in BLIS + +void* BLIS_MALLOC_POOL( size_t size ); +void BLIS_FREE_POOL( void* p ); + +void* BLIS_MALLOC_INTL( size_t size ); +void BLIS_FREE_INTL( void* p ); + +void* BLIS_MALLOC_USER( size_t size ); +void BLIS_FREE_USER( void* p ); + +#endif diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 54cba702e..8869cea17 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -48,7 +48,7 @@ \ ( ( uintptr_t )(p) % ( uintptr_t )(size) != 0 ) -#define bli_offset_from_alignment( p, size ) \ +#define bli_offset_past_alignment( p, size ) \ \ ( ( uintptr_t )(p) % ( uintptr_t )(size) ) diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index ffdcba56b..7274ce5a6 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -1000,6 +1000,8 @@ typedef enum BLIS_REQUESTED_CONTIG_BLOCK_TOO_BIG = (-121), BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-122), BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-123), + BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-124), + BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-125), // Object-related errors BLIS_EXPECTED_OBJECT_ALIAS = (-130), diff --git a/frame/include/blis.h b/frame/include/blis.h index e20fc5f73..06463dbed 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -90,6 +90,8 @@ extern "C" { #include "bli_kernel_prototypes.h" +#include "bli_malloc_prototypes.h" + // -- Base operation prototypes --