mirror of
https://github.com/amd/blis.git
synced 2026-06-06 04:34:02 +00:00
Implemented a pool-based small block allocator.
Details:
- Implemented a sophisticated data structure and set of APIs that track
the small blocks of memory (around 80-100 bytes each) used when
creating nodes for control and thread trees (cntl_t and thrinfo_t) as
well as thread communicators (thrcomm_t). The purpose of the small
block allocator, or sba, is to allow the library to transition into a
runtime state in which it does not perform any calls to malloc() or
free() during normal execution of level-3 operations, regardless of
the threading environment (potentially multiple application threads
as well as multiple BLIS threads). The functionality relies on a new
data structure, apool_t, which is (roughly speaking) a pool of
arrays, where each array element is a pool of small blocks. The outer
pool, which is protected by a mutex, provides separate arrays for each
application thread while the arrays each handle multiple BLIS threads
for any given application thread. The design minimizes the potential
for lock contention, as only concurrent application threads would
need to fight for the apool_t lock, and only if they happen to begin
their level-3 operations at precisely the same time. Thanks to Kiran
Varaganti and AMD for requesting this feature.
- Added a configure option to disable the sba pools, which are enabled
by default; renamed the --[dis|en]able-packbuf-pools option to
--[dis|en]able-pba-pools; and rewrote the --help text associated with
this new option and consolidated it with the --help text for the
option associated with the sba (--[dis|en]able-sba-pools).
- Moved the membrk field from the cntx_t to the rntm_t. We now pass in
a rntm_t* to the bli_membrk_acquire() and _release() APIs, just as we
do for bli_sba_acquire() and _release().
- Replaced all calls to bli_malloc_intl() and bli_free_intl() that are
used for small blocks with calls to bli_sba_acquire(), which takes a
rntm (in addition to the bytes requested), and bli_sba_release().
These latter two functions reduce to the former two when the sba pools
are disabled at configure-time.
- Added rntm_t* arguments to various cntl_t and thrinfo_t functions, as
required by the new usage of bli_sba_acquire() and _release().
- Moved the freeing of "old" blocks (those allocated prior to a change
in the block_size) from bli_membrk_acquire_m() to the implementation
of the pool_t checkout function.
- Miscellaneous improvements to the pool_t API.
- Added a block_size field to the pblk_t.
- Harmonized the way that the trsm_ukr testsuite module performs packing
relative to that of gemmtrsm_ukr, in part to avoid the need to create
a packm control tree node, which now requires a rntm_t that has been
initialized with an sba and membrk.
- Re-enabled the explicit call to bli_finalize() in the testsuite so that users who
run the testsuite with memory tracing enabled can check for memory
leaks.
- Manually imported the compact/minor changes from 61441b24 that cause
the rntm to be copied locally when it is passed in via one of the
expert APIs.
- Reordered parameters to various bli_thrcomm_*() functions so that the
thrcomm_t* to the comm being modified is last, not first.
- Added more descriptive tracing for allocating/freeing small blocks and
formalized it via a new configure option: --[dis|en]able-mem-tracing.
- Moved some unused scalm code and headers into frame/1m/other.
- Whitespace changes to bli_pthread.c.
- Regenerated build/libblis-symbols.def.
This commit is contained in:
@@ -61,8 +61,22 @@
|
||||
#define BLIS_ENABLE_JRIR_RR
|
||||
#endif
|
||||
|
||||
#if @enable_packbuf_pools@
|
||||
#define BLIS_ENABLE_PACKBUF_POOLS
|
||||
#if @enable_pba_pools@
|
||||
#define BLIS_ENABLE_PBA_POOLS
|
||||
#else
|
||||
#define BLIS_DISABLE_PBA_POOLS
|
||||
#endif
|
||||
|
||||
#if @enable_sba_pools@
|
||||
#define BLIS_ENABLE_SBA_POOLS
|
||||
#else
|
||||
#define BLIS_DISABLE_SBA_POOLS
|
||||
#endif
|
||||
|
||||
#if @enable_mem_tracing@
|
||||
#define BLIS_ENABLE_MEM_TRACING
|
||||
#else
|
||||
#define BLIS_DISABLE_MEM_TRACING
|
||||
#endif
|
||||
|
||||
#if @int_type_size@ == 64
|
||||
|
||||
@@ -42,10 +42,23 @@ bli_amaxv
|
||||
bli_amaxv_check
|
||||
bli_amaxv_ex
|
||||
bli_amaxv_ex_qfp
|
||||
bli_apool_alloc_block
|
||||
bli_apool_array_elem
|
||||
bli_apool_checkin_array
|
||||
bli_apool_checkout_array
|
||||
bli_apool_finalize
|
||||
bli_apool_free_block
|
||||
bli_apool_grow
|
||||
bli_apool_init
|
||||
bli_arch_query_id
|
||||
bli_arch_set_id
|
||||
bli_arch_set_id_once
|
||||
bli_arch_string
|
||||
bli_array_elem
|
||||
bli_array_finalize
|
||||
bli_array_init
|
||||
bli_array_resize
|
||||
bli_array_set_elem
|
||||
bli_asumv
|
||||
bli_asumv_check
|
||||
bli_asumv_ex
|
||||
@@ -129,6 +142,7 @@ bli_ccopym_unb_var1
|
||||
bli_ccopyv
|
||||
bli_ccopyv_ex
|
||||
bli_ccpackm_blk_var1_md
|
||||
bli_ccpackm_cxk_1e_md
|
||||
bli_ccpackm_cxk_1r_md
|
||||
bli_ccpackm_struc_cxk_md
|
||||
bli_ccxpbym_md
|
||||
@@ -151,6 +165,7 @@ bli_cdotxf_ex
|
||||
bli_cdotxv
|
||||
bli_cdotxv_ex
|
||||
bli_cdpackm_blk_var1_md
|
||||
bli_cdpackm_cxk_1e_md
|
||||
bli_cdpackm_cxk_1r_md
|
||||
bli_cdpackm_struc_cxk_md
|
||||
bli_cdxpbym_md
|
||||
@@ -240,6 +255,7 @@ bli_check_valid_datatype
|
||||
bli_check_valid_diag
|
||||
bli_check_valid_error_level
|
||||
bli_check_valid_kc_mod_mult
|
||||
bli_check_valid_malloc_buf
|
||||
bli_check_valid_mc_mod_mult
|
||||
bli_check_valid_nc_mod_mult
|
||||
bli_check_valid_packbuf
|
||||
@@ -453,6 +469,7 @@ bli_csgemm_ker_var2_md
|
||||
bli_cshiftd
|
||||
bli_cshiftd_ex
|
||||
bli_cspackm_blk_var1_md
|
||||
bli_cspackm_cxk_1e_md
|
||||
bli_cspackm_cxk_1r_md
|
||||
bli_cspackm_struc_cxk_md
|
||||
bli_csqrtsc
|
||||
@@ -556,6 +573,7 @@ bli_czcopysc
|
||||
bli_czgemm_ker_var2_md
|
||||
bli_czipsc
|
||||
bli_czpackm_blk_var1_md
|
||||
bli_czpackm_cxk_1e_md
|
||||
bli_czpackm_cxk_1r_md
|
||||
bli_czpackm_struc_cxk_md
|
||||
bli_czxpbym_md
|
||||
@@ -605,6 +623,7 @@ bli_dcopym_unb_var1
|
||||
bli_dcopyv
|
||||
bli_dcopyv_ex
|
||||
bli_dcpackm_blk_var1_md
|
||||
bli_dcpackm_cxk_1e_md
|
||||
bli_dcpackm_cxk_1r_md
|
||||
bli_dcpackm_struc_cxk_md
|
||||
bli_dcxpbym_md
|
||||
@@ -631,6 +650,7 @@ bli_ddotxv
|
||||
bli_ddotxv_ex
|
||||
bli_ddotxv_zen_int
|
||||
bli_ddpackm_blk_var1_md
|
||||
bli_ddpackm_cxk_1e_md
|
||||
bli_ddpackm_cxk_1r_md
|
||||
bli_ddpackm_struc_cxk_md
|
||||
bli_ddxpbym_md
|
||||
@@ -838,6 +858,7 @@ bli_dsgemm_ker_var2_md
|
||||
bli_dshiftd
|
||||
bli_dshiftd_ex
|
||||
bli_dspackm_blk_var1_md
|
||||
bli_dspackm_cxk_1e_md
|
||||
bli_dspackm_cxk_1r_md
|
||||
bli_dspackm_struc_cxk_md
|
||||
bli_dsqrtsc
|
||||
@@ -946,6 +967,7 @@ bli_dzcopysc
|
||||
bli_dzgemm_ker_var2_md
|
||||
bli_dzipsc
|
||||
bli_dzpackm_blk_var1_md
|
||||
bli_dzpackm_cxk_1e_md
|
||||
bli_dzpackm_cxk_1r_md
|
||||
bli_dzpackm_struc_cxk_md
|
||||
bli_dzxpbym_md
|
||||
@@ -958,11 +980,16 @@ bli_error_finalize
|
||||
bli_error_init
|
||||
bli_error_init_msgs
|
||||
bli_error_string_for_code
|
||||
bli_ffree_align
|
||||
bli_ffree_noalign
|
||||
bli_finalize
|
||||
bli_finalize_apis
|
||||
bli_finalize_auto
|
||||
bli_finalize_once
|
||||
bli_find_area_trap_l
|
||||
bli_fmalloc_align
|
||||
bli_fmalloc_align_check
|
||||
bli_fmalloc_noalign
|
||||
bli_fprintm
|
||||
bli_fprintm_check
|
||||
bli_fprintm_ex
|
||||
@@ -971,9 +998,7 @@ bli_fprintv
|
||||
bli_fprintv_check
|
||||
bli_fprintv_ex
|
||||
bli_fprintv_qfp
|
||||
bli_free_align
|
||||
bli_free_intl
|
||||
bli_free_noalign
|
||||
bli_free_pool
|
||||
bli_free_user
|
||||
bli_func_create
|
||||
@@ -1187,9 +1212,10 @@ bli_info_get_enable_blas
|
||||
bli_info_get_enable_cblas
|
||||
bli_info_get_enable_memkind
|
||||
bli_info_get_enable_openmp
|
||||
bli_info_get_enable_packbuf_pools
|
||||
bli_info_get_enable_pba_pools
|
||||
bli_info_get_enable_pthreads
|
||||
bli_info_get_enable_sandbox
|
||||
bli_info_get_enable_sba_pools
|
||||
bli_info_get_enable_stay_auto_init
|
||||
bli_info_get_enable_threading
|
||||
bli_info_get_gemm_impl_string
|
||||
@@ -1264,7 +1290,7 @@ bli_l1v_xi_check
|
||||
bli_l1v_xy_check
|
||||
bli_l3_basic_check
|
||||
bli_l3_cntl_create_if
|
||||
bli_l3_cntl_free_if
|
||||
bli_l3_cntl_free
|
||||
bli_l3_determine_kc
|
||||
bli_l3_direct
|
||||
bli_l3_ind_oper_enable_only
|
||||
@@ -1289,17 +1315,13 @@ bli_l3_thrinfo_print_paths
|
||||
bli_lcm
|
||||
bli_lsame
|
||||
bli_machval
|
||||
bli_malloc_align
|
||||
bli_malloc_align_check
|
||||
bli_malloc_intl
|
||||
bli_malloc_noalign
|
||||
bli_malloc_pool
|
||||
bli_malloc_user
|
||||
bli_mbool_create
|
||||
bli_mbool_free
|
||||
bli_mbool_init
|
||||
bli_membrk_acquire_m
|
||||
bli_membrk_acquire_v
|
||||
bli_membrk_compute_pool_block_sizes
|
||||
bli_membrk_compute_pool_block_sizes_dt
|
||||
bli_membrk_finalize
|
||||
@@ -1307,9 +1329,10 @@ bli_membrk_finalize_pools
|
||||
bli_membrk_init
|
||||
bli_membrk_init_pools
|
||||
bli_membrk_pool_size
|
||||
bli_membrk_query
|
||||
bli_membrk_release
|
||||
bli_membrk_rntm_set_membrk
|
||||
bli_memsys_finalize
|
||||
bli_memsys_global_membrk
|
||||
bli_memsys_init
|
||||
bli_mkherm
|
||||
bli_mkherm_check
|
||||
@@ -1448,11 +1471,16 @@ bli_prune_unref_mparts
|
||||
bli_pthread_barrier_destroy
|
||||
bli_pthread_barrier_init
|
||||
bli_pthread_barrier_wait
|
||||
bli_pthread_cond_broadcast
|
||||
bli_pthread_cond_destroy
|
||||
bli_pthread_cond_init
|
||||
bli_pthread_cond_wait
|
||||
bli_pthread_create
|
||||
bli_pthread_join
|
||||
bli_pthread_mutex_destroy
|
||||
bli_pthread_mutex_init
|
||||
bli_pthread_mutex_lock
|
||||
bli_pthread_mutex_trylock
|
||||
bli_pthread_mutex_unlock
|
||||
bli_pthread_once
|
||||
bli_randm
|
||||
@@ -1505,6 +1533,14 @@ bli_saxpyv
|
||||
bli_saxpyv_ex
|
||||
bli_saxpyv_zen_int
|
||||
bli_saxpyv_zen_int10
|
||||
bli_sba_acquire
|
||||
bli_sba_checkin_array
|
||||
bli_sba_checkout_array
|
||||
bli_sba_finalize
|
||||
bli_sba_init
|
||||
bli_sba_query
|
||||
bli_sba_release
|
||||
bli_sba_rntm_set_pool
|
||||
bli_scal2d
|
||||
bli_scal2d_check
|
||||
bli_scal2d_ex
|
||||
@@ -1523,7 +1559,6 @@ bli_scald_ex
|
||||
bli_scald_ex_qfp
|
||||
bli_scalm
|
||||
bli_scalm_check
|
||||
bli_scalm_cntl_create_node
|
||||
bli_scalm_ex
|
||||
bli_scalm_ex_qfp
|
||||
bli_scalv
|
||||
@@ -1543,6 +1578,7 @@ bli_scopym_unb_var1
|
||||
bli_scopyv
|
||||
bli_scopyv_ex
|
||||
bli_scpackm_blk_var1_md
|
||||
bli_scpackm_cxk_1e_md
|
||||
bli_scpackm_cxk_1r_md
|
||||
bli_scpackm_struc_cxk_md
|
||||
bli_scxpbym_md
|
||||
@@ -1569,6 +1605,7 @@ bli_sdotxv
|
||||
bli_sdotxv_ex
|
||||
bli_sdotxv_zen_int
|
||||
bli_sdpackm_blk_var1_md
|
||||
bli_sdpackm_cxk_1e_md
|
||||
bli_sdpackm_cxk_1r_md
|
||||
bli_sdpackm_struc_cxk_md
|
||||
bli_sdxpbym_md
|
||||
@@ -1780,6 +1817,7 @@ bli_ssgemm_ker_var2_md
|
||||
bli_sshiftd
|
||||
bli_sshiftd_ex
|
||||
bli_sspackm_blk_var1_md
|
||||
bli_sspackm_cxk_1e_md
|
||||
bli_sspackm_cxk_1r_md
|
||||
bli_sspackm_struc_cxk_md
|
||||
bli_ssqrtsc
|
||||
@@ -1955,6 +1993,7 @@ bli_szcopysc
|
||||
bli_szgemm_ker_var2_md
|
||||
bli_szipsc
|
||||
bli_szpackm_blk_var1_md
|
||||
bli_szpackm_cxk_1e_md
|
||||
bli_szpackm_cxk_1r_md
|
||||
bli_szpackm_struc_cxk_md
|
||||
bli_szxpbym_md
|
||||
@@ -1997,6 +2036,7 @@ bli_thread_set_ways
|
||||
bli_thread_set_ways_
|
||||
bli_thrinfo_create
|
||||
bli_thrinfo_create_for_cntl
|
||||
bli_thrinfo_free
|
||||
bli_thrinfo_grow
|
||||
bli_thrinfo_init
|
||||
bli_thrinfo_init_single
|
||||
@@ -2166,6 +2206,7 @@ bli_zcopym_unb_var1
|
||||
bli_zcopyv
|
||||
bli_zcopyv_ex
|
||||
bli_zcpackm_blk_var1_md
|
||||
bli_zcpackm_cxk_1e_md
|
||||
bli_zcpackm_cxk_1r_md
|
||||
bli_zcpackm_struc_cxk_md
|
||||
bli_zcxpbym_md
|
||||
@@ -2188,6 +2229,7 @@ bli_zdotxf_ex
|
||||
bli_zdotxv
|
||||
bli_zdotxv_ex
|
||||
bli_zdpackm_blk_var1_md
|
||||
bli_zdpackm_cxk_1e_md
|
||||
bli_zdpackm_cxk_1r_md
|
||||
bli_zdpackm_struc_cxk_md
|
||||
bli_zdxpbym_md
|
||||
@@ -2377,6 +2419,7 @@ bli_zsgemm_ker_var2_md
|
||||
bli_zshiftd
|
||||
bli_zshiftd_ex
|
||||
bli_zspackm_blk_var1_md
|
||||
bli_zspackm_cxk_1e_md
|
||||
bli_zspackm_cxk_1r_md
|
||||
bli_zspackm_struc_cxk_md
|
||||
bli_zsqrtsc
|
||||
@@ -2480,6 +2523,7 @@ bli_zzcopysc
|
||||
bli_zzgemm_ker_var2_md
|
||||
bli_zzipsc
|
||||
bli_zzpackm_blk_var1_md
|
||||
bli_zzpackm_cxk_1e_md
|
||||
bli_zzpackm_cxk_1r_md
|
||||
bli_zzpackm_struc_cxk_md
|
||||
bli_zzxpbym_md
|
||||
|
||||
95
configure
vendored
95
configure
vendored
@@ -148,20 +148,37 @@ print_usage()
|
||||
echo " --disable-threading is specified, threading will be"
|
||||
echo " disabled. The default is 'no'."
|
||||
echo " "
|
||||
echo " --disable-packbuf-pools, --enable-packbuf-pools"
|
||||
echo " --disable-pba-pools, --enable-pba-pools"
|
||||
echo " --disable-sba-pools, --enable-sba-pools"
|
||||
echo " "
|
||||
echo " Disable (enabled by default) use of internal memory"
|
||||
echo " pools for managing packing buffers. When disabled,"
|
||||
echo " the function specified by BLIS_MALLOC_POOL is called"
|
||||
echo " on-demand, whenever a packing buffer is needed, and"
|
||||
echo " the buffer is released via the function specified by"
|
||||
echo " BLIS_FREE_POOL() when the loop in which it was"
|
||||
echo " allocated terminates. When enabled, the memory pools"
|
||||
echo " minimize calls to both BLIS_MALLOC_POOL() and"
|
||||
echo " BLIS_FREE_POOL(), especially in a multithreaded"
|
||||
echo " environment, but does so through a mechanism that may"
|
||||
echo " incur additional overhead in some (but not all)"
|
||||
echo " situations."
|
||||
echo " Disable (enabled by default) use of internal memory pools"
|
||||
echo " within the packing block allocator (pba) and/or the small"
|
||||
echo " block allocator (sba). The former is used to allocate"
|
||||
echo " memory used to pack submatrices while the latter is used"
|
||||
echo " to allocate control/thread tree nodes and thread"
|
||||
echo " communicators. Both allocations take place in the context"
|
||||
echo " of level-3 operations. When the pba is disabled, the"
|
||||
echo " malloc()-like function specified by BLIS_MALLOC_POOL is"
|
||||
echo " called on-demand whenever a packing block is needed, and"
|
||||
echo " when the sba is disabled, the malloc()-like function"
|
||||
echo " specified by BLIS_MALLOC_INTL is called whenever a small"
|
||||
echo " block is needed, with the two allocators calling free()-"
|
||||
echo " like functions BLIS_FREE_POOL and BLIS_FREE_INTL,"
|
||||
echo " respectively when blocks are released. When enabled,"
|
||||
echo " either or both pools are populated via the same functions"
|
||||
echo " mentioned previously, and henceforth blocks are checked"
|
||||
echo " out and in. The library quickly reaches a state in which"
|
||||
echo " it no longer needs to call malloc() or free(), even"
|
||||
echo " across many separate level-3 operation invocations."
|
||||
echo " "
|
||||
echo " "
|
||||
echo " --enable-mem-tracing, --disable-mem-tracing"
|
||||
echo " "
|
||||
echo " Enable (disable by default) output to stdout that traces"
|
||||
echo " the allocation and freeing of memory, including the names"
|
||||
echo " of the functions that triggered the allocation/freeing."
|
||||
echo " Enabling this option WILL NEGATIVELY IMPACT PERFORMANCE."
|
||||
echo " Please use only for informational/debugging purposes."
|
||||
echo " "
|
||||
echo " -i SIZE, --int-size=SIZE"
|
||||
echo " "
|
||||
@@ -1720,7 +1737,9 @@ main()
|
||||
enable_arg_max_hack='no'
|
||||
enable_static='yes'
|
||||
enable_shared='yes'
|
||||
enable_packbuf_pools='yes'
|
||||
enable_pba_pools='yes'
|
||||
enable_sba_pools='yes'
|
||||
enable_mem_tracing='no'
|
||||
int_type_size=0
|
||||
blas_int_type_size=32
|
||||
enable_blas='yes'
|
||||
@@ -1837,11 +1856,23 @@ main()
|
||||
disable-threading)
|
||||
threading_model='no'
|
||||
;;
|
||||
enable-packbuf-pools)
|
||||
enable_packbuf_pools='yes'
|
||||
enable-pba-pools)
|
||||
enable_pba_pools='yes'
|
||||
;;
|
||||
disable-packbuf-pools)
|
||||
enable_packbuf_pools='no'
|
||||
disable-pba-pools)
|
||||
enable_pba_pools='no'
|
||||
;;
|
||||
enable-sba-pools)
|
||||
enable_sba_pools='yes'
|
||||
;;
|
||||
disable-sba-pools)
|
||||
enable_sba_pools='no'
|
||||
;;
|
||||
enable-mem-tracing)
|
||||
enable_mem_tracing='yes'
|
||||
;;
|
||||
disable-mem-tracing)
|
||||
enable_mem_tracing='no'
|
||||
;;
|
||||
enable-sandbox=*)
|
||||
sandbox_flag=1
|
||||
@@ -2549,12 +2580,26 @@ main()
|
||||
fi
|
||||
|
||||
# Convert 'yes' and 'no' flags to booleans.
|
||||
if [ "x${enable_packbuf_pools}" = "xyes" ]; then
|
||||
echo "${script_name}: internal memory pools for packing buffers are enabled."
|
||||
enable_packbuf_pools_01=1
|
||||
if [ "x${enable_pba_pools}" = "xyes" ]; then
|
||||
echo "${script_name}: internal memory pools for packing blocks are enabled."
|
||||
enable_pba_pools_01=1
|
||||
else
|
||||
echo "${script_name}: internal memory pools for packing buffers are disabled."
|
||||
enable_packbuf_pools_01=0
|
||||
echo "${script_name}: internal memory pools for packing blocks are disabled."
|
||||
enable_pba_pools_01=0
|
||||
fi
|
||||
if [ "x${enable_sba_pools}" = "xyes" ]; then
|
||||
echo "${script_name}: internal memory pools for small blocks are enabled."
|
||||
enable_sba_pools_01=1
|
||||
else
|
||||
echo "${script_name}: internal memory pools for small blocks are disabled."
|
||||
enable_sba_pools_01=0
|
||||
fi
|
||||
if [ "x${enable_mem_tracing}" = "xyes" ]; then
|
||||
echo "${script_name}: memory tracing output is enabled."
|
||||
enable_mem_tracing_01=1
|
||||
else
|
||||
echo "${script_name}: memory tracing output is disabled."
|
||||
enable_mem_tracing_01=0
|
||||
fi
|
||||
if [ "x${has_memkind}" = "xyes" ]; then
|
||||
if [ "x${enable_memkind}" = "x" ]; then
|
||||
@@ -2809,7 +2854,9 @@ main()
|
||||
| sed -e "s/@enable_pthreads@/${enable_pthreads_01}/g" \
|
||||
| sed -e "s/@enable_jrir_slab@/${enable_jrir_slab_01}/g" \
|
||||
| sed -e "s/@enable_jrir_rr@/${enable_jrir_rr_01}/g" \
|
||||
| sed -e "s/@enable_packbuf_pools@/${enable_packbuf_pools_01}/g" \
|
||||
| sed -e "s/@enable_pba_pools@/${enable_pba_pools_01}/g" \
|
||||
| sed -e "s/@enable_sba_pools@/${enable_sba_pools_01}/g" \
|
||||
| sed -e "s/@enable_mem_tracing@/${enable_mem_tracing_01}/g" \
|
||||
| sed -e "s/@int_type_size@/${int_type_size}/g" \
|
||||
| sed -e "s/@blas_int_type_size@/${blas_int_type_size}/g" \
|
||||
| sed -e "s/@enable_blas@/${enable_blas_01}/g" \
|
||||
|
||||
@@ -66,6 +66,3 @@
|
||||
#include "bli_packm.h"
|
||||
#include "bli_unpackm.h"
|
||||
|
||||
// Other
|
||||
#include "bli_scalm.h"
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
|
||||
cntl_t* bli_packm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
void* var_func,
|
||||
void* packm_var_func,
|
||||
bszid_t bmid_m,
|
||||
@@ -52,12 +53,12 @@ cntl_t* bli_packm_cntl_create_node
|
||||
cntl_t* cntl;
|
||||
packm_params_t* params;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_packm_cntl_create_node(): " );
|
||||
#endif
|
||||
|
||||
// Allocate a packm_params_t struct.
|
||||
params = bli_malloc_intl( sizeof( packm_params_t ) );
|
||||
params = bli_sba_acquire( rntm, sizeof( packm_params_t ) );
|
||||
|
||||
// Initialize the packm_params_t struct.
|
||||
params->size = sizeof( packm_params_t );
|
||||
@@ -70,7 +71,7 @@ cntl_t* bli_packm_cntl_create_node
|
||||
params->pack_schema = pack_schema;
|
||||
params->pack_buf_type = pack_buf_type;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_packm_cntl_create_node(): " );
|
||||
#endif
|
||||
|
||||
@@ -80,6 +81,7 @@ cntl_t* bli_packm_cntl_create_node
|
||||
// sync with the cntl_t tree.
|
||||
cntl = bli_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
BLIS_NOID,
|
||||
BLIS_NO_PART,
|
||||
var_func,
|
||||
|
||||
@@ -90,6 +90,7 @@ static packbuf_t bli_cntl_packm_params_pack_buf_type( cntl_t* cntl )
|
||||
|
||||
cntl_t* bli_packm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
void* var_func,
|
||||
void* packm_var_func,
|
||||
bszid_t bmid_m,
|
||||
|
||||
@@ -34,32 +34,6 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
#if 0
|
||||
thrinfo_t* bli_packm_thrinfo_create
|
||||
(
|
||||
thrcomm_t* ocomm,
|
||||
dim_t ocomm_id,
|
||||
dim_t n_way,
|
||||
dim_t work_id,
|
||||
thrinfo_t* sub_node
|
||||
)
|
||||
{
|
||||
thrinfo_t* thread = bli_malloc_intl( sizeof( thrinfo_t ) );
|
||||
|
||||
bli_thrinfo_init
|
||||
(
|
||||
thread,
|
||||
ocomm, ocomm_id,
|
||||
n_way,
|
||||
work_id,
|
||||
FALSE,
|
||||
sub_node
|
||||
);
|
||||
|
||||
return thread;
|
||||
}
|
||||
#endif
|
||||
|
||||
void bli_packm_thrinfo_init
|
||||
(
|
||||
thrinfo_t* thread,
|
||||
@@ -95,14 +69,3 @@ void bli_packm_thrinfo_init_single
|
||||
);
|
||||
}
|
||||
|
||||
#if 0
|
||||
void bli_packm_thrinfo_free
|
||||
(
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
if ( thread != NULL &&
|
||||
thread != &BLIS_PACKM_SINGLE_THREADED )
|
||||
bli_free_intl( thread );
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
|
||||
cntl_t* bli_unpackm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
void* var_func,
|
||||
void* unpackm_var_func,
|
||||
cntl_t* sub_node
|
||||
@@ -44,6 +45,10 @@ cntl_t* bli_unpackm_cntl_create_node
|
||||
cntl_t* cntl;
|
||||
unpackm_params_t* params;
|
||||
|
||||
// NOTE: If this function is ever called, figure out whether the
|
||||
// bli_malloc_intl() below needs to be changed to bli_sba_acquire().
|
||||
bli_abort();
|
||||
|
||||
// Allocate an unpackm_params_t struct.
|
||||
params = bli_malloc_intl( sizeof( unpackm_params_t ) );
|
||||
|
||||
@@ -57,6 +62,7 @@ cntl_t* bli_unpackm_cntl_create_node
|
||||
// sync with the cntl_t tree.
|
||||
cntl = bli_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
BLIS_NOID,
|
||||
BLIS_NO_PART,
|
||||
var_func,
|
||||
|
||||
@@ -47,6 +47,7 @@ typedef struct unpackm_params_s unpackm_params_t;
|
||||
|
||||
cntl_t* bli_unpackm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
void* var_func,
|
||||
void* unpackm_var_func,
|
||||
cntl_t* sub_node
|
||||
|
||||
@@ -44,27 +44,11 @@ void bli_l3_cntl_create_if
|
||||
obj_t* a,
|
||||
obj_t* b,
|
||||
obj_t* c,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl_orig,
|
||||
cntl_t** cntl_use
|
||||
)
|
||||
{
|
||||
// This is part of a hack to support mixed domain in bli_gemm_front().
|
||||
// Sometimes we need to specify a non-standard schema for A and B, and
|
||||
// we decided to transmit them via the schema field in the obj_t's
|
||||
// rather than pass them in as function parameters. Once the values
|
||||
// have been read, we immediately reset them back to their expected
|
||||
// values for unpacked objects. Notice that we do this even if the
|
||||
// caller passed in a custom control tree; that's because we still need
|
||||
// to reset the pack schema of a and b, which were modified by the
|
||||
// operation's _front() function. However, in order for this to work,
|
||||
// the level-3 thread entry function (or omp parallel region) must
|
||||
// alias thread-local copies of objects a and b.
|
||||
//pack_t schema_a = bli_obj_pack_schema( a );
|
||||
//pack_t schema_b = bli_obj_pack_schema( b );
|
||||
|
||||
//bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
|
||||
//bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );
|
||||
|
||||
// If the control tree pointer is NULL, we construct a default
|
||||
// tree as a function of the operation family.
|
||||
if ( cntl_orig == NULL )
|
||||
@@ -73,7 +57,7 @@ void bli_l3_cntl_create_if
|
||||
family == BLIS_HERK ||
|
||||
family == BLIS_TRMM )
|
||||
{
|
||||
*cntl_use = bli_gemm_cntl_create( family, schema_a, schema_b );
|
||||
*cntl_use = bli_gemm_cntl_create( rntm, family, schema_a, schema_b );
|
||||
}
|
||||
else // if ( family == BLIS_TRSM )
|
||||
{
|
||||
@@ -82,7 +66,7 @@ void bli_l3_cntl_create_if
|
||||
if ( bli_obj_is_triangular( a ) ) side = BLIS_LEFT;
|
||||
else side = BLIS_RIGHT;
|
||||
|
||||
*cntl_use = bli_trsm_cntl_create( side, schema_a, schema_b );
|
||||
*cntl_use = bli_trsm_cntl_create( rntm, side, schema_a, schema_b );
|
||||
}
|
||||
}
|
||||
else
|
||||
@@ -90,7 +74,7 @@ void bli_l3_cntl_create_if
|
||||
// If the user provided a control tree, create a copy and use it
|
||||
// instead (so that threads can use its local tree as a place to
|
||||
// cache things like pack mem_t entries).
|
||||
*cntl_use = bli_cntl_copy( cntl_orig );
|
||||
*cntl_use = bli_cntl_copy( rntm, cntl_orig );
|
||||
|
||||
// Recursively set the family fields of the newly copied control tree
|
||||
// nodes.
|
||||
@@ -100,6 +84,7 @@ void bli_l3_cntl_create_if
|
||||
|
||||
void bli_l3_cntl_free
|
||||
(
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl_use,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
@@ -115,11 +100,11 @@ void bli_l3_cntl_free
|
||||
family == BLIS_HERK ||
|
||||
family == BLIS_TRMM )
|
||||
{
|
||||
bli_gemm_cntl_free( cntl_use, thread );
|
||||
bli_gemm_cntl_free( rntm, cntl_use, thread );
|
||||
}
|
||||
else // if ( family == BLIS_TRSM )
|
||||
{
|
||||
bli_trsm_cntl_free( cntl_use, thread );
|
||||
bli_trsm_cntl_free( rntm, cntl_use, thread );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -46,12 +46,14 @@ void bli_l3_cntl_create_if
|
||||
obj_t* a,
|
||||
obj_t* b,
|
||||
obj_t* c,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl_orig,
|
||||
cntl_t** cntl_use
|
||||
);
|
||||
|
||||
void bli_l3_cntl_free
|
||||
(
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl_use,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
@@ -45,7 +45,6 @@ void bli_l3_packm
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
membrk_t* membrk;
|
||||
packbuf_t pack_buf_type;
|
||||
mem_t* cntl_mem_p;
|
||||
siz_t size_needed;
|
||||
@@ -70,9 +69,6 @@ void bli_l3_packm
|
||||
// return early.
|
||||
if ( size_needed == 0 ) return;
|
||||
|
||||
// Query the memory broker from the context.
|
||||
membrk = bli_cntx_get_membrk( cntx );
|
||||
|
||||
// Query the pack buffer type from the control tree node.
|
||||
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
|
||||
|
||||
@@ -89,7 +85,7 @@ void bli_l3_packm
|
||||
|
||||
if ( bli_thread_am_ochief( thread ) )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_packm(): acquiring mem pool block\n" );
|
||||
#endif
|
||||
|
||||
@@ -97,7 +93,7 @@ void bli_l3_packm
|
||||
// and saves the associated mem_t entry to local_mem_s.
|
||||
bli_membrk_acquire_m
|
||||
(
|
||||
membrk,
|
||||
rntm,
|
||||
size_needed,
|
||||
pack_buf_type,
|
||||
&local_mem_s
|
||||
@@ -134,10 +130,14 @@ void bli_l3_packm
|
||||
// The chief thread releases the existing block associated with
|
||||
// the mem_t entry in the control tree, and then re-acquires a
|
||||
// new block, saving the associated mem_t entry to local_mem_s.
|
||||
bli_membrk_release( cntl_mem_p );
|
||||
bli_membrk_release
|
||||
(
|
||||
rntm,
|
||||
cntl_mem_p
|
||||
);
|
||||
bli_membrk_acquire_m
|
||||
(
|
||||
membrk,
|
||||
rntm,
|
||||
size_needed,
|
||||
pack_buf_type,
|
||||
&local_mem_s
|
||||
|
||||
@@ -88,37 +88,11 @@ void bli_l3_thrinfo_init_single
|
||||
|
||||
void bli_l3_thrinfo_free
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
if ( thread == NULL ||
|
||||
thread == &BLIS_PACKM_SINGLE_THREADED ||
|
||||
thread == &BLIS_GEMM_SINGLE_THREADED
|
||||
) return;
|
||||
|
||||
thrinfo_t* thrinfo_sub_node = bli_thrinfo_sub_node( thread );
|
||||
|
||||
// Free the communicators, but only if the current thrinfo_t struct
|
||||
// is marked as needing them to be freed. The most common example of
|
||||
// thrinfo_t nodes NOT marked as needing their comms freed are those
|
||||
// associated with packm thrinfo_t nodes.
|
||||
if ( bli_thrinfo_needs_free_comm( thread ) )
|
||||
{
|
||||
// The ochief always frees his communicator, and the ichief free its
|
||||
// communicator if we are at the leaf node.
|
||||
if ( bli_thread_am_ochief( thread ) )
|
||||
bli_thrcomm_free( bli_thrinfo_ocomm( thread ) );
|
||||
}
|
||||
|
||||
// Free all children of the current thrinfo_t.
|
||||
bli_l3_thrinfo_free( thrinfo_sub_node );
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_l3_thrinfo_free(): " );
|
||||
#endif
|
||||
|
||||
// Free the thrinfo_t struct.
|
||||
bli_free_intl( thread );
|
||||
bli_thrinfo_free( rntm, thread );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -149,6 +123,7 @@ void bli_l3_thrinfo_create_root
|
||||
// Create the root thrinfo_t node.
|
||||
*thread = bli_thrinfo_create
|
||||
(
|
||||
rntm,
|
||||
gl_comm,
|
||||
gl_comm_id,
|
||||
xx_way,
|
||||
@@ -348,6 +323,7 @@ void bli_l3_thrinfo_print_paths
|
||||
|
||||
void bli_l3_thrinfo_free_paths
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrinfo_t** threads
|
||||
)
|
||||
{
|
||||
@@ -355,7 +331,7 @@ void bli_l3_thrinfo_free_paths
|
||||
dim_t i;
|
||||
|
||||
for ( i = 0; i < n_threads; ++i )
|
||||
bli_l3_thrinfo_free( threads[i] );
|
||||
bli_l3_thrinfo_free( rntm, threads[i] );
|
||||
|
||||
bli_free_intl( threads );
|
||||
}
|
||||
|
||||
@@ -89,6 +89,7 @@ void bli_l3_thrinfo_init_single
|
||||
|
||||
void bli_l3_thrinfo_free
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
@@ -112,6 +113,7 @@ void bli_l3_thrinfo_print_paths
|
||||
|
||||
void bli_l3_thrinfo_free_paths
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrinfo_t** threads
|
||||
);
|
||||
|
||||
|
||||
@@ -37,21 +37,23 @@
|
||||
|
||||
cntl_t* bli_gemm_cntl_create
|
||||
(
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
return bli_gemmbp_cntl_create( family, schema_a, schema_b );
|
||||
return bli_gemmbp_cntl_create( rntm, family, schema_a, schema_b );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_gemmbp_cntl_create
|
||||
(
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
void* macro_kernel_fp;
|
||||
@@ -71,6 +73,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
// Create two nodes for the macro-kernel.
|
||||
cntl_t* gemm_cntl_bu_ke = bli_gemm_cntl_create_node
|
||||
(
|
||||
rntm, // the thread's runtime structure
|
||||
family, // the operation family
|
||||
BLIS_MR, // needed for bli_thrinfo_rgrow()
|
||||
NULL, // variant function pointer not used
|
||||
@@ -79,6 +82,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
|
||||
cntl_t* gemm_cntl_bp_bu = bli_gemm_cntl_create_node
|
||||
(
|
||||
rntm, // the thread's runtime structure
|
||||
family,
|
||||
BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow()
|
||||
macro_kernel_fp,
|
||||
@@ -88,6 +92,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
// Create a node for packing matrix A.
|
||||
cntl_t* gemm_cntl_packa = bli_packm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
bli_gemm_packa, // pack the left-hand operand
|
||||
packa_fp,
|
||||
BLIS_MR,
|
||||
@@ -103,6 +108,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
// Create a node for partitioning the m dimension by MC.
|
||||
cntl_t* gemm_cntl_op_bp = bli_gemm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_MC,
|
||||
bli_gemm_blk_var1,
|
||||
@@ -112,6 +118,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
// Create a node for packing matrix B.
|
||||
cntl_t* gemm_cntl_packb = bli_packm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
bli_gemm_packb, // pack the right-hand operand
|
||||
packb_fp,
|
||||
BLIS_KR,
|
||||
@@ -127,6 +134,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
// Create a node for partitioning the k dimension by KC.
|
||||
cntl_t* gemm_cntl_mm_op = bli_gemm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_KC,
|
||||
bli_gemm_blk_var3,
|
||||
@@ -136,6 +144,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
// Create a node for partitioning the n dimension by NC.
|
||||
cntl_t* gemm_cntl_vl_mm = bli_gemm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_NC,
|
||||
bli_gemm_blk_var2,
|
||||
@@ -246,23 +255,25 @@ cntl_t* bli_gemmpb_cntl_create
|
||||
|
||||
void bli_gemm_cntl_free
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
bli_cntl_free( cntl, thread );
|
||||
bli_cntl_free( rntm, cntl, thread );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_gemm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
bszid_t bszid,
|
||||
void* var_func,
|
||||
cntl_t* sub_node
|
||||
)
|
||||
{
|
||||
return bli_cntl_create_node( family, bszid, var_func, NULL, sub_node );
|
||||
return bli_cntl_create_node( rntm, family, bszid, var_func, NULL, sub_node );
|
||||
}
|
||||
|
||||
|
||||
@@ -34,18 +34,20 @@
|
||||
|
||||
cntl_t* bli_gemm_cntl_create
|
||||
(
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_gemmbp_cntl_create
|
||||
(
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
#if 0
|
||||
@@ -59,7 +61,8 @@ cntl_t* bli_gemmpb_cntl_create
|
||||
|
||||
void bli_gemm_cntl_free
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
@@ -67,6 +70,7 @@ void bli_gemm_cntl_free
|
||||
|
||||
cntl_t* bli_gemm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
bszid_t bszid,
|
||||
void* var_func,
|
||||
|
||||
@@ -37,21 +37,23 @@
|
||||
|
||||
cntl_t* bli_trsm_cntl_create
|
||||
(
|
||||
side_t side,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
side_t side,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
if ( bli_is_left( side ) )
|
||||
return bli_trsm_l_cntl_create( schema_a, schema_b );
|
||||
return bli_trsm_l_cntl_create( rntm, schema_a, schema_b );
|
||||
else
|
||||
return bli_trsm_r_cntl_create( schema_a, schema_b );
|
||||
return bli_trsm_r_cntl_create( rntm, schema_a, schema_b );
|
||||
}
|
||||
|
||||
cntl_t* bli_trsm_l_cntl_create
|
||||
(
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
void* macro_kernel_p;
|
||||
@@ -70,6 +72,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
// Create two nodes for the macro-kernel.
|
||||
cntl_t* trsm_cntl_bu_ke = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm, // the thread's runtime structure
|
||||
family, // the operation family
|
||||
BLIS_MR, // needed for bli_thrinfo_rgrow()
|
||||
NULL, // variant function pointer not used
|
||||
@@ -78,6 +81,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
|
||||
cntl_t* trsm_cntl_bp_bu = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow()
|
||||
macro_kernel_p,
|
||||
@@ -87,6 +91,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
// Create a node for packing matrix A.
|
||||
cntl_t* trsm_cntl_packa = bli_packm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
bli_trsm_packa,
|
||||
packa_fp,
|
||||
BLIS_MR,
|
||||
@@ -102,6 +107,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
// Create a node for partitioning the m dimension by MC.
|
||||
cntl_t* trsm_cntl_op_bp = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_MC,
|
||||
bli_trsm_blk_var1,
|
||||
@@ -111,6 +117,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
// Create a node for packing matrix B.
|
||||
cntl_t* trsm_cntl_packb = bli_packm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
bli_trsm_packb,
|
||||
packb_fp,
|
||||
BLIS_MR,
|
||||
@@ -126,6 +133,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
// Create a node for partitioning the k dimension by KC.
|
||||
cntl_t* trsm_cntl_mm_op = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_KC,
|
||||
bli_trsm_blk_var3,
|
||||
@@ -135,6 +143,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
// Create a node for partitioning the n dimension by NC.
|
||||
cntl_t* trsm_cntl_vl_mm = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_NC,
|
||||
bli_trsm_blk_var2,
|
||||
@@ -146,8 +155,9 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
|
||||
cntl_t* bli_trsm_r_cntl_create
|
||||
(
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
// NOTE: trsm macrokernels are presently disabled for right-side execution.
|
||||
@@ -161,6 +171,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
// Create two nodes for the macro-kernel.
|
||||
cntl_t* trsm_cntl_bu_ke = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_MR, // needed for bli_thrinfo_rgrow()
|
||||
NULL, // variant function pointer not used
|
||||
@@ -169,6 +180,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
|
||||
cntl_t* trsm_cntl_bp_bu = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_NR, // not used by macro-kernel, but needed for bli_thrinfo_rgrow()
|
||||
macro_kernel_p,
|
||||
@@ -178,6 +190,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
// Create a node for packing matrix A.
|
||||
cntl_t* trsm_cntl_packa = bli_packm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
bli_trsm_packa,
|
||||
packa_fp,
|
||||
BLIS_NR,
|
||||
@@ -193,6 +206,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
// Create a node for partitioning the m dimension by MC.
|
||||
cntl_t* trsm_cntl_op_bp = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_MC,
|
||||
bli_trsm_blk_var1,
|
||||
@@ -202,6 +216,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
// Create a node for packing matrix B.
|
||||
cntl_t* trsm_cntl_packb = bli_packm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
bli_trsm_packb,
|
||||
packb_fp,
|
||||
BLIS_MR,
|
||||
@@ -217,6 +232,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
// Create a node for partitioning the k dimension by KC.
|
||||
cntl_t* trsm_cntl_mm_op = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_KC,
|
||||
bli_trsm_blk_var3,
|
||||
@@ -226,6 +242,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
// Create a node for partitioning the n dimension by NC.
|
||||
cntl_t* trsm_cntl_vl_mm = bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
family,
|
||||
BLIS_NC,
|
||||
bli_trsm_blk_var2,
|
||||
@@ -237,23 +254,25 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
|
||||
void bli_trsm_cntl_free
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
bli_cntl_free( cntl, thread );
|
||||
bli_cntl_free( rntm, cntl, thread );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
bszid_t bszid,
|
||||
void* var_func,
|
||||
cntl_t* sub_node
|
||||
)
|
||||
{
|
||||
return bli_cntl_create_node( family, bszid, var_func, NULL, sub_node );
|
||||
return bli_cntl_create_node( rntm, family, bszid, var_func, NULL, sub_node );
|
||||
}
|
||||
|
||||
|
||||
@@ -34,26 +34,30 @@
|
||||
|
||||
cntl_t* bli_trsm_cntl_create
|
||||
(
|
||||
side_t side,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
side_t side,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
cntl_t* bli_trsm_l_cntl_create
|
||||
(
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
cntl_t* bli_trsm_r_cntl_create
|
||||
(
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
rntm_t* rntm,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
void bli_trsm_cntl_free
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
@@ -61,6 +65,7 @@ void bli_trsm_cntl_free
|
||||
|
||||
cntl_t* bli_trsm_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
bszid_t bszid,
|
||||
void* var_func,
|
||||
|
||||
563
frame/base/bli_apool.c
Normal file
563
frame/base/bli_apool.c
Normal file
@@ -0,0 +1,563 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_apool_init
|
||||
(
|
||||
malloc_ft malloc_fp,
|
||||
free_ft free_fp,
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
// Query the mutex from the apool_t.
|
||||
bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
|
||||
|
||||
// Initialize the mutex.
|
||||
//*mutex = BLIS_PTHREAD_MUTEX_INITIALIZER;
|
||||
bli_pthread_mutex_init( mutex, NULL );
|
||||
|
||||
// We choose to start with:
|
||||
// - an empty pool
|
||||
// - an initial block_ptrs_len of 8
|
||||
// - a single element in each initial array_t (though this is moot with
|
||||
// num_blocks = 0).
|
||||
const siz_t num_blocks = 0;
|
||||
siz_t block_ptrs_len = 8;
|
||||
const siz_t num_elem = 1;
|
||||
|
||||
// NOTE: Unlike in the bli_pool API, apool_t allocates block_ptrs as an
|
||||
// array of array_t* instead of an array of pblk_t. Why? We don't need to
|
||||
// track the size of each block, thus we don't need the block_size field
|
||||
// of pblk_t. That leaves only the void* field, and since we know apool_t
|
||||
// will always contain "blocks" that are really array_t structs, we can
|
||||
// make block_ptrs an array of array_t*.
|
||||
|
||||
// We formally set the block_size and align_size fields of the underlying
|
||||
// pool, even though they won't be queried. (They are used from hard-coded
|
||||
// values in bli_apool_alloc_block().)
|
||||
const siz_t block_size = sizeof( array_t );
|
||||
const siz_t align_size = 64;
|
||||
|
||||
// Query the underlying pool_t from the apool_t.
|
||||
pool_t* restrict pool = bli_apool_pool( apool );
|
||||
|
||||
// Set the default array_t length of the apool_t.
|
||||
bli_apool_set_def_array_len( num_elem, apool );
|
||||
|
||||
// -------------------------------------------------------------------------
|
||||
|
||||
// Make sure that block_ptrs_len is at least num_blocks.
|
||||
block_ptrs_len = bli_max( block_ptrs_len, num_blocks );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_init(): allocating block_ptrs (length %d): ",
|
||||
( int )block_ptrs_len );
|
||||
#endif
|
||||
|
||||
// Allocate the block_ptrs array.
|
||||
array_t** restrict block_ptrs
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len * sizeof( array_t* ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_init(): allocating %d array_t.\n", ( int )num_blocks );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Allocate and initialize each entry in the block_ptrs array.
|
||||
for ( dim_t i = 0; i < num_blocks; ++i )
|
||||
{
|
||||
// Pass in num_elem so the function knows how many elements to
|
||||
// initially have in each array_t.
|
||||
bli_apool_alloc_block
|
||||
(
|
||||
num_elem,
|
||||
malloc_fp,
|
||||
&(block_ptrs[i])
|
||||
);
|
||||
}
|
||||
|
||||
// NOTE: The semantics of top_index approximate a stack, where a "full"
|
||||
// stack (no blocks checked out) is one where top_index == 0 and an empty
|
||||
// stack (all blocks checked out) one where top_index == num_blocks.
|
||||
// (Here, num_blocks tracks the number of blocks currently allocated as
|
||||
// part of the pool.) This "orientation" of the stack was chosen
|
||||
// intentionally, in contrast to one where top_index == -1 means the
|
||||
// stack is empty and top_index = num_blocks - 1 means the stack is
|
||||
// full. The chosen scheme allows one to conceptualize the stack as a
|
||||
// number line in which blocks are checked out from lowest to highest,
|
||||
// and additional blocks are added at the higher end.
|
||||
|
||||
// Initialize the pool_t structure.
|
||||
bli_pool_set_block_ptrs( block_ptrs, pool );
|
||||
bli_pool_set_block_ptrs_len( block_ptrs_len, pool );
|
||||
bli_pool_set_top_index( 0, pool );
|
||||
bli_pool_set_num_blocks( num_blocks, pool );
|
||||
bli_pool_set_block_size( block_size, pool );
|
||||
bli_pool_set_align_size( align_size, pool );
|
||||
bli_pool_set_malloc_fp( malloc_fp, pool );
|
||||
bli_pool_set_free_fp( free_fp, pool );
|
||||
}
|
||||
|
||||
void bli_apool_alloc_block
|
||||
(
|
||||
siz_t num_elem,
|
||||
malloc_ft malloc_fp,
|
||||
array_t** restrict array_p
|
||||
)
|
||||
{
|
||||
// Since the apool_t is defined as a pool of array_t, we can hard-code
|
||||
// the block_size and align_size parameters. For the align_size, we
|
||||
// use the size of a cache line.
|
||||
const siz_t block_size = sizeof( array_t );
|
||||
//const siz_t align_size = 64;
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_alloc_block(): allocating array_t: " );
|
||||
#endif
|
||||
|
||||
// Allocate the array_t via the bli_fmalloc_align() wrapper, which performs
|
||||
// alignment logic and opaquely saves the original pointer so that it can
|
||||
// be recovered when it's time to free the block.
|
||||
array_t* restrict array
|
||||
=
|
||||
//bli_fmalloc_align( malloc_fp, block_size, align_size );
|
||||
bli_malloc_intl( block_size );
|
||||
|
||||
// Initialize an array_t struct within the newly allocated memory region.
|
||||
bli_array_init( num_elem, sizeof( pool_t* ), array );
|
||||
|
||||
// Save the pointer in the caller's array_t*.
|
||||
*array_p = array;
|
||||
}
|
||||
|
||||
void bli_apool_free_block
|
||||
(
|
||||
free_ft free_fp,
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
const siz_t num_elem = bli_array_num_elem( array );
|
||||
pool_t** restrict buf = bli_array_buf( array );
|
||||
|
||||
// Step through the array and finalize each pool_t.
|
||||
for ( dim_t i = 0; i < num_elem; ++i )
|
||||
{
|
||||
pool_t* restrict pool = buf[ i ];
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_free_block(): freeing pool_t %d within array_t.\n",
|
||||
( int )i );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Finalize and free the current pool_t, if it was created/allocated.
|
||||
if ( pool != NULL )
|
||||
{
|
||||
// Finalize the pool.
|
||||
bli_pool_finalize( pool );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_free_block(): pool_t %d: ", ( int )i );
|
||||
#endif
|
||||
|
||||
// Free the pool_t struct.
|
||||
bli_free_intl( pool );
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_free_block(): " );
|
||||
#endif
|
||||
|
||||
// Free the array buffer.
|
||||
bli_array_finalize( array );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_free_block(): freeing array_t: " );
|
||||
#endif
|
||||
|
||||
// Free the array.
|
||||
//bli_ffree_align( free_fp, array );
|
||||
bli_free_intl( array );
|
||||
}
|
||||
|
||||
void bli_apool_finalize
|
||||
(
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
// Query the mutex from the apool_t.
|
||||
bli_pthread_mutex_t* restrict mutex = bli_apool_mutex( apool );
|
||||
|
||||
// Destroy the mutex.
|
||||
bli_pthread_mutex_destroy( mutex );
|
||||
|
||||
// Query the underlying pool_t and mutex from the apool_t.
|
||||
pool_t* restrict pool = bli_apool_pool( apool );
|
||||
|
||||
// ----------------------------------------------------------------
|
||||
|
||||
// Query the block_ptrs array.
|
||||
array_t** restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the total number of blocks currently allocated.
|
||||
siz_t num_blocks = bli_pool_num_blocks( pool );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
// Sanity check: The top_index should be zero.
|
||||
if ( top_index != 0 ) bli_abort();
|
||||
|
||||
// Query the free() function pointer for the pool.
|
||||
free_ft free_fp = bli_pool_free_fp( pool );
|
||||
|
||||
// Free the individual blocks (each an array_t) currently in the pool.
|
||||
for ( dim_t i = 0; i < num_blocks; ++i )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_finalize(): freeing array_t %d within apool_t.\n",
|
||||
( int )i );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
bli_apool_free_block( free_fp, block_ptrs[i] );
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_finalize(): freeing block_ptrs (length %d): ",
|
||||
( int )( bli_pool_block_ptrs_len( pool ) ) );
|
||||
#endif
|
||||
|
||||
// Free the block_ptrs array.
|
||||
bli_free_intl( block_ptrs );
|
||||
}
|
||||
|
||||
array_t* bli_apool_checkout_array
|
||||
(
|
||||
siz_t n_threads,
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
// Acquire the apool_t's mutex.
|
||||
bli_apool_lock( apool );
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// NOTE: Unlike with the bli_pool API, we do not need to handle potential
|
||||
// reinitialization since the apool_t's block_size (corresponding to the
|
||||
// size of an array_t struct) will never grow.
|
||||
|
||||
// If the apool_t is exhausted, add a block (e.g. an array_t).
|
||||
if ( bli_apool_is_exhausted( apool ) )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_checkout_block(): apool_t is exhausted; "
|
||||
"growing by 1 array_t.\n" );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
bli_apool_grow( 1, apool );
|
||||
}
|
||||
|
||||
// At this point, at least one array_t is guaranteed to be available.
|
||||
|
||||
// Query the underlying pool_t from the apool_t.
|
||||
pool_t* restrict pool = bli_apool_pool( apool );
|
||||
|
||||
// Query the block_ptrs array.
|
||||
array_t** restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_checkout_array(): checking out array_t %d.\n",
|
||||
( int )top_index );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Select the array_t* at top_index to return to the caller.
|
||||
array_t* restrict array = block_ptrs[ top_index ];
|
||||
|
||||
// Increment the pool's top_index.
|
||||
bli_pool_set_top_index( top_index + 1, pool );
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Release the apool_t's mutex.
|
||||
bli_apool_unlock( apool );
|
||||
|
||||
// Resize the array_t according to the number of threads specified by the
|
||||
// caller. (We need one element in the array_t per thread.)
|
||||
bli_array_resize( n_threads, array );
|
||||
|
||||
// Return the selected array_t*.
|
||||
return array;
|
||||
}
|
||||
|
||||
void bli_apool_checkin_array
|
||||
(
|
||||
array_t* restrict array,
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
// Acquire the apool_t's mutex.
|
||||
bli_apool_lock( apool );
|
||||
|
||||
// Query the underlying pool_t from the apool_t.
|
||||
pool_t* restrict pool = bli_apool_pool( apool );
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// NOTE: Unlike with the bli_pool API, we do not need to handle potential
|
||||
// freeing of the blocks upon checkin due to the block_size having since
|
||||
// changed due to reinitialization since the apool's block_size will never
|
||||
// change.
|
||||
|
||||
// Query the block_ptrs array.
|
||||
array_t** restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_checkin_block(): checking in array_t %d.\n",
|
||||
( int )top_index - 1 );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Copy the caller's array_t address to the element at top_index - 1.
|
||||
block_ptrs[ top_index - 1 ] = array;
|
||||
|
||||
// Decrement the pool's top_index.
|
||||
bli_pool_set_top_index( top_index - 1, pool );
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Release the apool_t's mutex.
|
||||
bli_apool_unlock( apool );
|
||||
}
|
||||
|
||||
pool_t* bli_apool_array_elem
|
||||
(
|
||||
siz_t index,
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
// Query the array element corresponding to index.
|
||||
// NOTE: If we knew that the array_t contained elements of size
|
||||
// sizeof( void* ) or sizeof( whatever ), we could return the *value*
|
||||
// stored in the array. But since array_t is general-purpose, it can't
|
||||
// return the element itself. So instead, bli_array_elem() returns the
|
||||
// address of the element in the array. Since the elements that apool_t
|
||||
// stores in the array_t are pool_t*, that means that the function is
|
||||
// actually returning the address of a pool_t*, or pool_t**, hence the
|
||||
// dereferencing below.
|
||||
pool_t** restrict pool_p = bli_array_elem( index, array );
|
||||
pool_t* pool = *pool_p;
|
||||
|
||||
// If the element is NULL, then it means a pool_t has not yet been created
|
||||
// and allocated for the given index (thread id).
|
||||
if ( pool == NULL )
|
||||
{
|
||||
// Settle on the parameters to use when initializing the pool_t for
|
||||
// the current index within the array_t.
|
||||
const siz_t num_blocks = 1;
|
||||
const siz_t block_ptrs_len = 10;
|
||||
const siz_t align_size = 16;
|
||||
malloc_ft malloc_fp = BLIS_MALLOC_INTL;
|
||||
free_ft free_fp = BLIS_FREE_INTL;
|
||||
|
||||
// Each small block pool should contain blocks large enough to
|
||||
// accommodate any of the data structures for which they will be
|
||||
// used.
|
||||
const siz_t n_sizes = 4;
|
||||
siz_t sizes[4] = { sizeof( cntl_t ),
|
||||
sizeof( packm_params_t ),
|
||||
sizeof( thrcomm_t ),
|
||||
sizeof( thrinfo_t ) };
|
||||
siz_t block_size = 0;
|
||||
|
||||
// Find the largest of the sizes above and use that as the block_size
|
||||
// for the pool.
|
||||
for ( dim_t i = 0; i < n_sizes; ++i )
|
||||
{
|
||||
if ( block_size < sizes[i] ) block_size = sizes[i];
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_array_elem(): pool_t for tid %d is NULL; allocating pool_t.\n",
|
||||
( int )index );
|
||||
printf( "bli_apool_array_elem(): allocating pool_t: " );
|
||||
#endif
|
||||
|
||||
// Allocate the pool_t.
|
||||
pool = bli_malloc_intl( sizeof( pool_t ) );
|
||||
|
||||
// Initialize the pool_t.
|
||||
bli_pool_init
|
||||
(
|
||||
num_blocks,
|
||||
block_ptrs_len,
|
||||
block_size,
|
||||
align_size,
|
||||
malloc_fp,
|
||||
free_fp,
|
||||
pool
|
||||
);
|
||||
|
||||
// Update the array element with the address to the new pool_t.
|
||||
// NOTE: We pass in the address of the pool_t* since the bli_array
|
||||
// API is generalized for arbitrarily-sized elements, and therefore
|
||||
// it must always take the address of the data, rather than the
|
||||
// value (which it can only do if the elem size were fixed).
|
||||
bli_array_set_elem( &pool, index, array );
|
||||
}
|
||||
|
||||
// The array element is now guaranteed to refer to an allocated and
|
||||
// initialized pool_t.
|
||||
|
||||
// Return the array element.
|
||||
return pool;
|
||||
}
|
||||
|
||||
void bli_apool_grow
|
||||
(
|
||||
siz_t num_blocks_add,
|
||||
apool_t* restrict apool
|
||||
)
|
||||
{
|
||||
// If the requested increase is zero, return early.
|
||||
if ( num_blocks_add == 0 ) return;
|
||||
|
||||
// Query the underlying pool_t from the apool_t.
|
||||
pool_t* restrict pool = bli_apool_pool( apool );
|
||||
|
||||
// Query the default initial array length from the apool_t.
|
||||
const siz_t num_elem = bli_apool_def_array_len( apool );
|
||||
|
||||
// ----------------------------------------------------------------------------
|
||||
|
||||
// Query the allocated length of the block_ptrs array and also the
|
||||
// total number of blocks currently allocated.
|
||||
const siz_t block_ptrs_len_cur = bli_pool_block_ptrs_len( pool );
|
||||
const siz_t num_blocks_cur = bli_pool_num_blocks( pool );
|
||||
|
||||
// Compute the total number of allocated blocks that will exist
|
||||
// after we grow the pool.
|
||||
const siz_t num_blocks_new = num_blocks_cur + num_blocks_add;
|
||||
|
||||
// If adding num_blocks_add new blocks will exceed the current capacity
|
||||
// of the block_ptrs array, we need to first put in place a new (larger)
|
||||
// array.
|
||||
if ( block_ptrs_len_cur < num_blocks_new )
|
||||
{
|
||||
// To prevent this from happening often, we double the current
|
||||
// length of the block_ptrs array.
|
||||
const siz_t block_ptrs_len_new = 2 * block_ptrs_len_cur;
|
||||
|
||||
// Query the current block_ptrs array.
|
||||
array_t** restrict block_ptrs_cur = bli_pool_block_ptrs( pool );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_grow(): growing block_ptrs_len (%d -> %d): ",
|
||||
( int )block_ptrs_len_cur, ( int )block_ptrs_len_new );
|
||||
#endif
|
||||
|
||||
// Allocate a new block_ptrs array.
|
||||
array_t** restrict block_ptrs_new
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len_new * sizeof( array_t* ) );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
// Copy the contents of the old block_ptrs array to the new/resized
|
||||
// array. Notice that we can begin with top_index since all entries
|
||||
// from 0 to top_index-1 have been (and are currently) checked out
|
||||
// to threads.
|
||||
for ( dim_t i = top_index; i < num_blocks_cur; ++i )
|
||||
{
|
||||
block_ptrs_new[i] = block_ptrs_cur[i];
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_grow(): freeing prev block_ptrs: " );
|
||||
#endif
|
||||
|
||||
// Free the old block_ptrs array.
|
||||
bli_free_intl( block_ptrs_cur );
|
||||
|
||||
// Update the pool_t struct with the new block_ptrs array and
|
||||
// record its allocated length.
|
||||
bli_pool_set_block_ptrs( block_ptrs_new, pool );
|
||||
bli_pool_set_block_ptrs_len( block_ptrs_len_new, pool );
|
||||
}
|
||||
|
||||
// At this point, we are guaranteed to have enough unused elements
|
||||
// in the block_ptrs array to accommodate an additional num_blocks_add
|
||||
// blocks.
|
||||
|
||||
// Query the current block_ptrs array (which was maybe just resized).
|
||||
array_t** restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the malloc() function pointer for the pool.
|
||||
malloc_ft malloc_fp = bli_pool_malloc_fp( pool );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_apool_grow(): growing apool_t (%d -> %d).\n",
|
||||
( int )num_blocks_cur, ( int )num_blocks_new );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Allocate the requested additional blocks in the resized array.
|
||||
for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i )
|
||||
{
|
||||
bli_apool_alloc_block
|
||||
(
|
||||
num_elem,
|
||||
malloc_fp,
|
||||
&(block_ptrs[i])
|
||||
);
|
||||
}
|
||||
|
||||
// Update the pool_t struct with the new number of allocated blocks.
|
||||
// Notice that top_index remains unchanged, as do the block_size and
|
||||
// align_size fields.
|
||||
bli_pool_set_num_blocks( num_blocks_new, pool );
|
||||
}
|
||||
|
||||
145
frame/base/bli_apool.h
Normal file
145
frame/base/bli_apool.h
Normal file
@@ -0,0 +1,145 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_APOOL_H
|
||||
#define BLIS_APOOL_H
|
||||
|
||||
// -- Locked pool-of-arrays type --
|
||||
|
||||
/*
|
||||
typedef struct
|
||||
{
|
||||
bli_pthread_mutex_t mutex;
|
||||
pool_t pool;
|
||||
|
||||
siz_t def_array_len;
|
||||
|
||||
} apool_t;
|
||||
*/
|
||||
|
||||
|
||||
// apool entry query
|
||||
|
||||
// Return the address of the pool_t embedded in the given apool_t.
static pool_t* bli_apool_pool( apool_t* apool )
{
	pool_t* embedded = &apool->pool;

	return embedded;
}
|
||||
|
||||
// Return the address of the mutex stored inside the given apool_t.
static bli_pthread_mutex_t* bli_apool_mutex( apool_t* apool )
{
	bli_pthread_mutex_t* embedded_mutex = &apool->mutex;

	return embedded_mutex;
}
|
||||
|
||||
// Return the value of the def_array_len field of the given apool_t.
static siz_t bli_apool_def_array_len( apool_t* pool )
{
	const siz_t len = pool->def_array_len;

	return len;
}
|
||||
|
||||
// Report whether the pool_t embedded in the given apool_t is exhausted,
// as determined by bli_pool_is_exhausted().
static bool_t bli_apool_is_exhausted( apool_t* apool )
{
	return bli_pool_is_exhausted( bli_apool_pool( apool ) );
}
|
||||
|
||||
// apool action
|
||||
|
||||
// Lock the mutex embedded in the given apool_t.
static void bli_apool_lock( apool_t* apool )
{
	bli_pthread_mutex_t* mutex = bli_apool_mutex( apool );

	bli_pthread_mutex_lock( mutex );
}
|
||||
|
||||
// Unlock the mutex embedded in the given apool_t.
static void bli_apool_unlock( apool_t* apool )
{
	bli_pthread_mutex_t* mutex = bli_apool_mutex( apool );

	bli_pthread_mutex_unlock( mutex );
}
|
||||
|
||||
// apool entry modification
|
||||
|
||||
// Store def_array_len into the def_array_len field of the given apool_t.
// (The stray line-continuation backslash that used to follow the signature
// served no purpose outside of a macro and has been removed.)
static void bli_apool_set_def_array_len( siz_t def_array_len, apool_t* pool )
{
	pool->def_array_len = def_array_len;
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_apool_init
|
||||
(
|
||||
malloc_ft malloc_fp,
|
||||
free_ft free_fp,
|
||||
apool_t* restrict apool
|
||||
);
|
||||
void bli_apool_finalize
|
||||
(
|
||||
apool_t* restrict apool
|
||||
);
|
||||
|
||||
array_t* bli_apool_checkout_array
|
||||
(
|
||||
siz_t n_threads,
|
||||
apool_t* restrict apool
|
||||
);
|
||||
void bli_apool_checkin_array
|
||||
(
|
||||
array_t* restrict array,
|
||||
apool_t* restrict apool
|
||||
);
|
||||
|
||||
pool_t* bli_apool_array_elem
|
||||
(
|
||||
siz_t index,
|
||||
array_t* restrict array
|
||||
);
|
||||
|
||||
void bli_apool_grow
|
||||
(
|
||||
siz_t num_blocks_add,
|
||||
apool_t* restrict apool
|
||||
);
|
||||
|
||||
void bli_apool_alloc_block
|
||||
(
|
||||
siz_t num_elem,
|
||||
malloc_ft malloc_fp,
|
||||
array_t** restrict array_p
|
||||
);
|
||||
void bli_apool_free_block
|
||||
(
|
||||
free_ft free_fp,
|
||||
array_t* restrict array
|
||||
);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
204
frame/base/bli_array.c
Normal file
204
frame/base/bli_array.c
Normal file
@@ -0,0 +1,204 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
//#define BLIS_ENABLE_MEM_TRACING
|
||||
|
||||
void bli_array_init
|
||||
(
|
||||
const siz_t num_elem,
|
||||
const siz_t elem_size,
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_array_init(): allocating array [%d * %d]: ",
|
||||
( int )num_elem, ( int )elem_size );
|
||||
#endif
|
||||
|
||||
// Compute the total size (in bytes) of the array.
|
||||
const size_t array_size = num_elem * elem_size;
|
||||
|
||||
// Allocate the array buffer.
|
||||
void* restrict buf = bli_malloc_intl( array_size );
|
||||
|
||||
// Initialize the array elements to zero. THIS IS IMPORANT because
|
||||
// consumer threads will use the NULL-ness of the array elements to
|
||||
// determine if the corresponding block (data structure) needs to be
|
||||
// created/allocated and initialized.
|
||||
memset( buf, 0, array_size );
|
||||
|
||||
// Initialize the array_t structure.
|
||||
bli_array_set_buf( buf, array );
|
||||
bli_array_set_num_elem( num_elem, array );
|
||||
bli_array_set_elem_size( elem_size, array );
|
||||
}
|
||||
|
||||
void bli_array_resize
|
||||
(
|
||||
const siz_t num_elem_new,
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
// Query the number of elements in the array.
|
||||
const siz_t num_elem_prev = bli_array_num_elem( array );
|
||||
|
||||
// If the new requested size (number of elements) is less than or equal to
|
||||
// the current size, no action is needed; return early.
|
||||
if ( num_elem_new <= num_elem_prev ) return;
|
||||
|
||||
// At this point, we know that num_elem_prev < num_elem_new, which means
|
||||
// we need to proceed with the resizing.
|
||||
|
||||
// Query the size of each element in the array.
|
||||
const siz_t elem_size = bli_array_elem_size( array );
|
||||
|
||||
// Compute the total size (in bytes) of the array before and after resizing.
|
||||
const size_t array_size_prev = num_elem_prev * elem_size;
|
||||
const size_t array_size_new = num_elem_new * elem_size;
|
||||
|
||||
// Query the previous array buffer.
|
||||
void* restrict buf_prev = bli_array_buf( array );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_array_resize(): allocating array [%d * %d]: ",
|
||||
( int )num_elem_new, ( int )elem_size );
|
||||
#endif
|
||||
|
||||
// Allocate a new array buffer.
|
||||
char* restrict buf_new = bli_malloc_intl( array_size_new );
|
||||
|
||||
// Copy the previous array contents to the new array.
|
||||
memcpy( buf_new, buf_prev, array_size_prev );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_array_resize(): freeing array [%d * %d]: ",
|
||||
( int )num_elem_prev, ( int )elem_size );
|
||||
#endif
|
||||
|
||||
// Now that the elements have been copied over to the new buffer, we can
|
||||
// free the previous array buffer.
|
||||
bli_free_intl( buf_prev );
|
||||
|
||||
// Initialize the new elements' contents to zero. (Note that we advance
|
||||
// the new buffer address by the size of the previous array so that we
|
||||
// arrive at the first byte of the new segment.)
|
||||
memset( &buf_new[ array_size_prev ], 0, array_size_new - array_size_prev );
|
||||
|
||||
// Update the array_t structure.
|
||||
// NOTE: The array elem_size field does not need updating.
|
||||
bli_array_set_buf( buf_new, array );
|
||||
bli_array_set_num_elem( num_elem_new, array );
|
||||
}
|
||||
|
||||
void bli_array_finalize
|
||||
(
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_array_finalize(): freeing buf (length %d): ",
|
||||
( int )bli_array_num_elem( array ) );
|
||||
#endif
|
||||
|
||||
// Query the buffer from the array.
|
||||
void* restrict buf = bli_array_buf( array );
|
||||
|
||||
// Free the buffer.
|
||||
bli_free_intl( buf );
|
||||
}
|
||||
|
||||
void* bli_array_elem
|
||||
(
|
||||
const siz_t index,
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
// Query the number of elements in the array.
|
||||
const siz_t num_elem = bli_array_num_elem( array );
|
||||
|
||||
// Sanity check: disallow access beyond the bounds of the array.
|
||||
if ( num_elem <= index ) bli_abort();
|
||||
|
||||
// Query the size of each element in the array.
|
||||
const siz_t elem_size = bli_array_elem_size( array );
|
||||
|
||||
// Query the buffer from the array, but store it as a char* so we can use
|
||||
// it to easily perform byte pointer arithmetic.
|
||||
char* restrict buf = bli_array_buf( array );
|
||||
|
||||
// Advance the pointer by (index * elem_size) bytes.
|
||||
buf += index * elem_size;
|
||||
|
||||
// Return the address of the element computed above.
|
||||
return ( void* )buf;
|
||||
}
|
||||
|
||||
void bli_array_set_elem
|
||||
(
|
||||
void* restrict elem,
|
||||
const siz_t index,
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
// Query the size of each element in the array.
|
||||
const siz_t elem_size = bli_array_elem_size( array );
|
||||
|
||||
// Query the buffer from the array as a char*.
|
||||
char* restrict buf = bli_array_buf( array );
|
||||
|
||||
if ( elem_size == sizeof( void* ) )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_array_set_elem(): elem_size is %d; setting index %d.\n",
|
||||
( int )elem_size, ( int )index );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Special case: Handle elem_size = sizeof( void* ) without calling
|
||||
// memcpy().
|
||||
void** restrict buf_vvp = ( void** )buf;
|
||||
void** restrict elem_vvp = ( void** )elem;
|
||||
|
||||
buf_vvp[ index ] = *elem_vvp;
|
||||
}
|
||||
else
|
||||
{
|
||||
// General case: Copy the elem_size bytes from elem to buf at the
|
||||
// element index specified by index.
|
||||
memcpy( &buf[ index * elem_size ], elem, ( size_t )elem_size );
|
||||
}
|
||||
}
|
||||
|
||||
117
frame/base/bli_array.h
Normal file
117
frame/base/bli_array.h
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_ARRAY_H
|
||||
#define BLIS_ARRAY_H
|
||||
|
||||
// -- Array type --
|
||||
|
||||
/*
|
||||
typedef struct
|
||||
{
|
||||
void* buf;
|
||||
|
||||
siz_t num_elem;
|
||||
siz_t elem_size;
|
||||
|
||||
} array_t;
|
||||
*/
|
||||
|
||||
|
||||
// Array entry query
|
||||
|
||||
// Return the buf field of the given array_t.
static void* bli_array_buf( array_t* array )
{
	void* buffer = array->buf;

	return buffer;
}
|
||||
|
||||
// Return the num_elem field of the given array_t.
static siz_t bli_array_num_elem( array_t* array )
{
	const siz_t count = array->num_elem;

	return count;
}
|
||||
|
||||
// Return the elem_size field of the given array_t.
static siz_t bli_array_elem_size( array_t* array )
{
	const siz_t bytes_per_elem = array->elem_size;

	return bytes_per_elem;
}
|
||||
|
||||
// Array entry modification
|
||||
|
||||
// Store buf into the buf field of the given array_t.
// (The stray line-continuation backslash that used to follow the signature
// served no purpose outside of a macro and has been removed.)
static void bli_array_set_buf( void* buf, array_t* array )
{
	array->buf = buf;
}
|
||||
|
||||
// Store num_elem into the num_elem field of the given array_t.
// (The stray line-continuation backslash that used to follow the signature
// served no purpose outside of a macro and has been removed.)
static void bli_array_set_num_elem( siz_t num_elem, array_t* array )
{
	array->num_elem = num_elem;
}
|
||||
|
||||
// Store elem_size into the elem_size field of the given array_t.
// (The stray line-continuation backslash that used to follow the signature
// served no purpose outside of a macro and has been removed.)
static void bli_array_set_elem_size( siz_t elem_size, array_t* array )
{
	array->elem_size = elem_size;
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_array_init
|
||||
(
|
||||
const siz_t num_elem,
|
||||
const siz_t elem_size,
|
||||
array_t* restrict array
|
||||
);
|
||||
void bli_array_resize
|
||||
(
|
||||
const siz_t num_elem_new,
|
||||
array_t* restrict array
|
||||
);
|
||||
void bli_array_finalize
|
||||
(
|
||||
array_t* restrict array
|
||||
);
|
||||
|
||||
void* bli_array_elem
|
||||
(
|
||||
const siz_t index,
|
||||
array_t* restrict array
|
||||
);
|
||||
void bli_array_set_elem
|
||||
(
|
||||
void* restrict elem,
|
||||
const siz_t index,
|
||||
array_t* restrict array
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -776,7 +776,20 @@ err_t bli_check_object_buffer( obj_t* a )
|
||||
return e_val;
|
||||
}
|
||||
|
||||
// -- Memory allocator checks --------------------------------------------------
|
||||
// -- Memory checks ------------------------------------------------------------
|
||||
|
||||
err_t bli_check_valid_malloc_buf( void* ptr )
|
||||
{
|
||||
err_t e_val = BLIS_SUCCESS;
|
||||
|
||||
if ( ptr == NULL )
|
||||
e_val = BLIS_MALLOC_RETURNED_NULL;
|
||||
|
||||
return e_val;
|
||||
}
|
||||
|
||||
|
||||
// -- Internal memory pool checks ----------------------------------------------
|
||||
|
||||
err_t bli_check_valid_packbuf( packbuf_t buf_type )
|
||||
{
|
||||
|
||||
@@ -98,6 +98,8 @@ err_t bli_check_packv_schema_on_unpack( obj_t* a );
|
||||
|
||||
err_t bli_check_object_buffer( obj_t* a );
|
||||
|
||||
err_t bli_check_valid_malloc_buf( void* ptr );
|
||||
|
||||
err_t bli_check_valid_packbuf( packbuf_t buf_type );
|
||||
err_t bli_check_if_exhausted_pool( pool_t* pool );
|
||||
err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx );
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
|
||||
cntl_t* bli_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
bszid_t bszid,
|
||||
void* var_func,
|
||||
@@ -47,12 +48,12 @@ cntl_t* bli_cntl_create_node
|
||||
cntl_t* cntl;
|
||||
mem_t* pack_mem;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntl_create_node(): " );
|
||||
#endif
|
||||
|
||||
// Allocate the cntl_t struct.
|
||||
cntl = bli_malloc_intl( sizeof( cntl_t ) );
|
||||
cntl = bli_sba_acquire( rntm, sizeof( cntl_t ) );
|
||||
|
||||
bli_cntl_set_family( family, cntl );
|
||||
bli_cntl_set_bszid( bszid, cntl );
|
||||
@@ -72,14 +73,15 @@ cntl_t* bli_cntl_create_node
|
||||
|
||||
void bli_cntl_free_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
)
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntl_free_node(): " );
|
||||
#endif
|
||||
|
||||
bli_free_intl( cntl );
|
||||
bli_sba_release( rntm, cntl );
|
||||
}
|
||||
|
||||
void bli_cntl_clear_node
|
||||
@@ -105,17 +107,19 @@ void bli_cntl_clear_node
|
||||
|
||||
void bli_cntl_free
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
if ( thread != NULL ) bli_cntl_free_w_thrinfo( cntl, thread );
|
||||
else bli_cntl_free_wo_thrinfo( cntl );
|
||||
if ( thread != NULL ) bli_cntl_free_w_thrinfo( rntm, cntl, thread );
|
||||
else bli_cntl_free_wo_thrinfo( rntm, cntl );
|
||||
}
|
||||
|
||||
void bli_cntl_free_w_thrinfo
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
@@ -133,17 +137,17 @@ void bli_cntl_free_w_thrinfo
|
||||
{
|
||||
// Recursively free all memory associated with the sub-node and its
|
||||
// children.
|
||||
bli_cntl_free_w_thrinfo( cntl_sub_node, thread_sub_node );
|
||||
bli_cntl_free_w_thrinfo( rntm, cntl_sub_node, thread_sub_node );
|
||||
}
|
||||
|
||||
// Free the current node's params field, if it is non-NULL.
|
||||
if ( cntl_params != NULL )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntl_free_w_thrinfo(): " );
|
||||
#endif
|
||||
|
||||
bli_free_intl( cntl_params );
|
||||
bli_sba_release( rntm, cntl_params );
|
||||
}
|
||||
|
||||
// Release the current node's pack mem_t entry back to the memory
|
||||
@@ -152,19 +156,20 @@ void bli_cntl_free_w_thrinfo
|
||||
if ( bli_thread_am_ochief( thread ) )
|
||||
if ( bli_mem_is_alloc( cntl_pack_mem ) )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntl_free_w_thrinfo(): releasing mem pool block.\n" );
|
||||
#endif
|
||||
|
||||
bli_membrk_release( cntl_pack_mem );
|
||||
bli_membrk_release( rntm, cntl_pack_mem );
|
||||
}
|
||||
|
||||
// Free the current node.
|
||||
bli_cntl_free_node( cntl );
|
||||
bli_cntl_free_node( rntm, cntl );
|
||||
}
|
||||
|
||||
void bli_cntl_free_wo_thrinfo
|
||||
(
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
)
|
||||
{
|
||||
@@ -178,13 +183,13 @@ void bli_cntl_free_wo_thrinfo
|
||||
{
|
||||
// Recursively free all memory associated with the sub-node and its
|
||||
// children.
|
||||
bli_cntl_free_wo_thrinfo( cntl_sub_node );
|
||||
bli_cntl_free_wo_thrinfo( rntm, cntl_sub_node );
|
||||
}
|
||||
|
||||
// Free the current node's params field, if it is non-NULL.
|
||||
if ( cntl_params != NULL )
|
||||
{
|
||||
bli_free_intl( cntl_params );
|
||||
bli_sba_release( rntm, cntl_params );
|
||||
}
|
||||
|
||||
// Release the current node's pack mem_t entry back to the memory
|
||||
@@ -192,17 +197,18 @@ void bli_cntl_free_wo_thrinfo
|
||||
// allocated.
|
||||
if ( bli_mem_is_alloc( cntl_pack_mem ) )
|
||||
{
|
||||
bli_membrk_release( cntl_pack_mem );
|
||||
bli_membrk_release( rntm, cntl_pack_mem );
|
||||
}
|
||||
|
||||
// Free the current node.
|
||||
bli_cntl_free_node( cntl );
|
||||
bli_cntl_free_node( rntm, cntl );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_cntl_copy
|
||||
(
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
)
|
||||
{
|
||||
@@ -212,6 +218,7 @@ cntl_t* bli_cntl_copy
|
||||
// field.
|
||||
cntl_t* cntl_copy = bli_cntl_create_node
|
||||
(
|
||||
rntm,
|
||||
bli_cntl_family( cntl ),
|
||||
bli_cntl_bszid( cntl ),
|
||||
bli_cntl_var_func( cntl ),
|
||||
@@ -227,7 +234,7 @@ cntl_t* bli_cntl_copy
|
||||
// struct.
|
||||
uint64_t params_size = bli_cntl_params_size( cntl );
|
||||
void* params_orig = bli_cntl_params( cntl );
|
||||
void* params_copy = bli_malloc_intl( ( size_t )params_size );
|
||||
void* params_copy = bli_sba_acquire( rntm, ( size_t )params_size );
|
||||
|
||||
// Copy the original params struct to the new memory region.
|
||||
memcpy( params_copy, params_orig, params_size );
|
||||
@@ -242,6 +249,7 @@ cntl_t* bli_cntl_copy
|
||||
{
|
||||
cntl_t* sub_node_copy = bli_cntl_copy
|
||||
(
|
||||
rntm,
|
||||
bli_cntl_sub_node( cntl )
|
||||
);
|
||||
|
||||
|
||||
@@ -60,6 +60,7 @@ typedef struct cntl_s cntl_t;
|
||||
|
||||
cntl_t* bli_cntl_create_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
opid_t family,
|
||||
bszid_t bszid,
|
||||
void* var_func,
|
||||
@@ -69,6 +70,7 @@ cntl_t* bli_cntl_create_node
|
||||
|
||||
void bli_cntl_free_node
|
||||
(
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
);
|
||||
|
||||
@@ -81,23 +83,27 @@ void bli_cntl_clear_node
|
||||
|
||||
void bli_cntl_free
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
void bli_cntl_free_w_thrinfo
|
||||
(
|
||||
cntl_t* cntl,
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
void bli_cntl_free_wo_thrinfo
|
||||
(
|
||||
cntl_t* cntl
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
);
|
||||
|
||||
cntl_t* bli_cntl_copy
|
||||
(
|
||||
rntm_t* rntm,
|
||||
cntl_t* cntl
|
||||
);
|
||||
|
||||
|
||||
@@ -91,10 +91,32 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... )
|
||||
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
blkszs = bli_malloc_intl( n_bs * sizeof( blksz_t* ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bmults = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
dsclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
msclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
@@ -283,10 +305,30 @@ void bli_cntx_set_blkszs( ind_t method, dim_t n_bs, ... )
|
||||
}
|
||||
|
||||
// Free the temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( blkszs );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( bszids );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( bmults );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( dsclrs );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( msclrs );
|
||||
}
|
||||
|
||||
@@ -323,8 +365,20 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... )
|
||||
if ( method == BLIS_NAT ) return;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
bszids = bli_malloc_intl( n_bs * sizeof( bszid_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
dsclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
msclrs = bli_malloc_intl( n_bs * sizeof( double ) );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
@@ -444,8 +498,20 @@ void bli_cntx_set_ind_blkszs( ind_t method, dim_t n_bs, ... )
|
||||
}
|
||||
|
||||
// Free the temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( bszids );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( dsclrs );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_ind_blkszs(): " );
|
||||
#endif
|
||||
bli_free_intl( msclrs );
|
||||
}
|
||||
|
||||
@@ -476,9 +542,25 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
|
||||
dim_t i;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
l3ukr_t* ukr_ids = bli_malloc_intl( n_ukrs * sizeof( l3ukr_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
num_t* ukr_dts = bli_malloc_intl( n_ukrs * sizeof( num_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
void** ukr_fps = bli_malloc_intl( n_ukrs * sizeof( void* ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
bool_t* ukr_prefs = bli_malloc_intl( n_ukrs * sizeof( bool_t ) );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
@@ -566,9 +648,24 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
|
||||
}
|
||||
|
||||
// Free the temporary local arrays.
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
bli_free_intl( ukr_ids );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
bli_free_intl( ukr_dts );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
bli_free_intl( ukr_fps );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l3_nat_ukrs(): " );
|
||||
#endif
|
||||
bli_free_intl( ukr_prefs );
|
||||
}
|
||||
|
||||
@@ -599,8 +696,20 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... )
|
||||
dim_t i;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
l1fkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1fkr_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
void** ker_fps = bli_malloc_intl( n_kers * sizeof( void* ) );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
@@ -661,8 +770,20 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... )
|
||||
}
|
||||
|
||||
// Free the temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_ids );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_dts );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1f_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_fps );
|
||||
}
|
||||
|
||||
@@ -693,8 +814,20 @@ void bli_cntx_set_l1v_kers( dim_t n_kers, ... )
|
||||
dim_t i;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
l1vkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1vkr_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
void** ker_fps = bli_malloc_intl( n_kers * sizeof( void* ) );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
@@ -755,8 +888,20 @@ void bli_cntx_set_l1v_kers( dim_t n_kers, ... )
|
||||
}
|
||||
|
||||
// Free the temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_ids );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_dts );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_l1v_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_fps );
|
||||
}
|
||||
|
||||
@@ -787,8 +932,20 @@ void bli_cntx_set_packm_kers( dim_t n_kers, ... )
|
||||
dim_t i;
|
||||
|
||||
// Allocate some temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
l1mkr_t* ker_ids = bli_malloc_intl( n_kers * sizeof( l1mkr_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
num_t* ker_dts = bli_malloc_intl( n_kers * sizeof( num_t ) );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
void** ker_fps = bli_malloc_intl( n_kers * sizeof( void* ) );
|
||||
|
||||
// -- Begin variable argument section --
|
||||
@@ -849,8 +1006,20 @@ void bli_cntx_set_packm_kers( dim_t n_kers, ... )
|
||||
}
|
||||
|
||||
// Free the temporary local arrays.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_ids );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_dts );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_cntx_set_packm_kers(): " );
|
||||
#endif
|
||||
bli_free_intl( ker_fps );
|
||||
}
|
||||
|
||||
|
||||
@@ -60,7 +60,6 @@ typedef struct cntx_s
|
||||
pack_t schema_b;
|
||||
pack_t schema_c;
|
||||
|
||||
membrk_t* membrk;
|
||||
} cntx_t;
|
||||
*/
|
||||
|
||||
@@ -122,10 +121,6 @@ static pack_t bli_cntx_schema_c_panel( cntx_t* cntx )
|
||||
{
|
||||
return cntx->schema_c_panel;
|
||||
}
|
||||
static membrk_t* bli_cntx_get_membrk( cntx_t* cntx )
|
||||
{
|
||||
return cntx->membrk;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -154,10 +149,6 @@ static void bli_cntx_set_schema_ab_blockpanel( pack_t sa, pack_t sb, cntx_t* cnt
|
||||
bli_cntx_set_schema_a_block( sa, cntx );
|
||||
bli_cntx_set_schema_b_panel( sb, cntx );
|
||||
}
|
||||
static void bli_cntx_set_membrk( membrk_t* membrk, cntx_t* cntx )
|
||||
{
|
||||
cntx->membrk = membrk;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -150,6 +150,9 @@ void bli_error_init_msgs( void )
|
||||
sprintf( bli_error_string_for_code(BLIS_EXPECTED_NONNULL_OBJECT_BUFFER),
|
||||
"Encountered object with non-zero dimensions containing null buffer." );
|
||||
|
||||
sprintf( bli_error_string_for_code(BLIS_MALLOC_RETURNED_NULL),
|
||||
"malloc() returned NULL; heap memory is likely exhausted." );
|
||||
|
||||
sprintf( bli_error_string_for_code(BLIS_INVALID_PACKBUF),
|
||||
"Invalid packbuf_t value." );
|
||||
sprintf( bli_error_string_for_code(BLIS_EXHAUSTED_CONTIG_MEMORY_POOL),
|
||||
|
||||
@@ -196,10 +196,18 @@ void bli_gks_finalize( void )
|
||||
// If the current context was allocated, free it.
|
||||
if ( gks_id_ind != NULL )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_gks_finalize(): cntx for ind_t %d: ", ( int )ind );
|
||||
#endif
|
||||
|
||||
bli_free_intl( gks_id_ind );
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_gks_finalize(): gks for arch_t %d: ", ( int )id );
|
||||
#endif
|
||||
|
||||
// Free the array of BLIS_NUM_IND_METHODS cntx* elements.
|
||||
bli_free_intl( gks_id );
|
||||
}
|
||||
@@ -320,6 +328,10 @@ void bli_gks_register_cntx
|
||||
// to register with an architecture id that has already been registered.
|
||||
if ( gks[ id ] != NULL ) return;
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_gks_register_cntx(): " );
|
||||
#endif
|
||||
|
||||
// At this point, we know the pointer to the array of cntx_t* is NULL and
|
||||
// needs to be allocated. Allocate the memory and initialize it to
|
||||
// zeros/NULL, storing the address of the allocated memory at the element
|
||||
@@ -329,6 +341,10 @@ void bli_gks_register_cntx
|
||||
// Alias the allocated array for readability.
|
||||
cntx_t** restrict gks_id = gks[ id ];
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_gks_register_cntx(): " );
|
||||
#endif
|
||||
|
||||
// Allocate memory for a single context and store the address at
|
||||
// the element in the gks[ id ] array that is reserved for native
|
||||
// execution.
|
||||
|
||||
@@ -87,9 +87,17 @@ gint_t bli_info_get_enable_cblas( void )
|
||||
#endif
|
||||
}
|
||||
gint_t bli_info_get_blas_int_type_size( void ) { return BLIS_BLAS_INT_TYPE_SIZE; }
|
||||
gint_t bli_info_get_enable_packbuf_pools( void )
|
||||
gint_t bli_info_get_enable_pba_pools( void )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_PACKBUF_POOLS
|
||||
#ifdef BLIS_ENABLE_PBA_POOLS
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
#endif
|
||||
}
|
||||
gint_t bli_info_get_enable_sba_pools( void )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_SBA_POOLS
|
||||
return 1;
|
||||
#else
|
||||
return 0;
|
||||
|
||||
@@ -58,7 +58,8 @@ gint_t bli_info_get_enable_stay_auto_init( void );
|
||||
gint_t bli_info_get_enable_blas( void );
|
||||
gint_t bli_info_get_enable_cblas( void );
|
||||
gint_t bli_info_get_blas_int_type_size( void );
|
||||
gint_t bli_info_get_enable_packbuf_pools( void );
|
||||
gint_t bli_info_get_enable_pba_pools( void );
|
||||
gint_t bli_info_get_enable_sba_pools( void );
|
||||
gint_t bli_info_get_enable_threading( void );
|
||||
gint_t bli_info_get_enable_openmp( void );
|
||||
gint_t bli_info_get_enable_pthreads( void );
|
||||
|
||||
@@ -35,7 +35,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
//#define ENABLE_MEM_DEBUG
|
||||
//#define BLIS_ENABLE_MEM_TRACING
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -44,19 +44,22 @@ void* bli_malloc_pool( size_t size )
|
||||
const malloc_ft malloc_fp = BLIS_MALLOC_POOL;
|
||||
const size_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_malloc_pool(): size %ld, align size %ld\n",
|
||||
( long )size, ( long )align_size );
|
||||
#endif
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
return bli_fmalloc_align( malloc_fp, size, align_size );
|
||||
}
|
||||
|
||||
void bli_free_pool( void* p )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_free_pool(): freeing block\n" );
|
||||
#endif
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
bli_ffree_align( BLIS_FREE_POOL, p );
|
||||
}
|
||||
|
||||
@@ -67,19 +70,22 @@ void* bli_malloc_user( size_t size )
|
||||
const malloc_ft malloc_fp = BLIS_MALLOC_USER;
|
||||
const size_t align_size = BLIS_HEAP_ADDR_ALIGN_SIZE;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_malloc_user(): size %ld, align size %ld\n",
|
||||
( long )size, ( long )align_size );
|
||||
#endif
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
return bli_fmalloc_align( malloc_fp, size, align_size );
|
||||
}
|
||||
|
||||
void bli_free_user( void* p )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_free_user(): freeing block\n" );
|
||||
#endif
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
bli_ffree_align( BLIS_FREE_USER, p );
|
||||
}
|
||||
|
||||
@@ -89,21 +95,19 @@ void* bli_malloc_intl( size_t size )
|
||||
{
|
||||
const malloc_ft malloc_fp = BLIS_MALLOC_INTL;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_malloc_intl(): size %ld\n",
|
||||
( long )size );
|
||||
#endif
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_malloc_intl(): size %ld\n", ( long )size );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
return bli_fmalloc_noalign( malloc_fp, size );
|
||||
}
|
||||
|
||||
void* bli_calloc_intl( size_t size )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
// printf( "bli_calloc_intl(): allocating block (size %ld)\n",
|
||||
// ( long )size );
|
||||
printf( "calloc: " );
|
||||
#endif
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_calloc_intl(): " );
|
||||
#endif
|
||||
|
||||
void* p = bli_malloc_intl( size );
|
||||
|
||||
@@ -114,9 +118,11 @@ void* bli_calloc_intl( size_t size )
|
||||
|
||||
void bli_free_intl( void* p )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_free_intl(): freeing block\n" );
|
||||
#endif
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
bli_ffree_noalign( BLIS_FREE_INTL, p );
|
||||
}
|
||||
|
||||
|
||||
@@ -60,11 +60,6 @@ static pool_t* bli_mem_pool( mem_t* mem )
|
||||
return mem->pool;
|
||||
}
|
||||
|
||||
static membrk_t* bli_mem_membrk( mem_t* mem )
|
||||
{
|
||||
return mem->membrk;
|
||||
}
|
||||
|
||||
static siz_t bli_mem_size( mem_t* mem )
|
||||
{
|
||||
return mem->size;
|
||||
@@ -105,11 +100,6 @@ static void bli_mem_set_pool( pool_t* pool, mem_t* mem )
|
||||
mem->pool = pool;
|
||||
}
|
||||
|
||||
static void bli_mem_set_membrk( membrk_t* membrk, mem_t* mem )
|
||||
{
|
||||
mem->membrk = membrk;
|
||||
}
|
||||
|
||||
static void bli_mem_set_size( siz_t size, mem_t* mem )
|
||||
{
|
||||
mem->size = size;
|
||||
@@ -120,7 +110,6 @@ static void bli_mem_clear( mem_t* mem )
|
||||
bli_mem_set_buffer( NULL, mem );
|
||||
bli_mem_set_pool( NULL, mem );
|
||||
bli_mem_set_size( 0, mem );
|
||||
bli_mem_set_membrk( NULL, mem );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -36,12 +36,22 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
static membrk_t global_membrk;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
membrk_t* bli_membrk_query( void )
|
||||
{
|
||||
return &global_membrk;
|
||||
}
|
||||
|
||||
void bli_membrk_init
|
||||
(
|
||||
cntx_t* cntx,
|
||||
membrk_t* membrk
|
||||
cntx_t* restrict cntx
|
||||
)
|
||||
{
|
||||
membrk_t* restrict membrk = bli_membrk_query();
|
||||
|
||||
const siz_t align_size = BLIS_POOL_ADDR_ALIGN_SIZE;
|
||||
malloc_ft malloc_fp = BLIS_MALLOC_POOL;
|
||||
free_ft free_fp = BLIS_FREE_POOL;
|
||||
@@ -52,20 +62,22 @@ void bli_membrk_init
|
||||
bli_membrk_set_free_fp( free_fp, membrk );
|
||||
|
||||
bli_membrk_init_mutex( membrk );
|
||||
#ifdef BLIS_ENABLE_PACKBUF_POOLS
|
||||
#ifdef BLIS_ENABLE_PBA_POOLS
|
||||
bli_membrk_init_pools( cntx, membrk );
|
||||
#endif
|
||||
}
|
||||
|
||||
void bli_membrk_finalize
|
||||
(
|
||||
membrk_t* membrk
|
||||
void
|
||||
)
|
||||
{
|
||||
membrk_t* restrict membrk = bli_membrk_query();
|
||||
|
||||
bli_membrk_set_malloc_fp( NULL, membrk );
|
||||
bli_membrk_set_free_fp( NULL, membrk );
|
||||
|
||||
#ifdef BLIS_ENABLE_PACKBUF_POOLS
|
||||
#ifdef BLIS_ENABLE_PBA_POOLS
|
||||
bli_membrk_finalize_pools( membrk );
|
||||
#endif
|
||||
bli_membrk_finalize_mutex( membrk );
|
||||
@@ -73,7 +85,7 @@ void bli_membrk_finalize
|
||||
|
||||
void bli_membrk_acquire_m
|
||||
(
|
||||
membrk_t* membrk,
|
||||
rntm_t* rntm,
|
||||
siz_t req_size,
|
||||
packbuf_t buf_type,
|
||||
mem_t* mem
|
||||
@@ -82,17 +94,22 @@ void bli_membrk_acquire_m
|
||||
pool_t* pool;
|
||||
pblk_t* pblk;
|
||||
dim_t pi;
|
||||
siz_t block_size;
|
||||
|
||||
// If the internal memory pools for pack buffers are disabled, we
|
||||
// spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the
|
||||
// If the internal memory pools for the packing block allocator are disabled,
|
||||
// we spoof the buffer type as BLIS_BUFFER_FOR_GEN_USE to induce the
|
||||
// immediate usage of bli_membrk_malloc().
|
||||
#ifndef BLIS_ENABLE_PACKBUF_POOLS
|
||||
#ifndef BLIS_ENABLE_PBA_POOLS
|
||||
buf_type = BLIS_BUFFER_FOR_GEN_USE;
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_membrk_acquire_m(): bli_malloc_pool(): size %ld\n",
|
||||
( long )req_size );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Make sure the API is initialized.
|
||||
//assert( membrk ); //??
|
||||
// Query the memory broker from the runtime.
|
||||
membrk_t* membrk = bli_rntm_membrk( rntm );
|
||||
|
||||
|
||||
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
|
||||
{
|
||||
@@ -114,7 +131,6 @@ void bli_membrk_acquire_m
|
||||
bli_mem_set_buf_type( buf_type, mem );
|
||||
bli_mem_set_pool( NULL, mem );
|
||||
bli_mem_set_size( req_size, mem );
|
||||
bli_mem_set_membrk( membrk, mem );
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -146,53 +162,54 @@ void bli_membrk_acquire_m
|
||||
// the struct's pblk_t field.
|
||||
bli_pool_checkout_block( req_size, pblk, pool );
|
||||
|
||||
// Query the size of the blocks in the pool so we can store it in
|
||||
// the mem_t object. At this point, it is guaranteed to be at
|
||||
// least as large as req_size. (NOTE: We must perform the query
|
||||
// within the critical section to ensure that the pool hasn't
|
||||
// changed.)
|
||||
block_size = bli_pool_block_size( pool );
|
||||
|
||||
}
|
||||
// END CRITICAL SECTION
|
||||
|
||||
// Release the mutex associated with the membrk object.
|
||||
bli_membrk_unlock( membrk );
|
||||
|
||||
// Query the block_size from the pblk_t. This will be at least
|
||||
// req_size, perhaps larger.
|
||||
siz_t block_size = bli_pblk_block_size( pblk );
|
||||
|
||||
// Initialize the mem_t object with:
|
||||
// - the buffer type (a packbuf_t value),
|
||||
// - the address of the memory pool to which it belongs,
|
||||
// - the size of the contiguous memory block (NOT the size of the
|
||||
// requested region),
|
||||
// - the membrk_t from which the mem_t entry was acquired.
|
||||
// The actual addresses (system and aligned) are already stored in
|
||||
// the mem_t struct's pblk_t field
|
||||
// The actual (aligned) address is already stored in the mem_t
|
||||
// struct's pblk_t field.
|
||||
bli_mem_set_buf_type( buf_type, mem );
|
||||
bli_mem_set_pool( pool, mem );
|
||||
bli_mem_set_size( block_size, mem );
|
||||
bli_mem_set_membrk( membrk, mem );
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void bli_membrk_release
|
||||
(
|
||||
mem_t* mem
|
||||
rntm_t* rntm,
|
||||
mem_t* mem
|
||||
)
|
||||
{
|
||||
packbuf_t buf_type;
|
||||
pool_t* pool;
|
||||
pblk_t* pblk;
|
||||
siz_t block_size_cur;
|
||||
siz_t block_size_prev;
|
||||
membrk_t* membrk;
|
||||
|
||||
// Extract the membrk_t address from the mem_t object.
|
||||
membrk = bli_mem_membrk( mem );
|
||||
// Query the memory broker from the runtime.
|
||||
membrk_t* membrk = bli_rntm_membrk( rntm );
|
||||
|
||||
// Extract the buffer type so we know what kind of memory was allocated.
|
||||
buf_type = bli_mem_buf_type( mem );
|
||||
|
||||
#ifndef BLIS_ENABLE_PBA_POOLS
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_membrk_release(): bli_free_pool(): size %ld\n",
|
||||
( long )bli_mem_size( mem ) );
|
||||
#endif
|
||||
#endif
|
||||
|
||||
if ( buf_type == BLIS_BUFFER_FOR_GEN_USE )
|
||||
{
|
||||
free_ft free_fp = bli_membrk_free_fp( membrk );
|
||||
@@ -211,37 +228,14 @@ void bli_membrk_release
|
||||
// Extract the address of the pblk_t struct within the mem_t struct.
|
||||
pblk = bli_mem_pblk( mem );
|
||||
|
||||
// Query the size of the blocks that were in the pool at the time
|
||||
// the pblk_t was checked out. (This is used below, in the critical
|
||||
// section.)
|
||||
block_size_prev = bli_mem_size( mem );
|
||||
|
||||
// Acquire the mutex associated with the membrk object.
|
||||
bli_membrk_lock( membrk );
|
||||
|
||||
// BEGIN CRITICAL SECTION
|
||||
{
|
||||
|
||||
// Query the size of the blocks currently in the pool.
|
||||
block_size_cur = bli_pool_block_size( pool );
|
||||
|
||||
// If the block size of the pool has changed since the pblk_t
|
||||
// was checked out, then we need to free the pblk_t rather
|
||||
// than check it back in. Why? Because the pool's block size
|
||||
// has (most likely) increased to meet changing needs (example:
|
||||
// larger cache blocksizes). Thus, the current pblk_t's smaller
|
||||
// allocated size is of no use anymore.
|
||||
if ( block_size_cur != block_size_prev )
|
||||
{
|
||||
// Free the pblk_t using the appropriate function in the
|
||||
// pool API.
|
||||
bli_pool_free_block( pblk, pool );
|
||||
}
|
||||
else
|
||||
{
|
||||
// Check the block back into the pool.
|
||||
bli_pool_checkin_block( pblk, pool );
|
||||
}
|
||||
// Check the block back into the pool.
|
||||
bli_pool_checkin_block( pblk, pool );
|
||||
|
||||
}
|
||||
// END CRITICAL SECTION
|
||||
@@ -261,6 +255,7 @@ void bli_membrk_release
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
void bli_membrk_acquire_v
|
||||
(
|
||||
membrk_t* membrk,
|
||||
@@ -273,6 +268,18 @@ void bli_membrk_acquire_v
|
||||
BLIS_BUFFER_FOR_GEN_USE,
|
||||
mem );
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
void bli_membrk_rntm_set_membrk
|
||||
(
|
||||
rntm_t* rntm
|
||||
)
|
||||
{
|
||||
membrk_t* membrk = bli_membrk_query();
|
||||
|
||||
bli_rntm_set_membrk( membrk, rntm );
|
||||
}
|
||||
|
||||
|
||||
siz_t bli_membrk_pool_size
|
||||
|
||||
@@ -102,34 +102,34 @@ static void bli_membrk_unlock( membrk_t* membrk )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
membrk_t* bli_membrk_query( void );
|
||||
|
||||
void bli_membrk_init
|
||||
(
|
||||
cntx_t* cntx,
|
||||
membrk_t* membrk
|
||||
cntx_t* cntx
|
||||
);
|
||||
void bli_membrk_finalize
|
||||
(
|
||||
membrk_t* membrk
|
||||
void
|
||||
);
|
||||
|
||||
void bli_membrk_acquire_m
|
||||
(
|
||||
membrk_t* membrk,
|
||||
rntm_t* rntm,
|
||||
siz_t req_size,
|
||||
packbuf_t buf_type,
|
||||
mem_t* mem
|
||||
);
|
||||
|
||||
void bli_membrk_acquire_v
|
||||
(
|
||||
membrk_t* membrk,
|
||||
siz_t req_size,
|
||||
mem_t* mem
|
||||
);
|
||||
|
||||
void bli_membrk_release
|
||||
(
|
||||
mem_t* mem
|
||||
rntm_t* rntm,
|
||||
mem_t* mem
|
||||
);
|
||||
|
||||
void bli_membrk_rntm_set_membrk
|
||||
(
|
||||
rntm_t* rntm
|
||||
);
|
||||
|
||||
siz_t bli_membrk_pool_size
|
||||
|
||||
@@ -35,35 +35,29 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
static membrk_t global_membrk;
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
membrk_t* bli_memsys_global_membrk( void )
|
||||
{
|
||||
return &global_membrk;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_memsys_init( void )
|
||||
{
|
||||
// Query a native context so we have something to pass into
|
||||
// bli_membrk_init_pools(). We use BLIS_DOUBLE for the datatype,
|
||||
// but the dt argument is actually only used when initializing
|
||||
// contexts for induced methods.
|
||||
|
||||
// NOTE: Instead of calling bli_gks_query_cntx(), we call
|
||||
// bli_gks_query_cntx_noinit() to avoid the call to bli_init_once().
|
||||
cntx_t* cntx_p = bli_gks_query_cntx_noinit();
|
||||
|
||||
// Initialize the global membrk_t object and its memory pools.
|
||||
bli_membrk_init( cntx_p, &global_membrk );
|
||||
// Initialize the packing block allocator and its data structures.
|
||||
bli_membrk_init( cntx_p );
|
||||
|
||||
// Initialize the small block allocator and its data structures.
|
||||
bli_sba_init();
|
||||
}
|
||||
|
||||
void bli_memsys_finalize( void )
|
||||
{
|
||||
// Finalize the global membrk_t object and its memory pools.
|
||||
bli_membrk_finalize( &global_membrk );
|
||||
// Finalize the small block allocator and its data structures.
|
||||
bli_sba_finalize();
|
||||
|
||||
// Finalize the global membrk_t object and its data structures.
|
||||
bli_membrk_finalize();
|
||||
}
|
||||
|
||||
|
||||
@@ -38,10 +38,6 @@
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
membrk_t* bli_memsys_global_membrk( void );
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_memsys_init( void );
|
||||
void bli_memsys_finalize( void );
|
||||
|
||||
|
||||
@@ -48,6 +48,10 @@ void bli_obj_create
|
||||
|
||||
bli_obj_create_without_buffer( dt, m, n, obj );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_obj_create(): " );
|
||||
#endif
|
||||
|
||||
bli_obj_alloc_buffer( rs, cs, 1, obj );
|
||||
}
|
||||
|
||||
@@ -232,6 +236,10 @@ void bli_obj_create_1x1
|
||||
{
|
||||
bli_obj_create_without_buffer( dt, 1, 1, obj );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_obj_create_1x1(): " );
|
||||
#endif
|
||||
|
||||
bli_obj_alloc_buffer( 1, 1, 1, obj );
|
||||
}
|
||||
|
||||
@@ -277,7 +285,13 @@ void bli_obj_free
|
||||
// is a detached scalar (ie: if the buffer pointer refers to the
|
||||
// address of the internal scalar buffer).
|
||||
if ( bli_obj_buffer( obj ) != bli_obj_internal_scalar_buffer( obj ) )
|
||||
{
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_obj_free(): " );
|
||||
#endif
|
||||
|
||||
bli_free_user( bli_obj_buffer( obj ) );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -35,50 +35,68 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
//#define ENABLE_MEM_DEBUG
|
||||
//#define BLIS_ENABLE_MEM_TRACING
|
||||
|
||||
void bli_pool_init
|
||||
(
|
||||
dim_t num_blocks,
|
||||
dim_t block_ptrs_len,
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
malloc_ft malloc_fp,
|
||||
free_ft free_fp,
|
||||
pool_t* pool
|
||||
siz_t num_blocks,
|
||||
siz_t block_ptrs_len,
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
malloc_ft malloc_fp,
|
||||
free_ft free_fp,
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
pblk_t* block_ptrs;
|
||||
dim_t i;
|
||||
// Make sure that block_ptrs_len is at least num_blocks.
|
||||
block_ptrs_len = bli_max( block_ptrs_len, num_blocks );
|
||||
|
||||
// Make sure that num_block_ptrs is at least num_blocks.
|
||||
if ( block_ptrs_len < num_blocks ) block_ptrs_len = num_blocks;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_init(): allocating block_ptrs array of size %ld\n",
|
||||
( long )( block_ptrs_len * sizeof( pblk_t ) ) );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_init(): allocating block_ptrs (length %d): ",
|
||||
( int )block_ptrs_len );
|
||||
#endif
|
||||
|
||||
// Allocate the block_ptrs array.
|
||||
block_ptrs = bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ) );
|
||||
// FGVZ: Do we want to call malloc_fp() for internal data structures as
|
||||
// well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g.
|
||||
pblk_t* restrict block_ptrs
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len * sizeof( pblk_t ) );
|
||||
|
||||
// Allocate and initialize each entry in the block_ptrs array.
|
||||
for ( i = 0; i < num_blocks; ++i )
|
||||
for ( dim_t i = 0; i < num_blocks; ++i )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_init(): allocating block %d of size %ld (align %ld)\n",
|
||||
( int )i, ( long )block_size, ( long )align_size );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_init(): allocating block %d of size %d (align %d).\n",
|
||||
( int )i, ( int )block_size, ( int )align_size );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
bli_pool_alloc_block( block_size, align_size,
|
||||
&(block_ptrs[i]), pool );
|
||||
bli_pool_alloc_block
|
||||
(
|
||||
block_size,
|
||||
align_size,
|
||||
malloc_fp,
|
||||
&(block_ptrs[i])
|
||||
);
|
||||
}
|
||||
|
||||
// NOTE: The semantics of top_index approximate a stack, where a "full"
|
||||
// stack (no blocks checked out) is one where top_index == 0 and an empty
|
||||
// stack (all blocks checked out) one where top_index == num_blocks.
|
||||
// (Here, num_blocks tracks the number of blocks currently allocated as
|
||||
// part of the pool.) This "orientation" of the stack was chosen
|
||||
// intentionally, in contrast to one where top_index == -1 means the
|
||||
// stack is empty and top_index = num_blocks - 1 means the stack is
|
||||
// full. The chosen scheme allows one to conceptualize the stack as a
|
||||
// number line in which blocks are checked out from lowest to highest,
|
||||
// and additional blocks are added at the higher end.
|
||||
|
||||
// Initialize the pool_t structure.
|
||||
bli_pool_set_block_ptrs( block_ptrs, pool );
|
||||
bli_pool_set_block_ptrs_len( block_ptrs_len, pool );
|
||||
bli_pool_set_num_blocks( num_blocks, pool );
|
||||
bli_pool_set_top_index( 0, pool );
|
||||
bli_pool_set_num_blocks( num_blocks, pool );
|
||||
bli_pool_set_block_size( block_size, pool );
|
||||
bli_pool_set_align_size( align_size, pool );
|
||||
bli_pool_set_malloc_fp( malloc_fp, pool );
|
||||
@@ -87,43 +105,49 @@ void bli_pool_init
|
||||
|
||||
void bli_pool_finalize
|
||||
(
|
||||
pool_t* pool
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
pblk_t* block_ptrs;
|
||||
dim_t num_blocks;
|
||||
dim_t top_index;
|
||||
dim_t i;
|
||||
|
||||
// NOTE: This implementation assumes that either:
|
||||
// - all blocks have been checked in by all threads, or
|
||||
// - some subset of blocks have been checked in and the caller
|
||||
// is bli_pool_reinit().
|
||||
|
||||
// Query the current block_ptrs array.
|
||||
block_ptrs = bli_pool_block_ptrs( pool );
|
||||
// Query the block_ptrs array.
|
||||
pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the total number of blocks presently allocated.
|
||||
num_blocks = bli_pool_num_blocks( pool );
|
||||
// Query the total number of blocks currently allocated.
|
||||
const siz_t num_blocks = bli_pool_num_blocks( pool );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
top_index = bli_pool_top_index( pool );
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
// Sanity check: The top_index should be zero.
|
||||
if ( top_index != 0 ) bli_abort();
|
||||
|
||||
// Query the free() function pointer for the pool.
|
||||
free_ft free_fp = bli_pool_free_fp( pool );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_finalize(): freeing %d blocks of size %d (align %d).\n",
|
||||
( int )num_blocks, ( int )bli_pool_block_size( pool ),
|
||||
( int )bli_pool_align_size( pool ) );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Free the individual blocks currently in the pool.
|
||||
for ( i = top_index; i < num_blocks; ++i )
|
||||
for ( dim_t i = 0; i < num_blocks; ++i )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_finalize(): freeing block %d of size %ld (align %ld)\n",
|
||||
( int )i, ( long )bli_pool_block_size( pool ),
|
||||
( long )bli_pool_align_size( pool ) );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_finalize(): block %d: ", ( int )i );
|
||||
#endif
|
||||
|
||||
bli_pool_free_block( &(block_ptrs[i]), pool );
|
||||
bli_pool_free_block( free_fp, &(block_ptrs[i]) );
|
||||
}
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_finalize(): freeing block_ptrs array of size %ld\n",
|
||||
( long )( bli_pool_block_ptrs_len( pool ) * sizeof( pblk_t ) ) );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_finalize(): freeing block_ptrs (length %d): ",
|
||||
( int )( bli_pool_block_ptrs_len( pool ) ) );
|
||||
#endif
|
||||
|
||||
// Free the block_ptrs array.
|
||||
@@ -144,11 +168,11 @@ void bli_pool_finalize
|
||||
|
||||
void bli_pool_reinit
|
||||
(
|
||||
dim_t num_blocks_new,
|
||||
dim_t block_ptrs_len_new,
|
||||
siz_t block_size_new,
|
||||
siz_t align_size_new,
|
||||
pool_t* pool
|
||||
siz_t num_blocks_new,
|
||||
siz_t block_ptrs_len_new,
|
||||
siz_t block_size_new,
|
||||
siz_t align_size_new,
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
// Preserve the pointers to malloc() and free() provided when the pool
|
||||
@@ -158,60 +182,66 @@ void bli_pool_reinit
|
||||
|
||||
// Finalize the pool as it is currently configured. If some blocks
|
||||
// are still checked out to threads, those blocks are not freed
|
||||
// here, and instead will be freed when the threads are ready to
|
||||
// release the blocks. (This will happen because the threads will
|
||||
// notice that the block size of the pool has changed.)
|
||||
// here, and instead will be freed when the threads attempt to check
|
||||
// those blocks back into the pool. (This condition can be detected
|
||||
// since the block size is encoded into each pblk, which is copied
|
||||
// upon checkout.)
|
||||
bli_pool_finalize( pool );
|
||||
|
||||
// Reinitialize the pool with the new parameters, in particular,
|
||||
// the new block size.
|
||||
bli_pool_init( num_blocks_new,
|
||||
block_ptrs_len_new,
|
||||
block_size_new,
|
||||
align_size_new,
|
||||
malloc_fp,
|
||||
free_fp,
|
||||
pool );
|
||||
bli_pool_init
|
||||
(
|
||||
num_blocks_new,
|
||||
block_ptrs_len_new,
|
||||
block_size_new,
|
||||
align_size_new,
|
||||
malloc_fp,
|
||||
free_fp,
|
||||
pool
|
||||
);
|
||||
}
|
||||
|
||||
void bli_pool_checkout_block
|
||||
(
|
||||
siz_t req_size,
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
siz_t req_size,
|
||||
pblk_t* restrict block,
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
pblk_t* block_ptrs;
|
||||
dim_t top_index;
|
||||
|
||||
// If the requested block size is larger than what the pool was
|
||||
// initialized with, reinitialize the pool to contain blocks of the
|
||||
// requested size.
|
||||
if ( bli_pool_block_size( pool ) < req_size )
|
||||
{
|
||||
const dim_t num_blocks_new = bli_pool_num_blocks( pool );
|
||||
const dim_t block_ptrs_len_new = bli_pool_block_ptrs_len( pool );
|
||||
const siz_t num_blocks_new = bli_pool_num_blocks( pool );
|
||||
const siz_t block_ptrs_len_new = bli_pool_block_ptrs_len( pool );
|
||||
const siz_t align_size_new = bli_pool_align_size( pool );
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_checkout_block(): old block size %ld < req size %ld; "
|
||||
"reiniting",
|
||||
( long )bli_pool_block_size( pool ), ( long )req_size );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_checkout_block(): old block size %d < req size %d; "
|
||||
"reiniting.\n",
|
||||
( int )bli_pool_block_size( pool ), ( int )req_size );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// If the requested block size is larger than what the pool
|
||||
// was initialized with, reinitialize the pool to contain blocks
|
||||
// of the requested size.
|
||||
bli_pool_reinit( num_blocks_new,
|
||||
block_ptrs_len_new,
|
||||
req_size,
|
||||
align_size_new,
|
||||
pool );
|
||||
bli_pool_reinit
|
||||
(
|
||||
num_blocks_new,
|
||||
block_ptrs_len_new,
|
||||
req_size,
|
||||
align_size_new,
|
||||
pool
|
||||
);
|
||||
}
|
||||
|
||||
// If the pool is exhausted, add a block.
|
||||
if ( bli_pool_is_exhausted( pool ) )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_checkout_block(): pool is exhausted (block size %d); "
|
||||
"growing by 1.\n", ( int )bli_pool_block_size( pool ) );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
bli_pool_grow( 1, pool );
|
||||
@@ -219,21 +249,22 @@ void bli_pool_checkout_block
|
||||
|
||||
// At this point, at least one block is guaranteed to be available.
|
||||
|
||||
// Query the current block_ptrs array.
|
||||
block_ptrs = bli_pool_block_ptrs( pool );
|
||||
// Query the block_ptrs array.
|
||||
pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
top_index = bli_pool_top_index( pool );
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_checkout_block(): checking out block %d of size %ld (align %ld)\n",
|
||||
( int )top_index, ( long )bli_pool_block_size( pool ),
|
||||
( long )bli_pool_align_size( pool ) );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_checkout_block(): checking out block %d of size %d "
|
||||
"(align %d).\n",
|
||||
( int )top_index, ( int )bli_pool_block_size( pool ),
|
||||
( int )bli_pool_align_size( pool ) );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Copy the block at top_index to the caller's pblk_t struct.
|
||||
//bli_pblk_copy( *(block_ptrs[top_index]), *block );
|
||||
*block = block_ptrs[top_index];
|
||||
// Copy the pblk_t at top_index to the caller's pblk_t struct.
|
||||
*block = block_ptrs[ top_index ];
|
||||
|
||||
// Notice that we don't actually need to clear the contents of
|
||||
// block_ptrs[top_index]. It will get overwritten eventually when
|
||||
@@ -246,28 +277,39 @@ void bli_pool_checkout_block
|
||||
|
||||
void bli_pool_checkin_block
|
||||
(
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
pblk_t* restrict block,
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
pblk_t* block_ptrs;
|
||||
dim_t top_index;
|
||||
// If the pblk_t being checked in was allocated with a different block
|
||||
// size than is currently in use in the pool, we simply free it and
|
||||
// return. These "orphaned" blocks are no longer of use because the pool
|
||||
// has since been reinitialized to a different (larger) block size.
|
||||
if ( bli_pblk_block_size( block ) != bli_pool_block_size( pool ) )
|
||||
{
|
||||
// Query the free() function pointer for the pool.
|
||||
free_ft free_fp = bli_pool_free_fp( pool );
|
||||
|
||||
// Query the current block_ptrs array.
|
||||
block_ptrs = bli_pool_block_ptrs( pool );
|
||||
bli_pool_free_block( free_fp, block );
|
||||
return;
|
||||
}
|
||||
|
||||
// Query the block_ptrs array.
|
||||
pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
top_index = bli_pool_top_index( pool );
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_checkin_block(): checking in block %d of size %ld (align %ld)\n",
|
||||
( int )top_index - 1, ( long )bli_pool_block_size( pool ),
|
||||
( long )bli_pool_align_size( pool ) );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_checkin_block(): checking in block %d of size %d "
|
||||
"(align %d).\n",
|
||||
( int )top_index - 1, ( int )bli_pool_block_size( pool ),
|
||||
( int )bli_pool_align_size( pool ) );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Copy the caller's pblk_t struct to the block at top_index - 1.
|
||||
//bli_pblk_copy( *(block_ptrs[top_index-1]), *block );
|
||||
block_ptrs[top_index-1] = *block;
|
||||
block_ptrs[ top_index - 1 ] = *block;
|
||||
|
||||
// Decrement the pool's top_index.
|
||||
bli_pool_set_top_index( top_index - 1, pool );
|
||||
@@ -275,64 +317,60 @@ void bli_pool_checkin_block
|
||||
|
||||
void bli_pool_grow
|
||||
(
|
||||
dim_t num_blocks_add,
|
||||
pool_t* pool
|
||||
siz_t num_blocks_add,
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
pblk_t* block_ptrs_cur;
|
||||
dim_t block_ptrs_len_cur;
|
||||
dim_t num_blocks_cur;
|
||||
|
||||
pblk_t* block_ptrs_new;
|
||||
dim_t num_blocks_new;
|
||||
|
||||
siz_t block_size;
|
||||
siz_t align_size;
|
||||
dim_t top_index;
|
||||
|
||||
dim_t i;
|
||||
|
||||
// If the requested increase is zero (or negative), return early.
|
||||
if ( num_blocks_add < 1 ) return;
|
||||
// If the requested increase is zero, return early.
|
||||
if ( num_blocks_add == 0 ) return;
|
||||
|
||||
// Query the allocated length of the block_ptrs array and also the
|
||||
// total number of blocks allocated.
|
||||
block_ptrs_len_cur = bli_pool_block_ptrs_len( pool );
|
||||
num_blocks_cur = bli_pool_num_blocks( pool );
|
||||
// total number of blocks currently allocated.
|
||||
const siz_t block_ptrs_len_cur = bli_pool_block_ptrs_len( pool );
|
||||
const siz_t num_blocks_cur = bli_pool_num_blocks( pool );
|
||||
|
||||
// Compute the total number of allocated blocks that will exist
|
||||
// after we grow the pool.
|
||||
num_blocks_new = num_blocks_cur + num_blocks_add;
|
||||
const siz_t num_blocks_new = num_blocks_cur + num_blocks_add;
|
||||
|
||||
// If the new total number of allocated blocks is larger than the
|
||||
// allocated length of the block_ptrs array, we need to allocate
|
||||
// a new (larger) block_ptrs array.
|
||||
// If adding num_blocks_add new blocks will exceed the current capacity
|
||||
// of the block_ptrs array, we need to first put in place a new (larger)
|
||||
// array.
|
||||
if ( block_ptrs_len_cur < num_blocks_new )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_grow(): growing block_ptrs_len from %d to %d.\n",
|
||||
( int )block_ptrs_len_cur, ( int )num_blocks_new );
|
||||
// To prevent this from happening often, we double the current
|
||||
// length of the block_ptrs array.
|
||||
const siz_t block_ptrs_len_new = 2 * block_ptrs_len_cur;
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_grow(): growing block_ptrs_len (%d -> %d): ",
|
||||
( int )block_ptrs_len_cur, ( int )block_ptrs_len_new );
|
||||
#endif
|
||||
|
||||
// Query the current block_ptrs array.
|
||||
block_ptrs_cur = bli_pool_block_ptrs( pool );
|
||||
pblk_t* restrict block_ptrs_cur = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Allocate a new block_ptrs array of length num_blocks_new.
|
||||
block_ptrs_new = bli_malloc_intl( num_blocks_new * sizeof( pblk_t ) );
|
||||
// Allocate a new block_ptrs array.
|
||||
// FGVZ: Do we want to call malloc_fp() for internal data structures as
|
||||
// well as pool blocks? If so, don't forget to s/bli_free_intl/free_fp/g.
|
||||
pblk_t* restrict block_ptrs_new
|
||||
=
|
||||
bli_malloc_intl( block_ptrs_len_new * sizeof( pblk_t ) );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
top_index = bli_pool_top_index( pool );
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
// Copy the contents of the old block_ptrs array to the new/resized
|
||||
// array. Notice that we can begin with top_index since all entries
|
||||
// from 0 to top_index-1 have been checked out to threads.
|
||||
for ( i = top_index; i < num_blocks_cur; ++i )
|
||||
// from 0 to top_index-1 have been (and are currently) checked out
|
||||
// to threads.
|
||||
for ( dim_t i = top_index; i < num_blocks_cur; ++i )
|
||||
{
|
||||
block_ptrs_new[i] = block_ptrs_cur[i];
|
||||
}
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_grow(): freeing previous block_ptrs array.\n" );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_grow(): freeing prev block_ptrs: " );
|
||||
#endif
|
||||
|
||||
// Free the old block_ptrs array.
|
||||
@@ -341,30 +379,39 @@ void bli_pool_grow
|
||||
// Update the pool_t struct with the new block_ptrs array and
|
||||
// record its allocated length.
|
||||
bli_pool_set_block_ptrs( block_ptrs_new, pool );
|
||||
bli_pool_set_block_ptrs_len( num_blocks_new, pool );
|
||||
bli_pool_set_block_ptrs_len( block_ptrs_len_new, pool );
|
||||
}
|
||||
|
||||
// At this point, we are guaranteed to have enough unused elements
|
||||
// in the block_ptrs array to accommodate an additional num_blocks_add
|
||||
// blocks.
|
||||
|
||||
// Query the current block_ptrs array (which was possibly just resized).
|
||||
block_ptrs_cur = bli_pool_block_ptrs( pool );
|
||||
// Query the current block_ptrs array (which was maybe just resized).
|
||||
pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Query the block size and alignment size of the pool.
|
||||
block_size = bli_pool_block_size( pool );
|
||||
align_size = bli_pool_align_size( pool );
|
||||
const siz_t block_size = bli_pool_block_size( pool );
|
||||
const siz_t align_size = bli_pool_align_size( pool );
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_grow(): growing pool from from %d to %d.\n",
|
||||
// Query the malloc() function pointer for the pool.
|
||||
malloc_ft malloc_fp = bli_pool_malloc_fp( pool );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_grow(): growing pool from (%d -> %d).\n",
|
||||
( int )num_blocks_cur, ( int )num_blocks_new );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Allocate the requested additional blocks in the resized array.
|
||||
for ( i = num_blocks_cur; i < num_blocks_new; ++i )
|
||||
for ( dim_t i = num_blocks_cur; i < num_blocks_new; ++i )
|
||||
{
|
||||
bli_pool_alloc_block( block_size, align_size,
|
||||
&(block_ptrs_cur[i]), pool );
|
||||
bli_pool_alloc_block
|
||||
(
|
||||
block_size,
|
||||
align_size,
|
||||
malloc_fp,
|
||||
&(block_ptrs[i])
|
||||
);
|
||||
}
|
||||
|
||||
// Update the pool_t struct with the new number of allocated blocks.
|
||||
@@ -375,48 +422,41 @@ void bli_pool_grow
|
||||
|
||||
void bli_pool_shrink
|
||||
(
|
||||
dim_t num_blocks_sub,
|
||||
pool_t* pool
|
||||
siz_t num_blocks_sub,
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
pblk_t* block_ptrs;
|
||||
dim_t num_blocks;
|
||||
dim_t num_blocks_avail;
|
||||
dim_t num_blocks_new;
|
||||
// If the requested decrease is zero, return early.
|
||||
if ( num_blocks_sub == 0 ) return;
|
||||
|
||||
dim_t top_index;
|
||||
|
||||
dim_t i;
|
||||
|
||||
// Query the total number of blocks presently allocated.
|
||||
num_blocks = bli_pool_num_blocks( pool );
|
||||
// Query the total number of blocks currently allocated.
|
||||
const siz_t num_blocks = bli_pool_num_blocks( pool );
|
||||
|
||||
// Query the top_index of the pool.
|
||||
top_index = bli_pool_top_index( pool );
|
||||
const siz_t top_index = bli_pool_top_index( pool );
|
||||
|
||||
// Compute the number of blocks available to be checked out
|
||||
// (and thus available for removal).
|
||||
num_blocks_avail = num_blocks - top_index;
|
||||
const siz_t num_blocks_avail = num_blocks - top_index;
|
||||
|
||||
// If the requested decrease is more than the number of available
|
||||
// blocks in the pool, only remove the number of blocks available.
|
||||
if ( num_blocks_avail < num_blocks_sub )
|
||||
num_blocks_sub = num_blocks_avail;
|
||||
// blocks in the pool, only remove the number of blocks actually
|
||||
// available.
|
||||
num_blocks_sub = bli_min( num_blocks_sub, num_blocks_avail );
|
||||
|
||||
// If the effective requested decrease is zero (or the requested
|
||||
// decrease was negative), return early.
|
||||
if ( num_blocks_sub < 1 ) return;
|
||||
|
||||
// Query the current block_ptrs array.
|
||||
block_ptrs = bli_pool_block_ptrs( pool );
|
||||
// Query the block_ptrs array.
|
||||
pblk_t* restrict block_ptrs = bli_pool_block_ptrs( pool );
|
||||
|
||||
// Compute the new total number of blocks.
|
||||
num_blocks_new = num_blocks - num_blocks_sub;
|
||||
const siz_t num_blocks_new = num_blocks - num_blocks_sub;
|
||||
|
||||
// Query the free() function pointer for the pool.
|
||||
free_ft free_fp = bli_pool_free_fp( pool );
|
||||
|
||||
// Free the individual blocks.
|
||||
for ( i = num_blocks_new; i < num_blocks; ++i )
|
||||
for ( dim_t i = num_blocks_new; i < num_blocks; ++i )
|
||||
{
|
||||
bli_pool_free_block( &(block_ptrs[i]), pool );
|
||||
bli_pool_free_block( free_fp, &(block_ptrs[i]) );
|
||||
}
|
||||
|
||||
// Update the pool_t struct.
|
||||
@@ -429,24 +469,24 @@ void bli_pool_shrink
|
||||
|
||||
void bli_pool_alloc_block
|
||||
(
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
malloc_ft malloc_fp,
|
||||
pblk_t* restrict block
|
||||
)
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
//printf( "bli_pool_alloc_block(): allocating block of size %ld (align %ld)\n",
|
||||
// ( long )block_size, ( long )align_size );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_alloc_block(): calling fmalloc_align(): size %d (align %d)\n",
|
||||
( int )block_size, ( int )align_size );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Query the malloc() function pointer from the pool.
|
||||
malloc_ft malloc_fp = bli_pool_malloc_fp( pool );
|
||||
|
||||
// Allocate the block via the bli_fmalloc_align() wrapper, which performs
|
||||
// alignment logic and opaquely saves the original pointer so that it can
|
||||
// be recovered when it's time to free the block.
|
||||
void* buf = bli_fmalloc_align( malloc_fp, block_size, align_size );
|
||||
void* restrict buf
|
||||
=
|
||||
bli_fmalloc_align( malloc_fp, block_size, align_size );
|
||||
|
||||
#if 0
|
||||
// NOTE: This code is disabled because it is not needed, since
|
||||
@@ -471,31 +511,26 @@ void bli_pool_alloc_block
|
||||
}
|
||||
#endif
|
||||
|
||||
//printf( "bli_pool_alloc_block(): bsize = %d; asize = %d\n", (int)block_size, (int)align_size );
|
||||
//printf( " sys = %p; align = %p\n", buf_sys, buf_align );
|
||||
|
||||
// Save the results in the pblk_t structure.
|
||||
bli_pblk_set_buf( buf, block );
|
||||
bli_pblk_set_block_size( block_size, block );
|
||||
}
|
||||
|
||||
void bli_pool_free_block
|
||||
(
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
free_ft free_fp,
|
||||
pblk_t* restrict block
|
||||
)
|
||||
{
|
||||
void* buf;
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
printf( "bli_pool_free_block(): freeing block.\n" );
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_pool_free_block(): calling ffree_align(): size %d.\n",
|
||||
( int )bli_pblk_block_size( block ) );
|
||||
fflush( stdout );
|
||||
#endif
|
||||
|
||||
// Query the free() function pointer from the pool.
|
||||
free_ft free_fp = bli_pool_free_fp( pool );
|
||||
|
||||
// Extract the pblk_t buffer, which is the aligned address returned from
|
||||
// bli_fmalloc_align() when the block was allocated.
|
||||
buf = bli_pblk_buf( block );
|
||||
void* restrict buf = bli_pblk_buf( block );
|
||||
|
||||
// Free the block via the bli_ffree_align() wrapper, which recovers the
|
||||
// original pointer that was returned by the pool's malloc() function when
|
||||
@@ -505,34 +540,34 @@ void bli_pool_free_block
|
||||
|
||||
void bli_pool_print
|
||||
(
|
||||
pool_t* pool
|
||||
pool_t* restrict pool
|
||||
)
|
||||
{
|
||||
pblk_t* block_ptrs = bli_pool_block_ptrs( pool );
|
||||
dim_t block_ptrs_len = bli_pool_block_ptrs_len( pool );
|
||||
dim_t top_index = bli_pool_top_index( pool );
|
||||
dim_t num_blocks = bli_pool_num_blocks( pool );
|
||||
dim_t block_size = bli_pool_block_size( pool );
|
||||
dim_t align_size = bli_pool_align_size( pool );
|
||||
siz_t block_ptrs_len = bli_pool_block_ptrs_len( pool );
|
||||
siz_t top_index = bli_pool_top_index( pool );
|
||||
siz_t num_blocks = bli_pool_num_blocks( pool );
|
||||
siz_t block_size = bli_pool_block_size( pool );
|
||||
siz_t align_size = bli_pool_align_size( pool );
|
||||
dim_t i;
|
||||
|
||||
printf( "pool struct ---------------\n" );
|
||||
printf( " block_ptrs: %p\n", block_ptrs );
|
||||
printf( " block_ptrs_len: %ld\n", ( long )block_ptrs_len );
|
||||
printf( " top_index: %ld\n", ( long )top_index );
|
||||
printf( " num_blocks: %ld\n", ( long )num_blocks );
|
||||
printf( " block_size: %ld\n", ( long )block_size );
|
||||
printf( " align_size: %ld\n", ( long )align_size );
|
||||
printf( " block_ptrs_len: %d\n", ( int )block_ptrs_len );
|
||||
printf( " top_index: %d\n", ( int )top_index );
|
||||
printf( " num_blocks: %d\n", ( int )num_blocks );
|
||||
printf( " block_size: %d\n", ( int )block_size );
|
||||
printf( " align_size: %d\n", ( int )align_size );
|
||||
printf( " pblks sys align\n" );
|
||||
for ( i = 0; i < num_blocks; ++i )
|
||||
{
|
||||
printf( " %ld: %p\n", ( long )i, bli_pblk_buf( &block_ptrs[i] ) );
|
||||
printf( " %d: %p\n", ( int )i, bli_pblk_buf( &block_ptrs[i] ) );
|
||||
}
|
||||
}
|
||||
|
||||
void bli_pblk_print
|
||||
(
|
||||
pblk_t* pblk
|
||||
pblk_t* restrict pblk
|
||||
)
|
||||
{
|
||||
void* buf = bli_pblk_buf( pblk );
|
||||
|
||||
@@ -41,7 +41,9 @@
|
||||
/*
|
||||
typedef struct
|
||||
{
|
||||
void* buf;
|
||||
void* buf;
|
||||
siz_t block_size;
|
||||
|
||||
} pblk_t;
|
||||
*/
|
||||
|
||||
@@ -50,11 +52,11 @@ typedef struct
|
||||
/*
|
||||
typedef struct
|
||||
{
|
||||
pblk_t* block_ptrs;
|
||||
dim_t block_ptrs_len;
|
||||
void* block_ptrs;
|
||||
siz_t block_ptrs_len;
|
||||
|
||||
dim_t top_index;
|
||||
dim_t num_blocks;
|
||||
siz_t top_index;
|
||||
siz_t num_blocks;
|
||||
|
||||
siz_t block_size;
|
||||
siz_t align_size;
|
||||
@@ -73,6 +75,11 @@ static void* bli_pblk_buf( pblk_t* pblk )
|
||||
return pblk->buf;
|
||||
}
|
||||
|
||||
static siz_t bli_pblk_block_size( pblk_t* pblk )
|
||||
{
|
||||
return pblk->block_size;
|
||||
}
|
||||
|
||||
// Pool block modification
|
||||
|
||||
static void bli_pblk_set_buf( void* buf, pblk_t* pblk )
|
||||
@@ -80,25 +87,31 @@ static void bli_pblk_set_buf( void* buf, pblk_t* pblk )
|
||||
pblk->buf = buf;
|
||||
}
|
||||
|
||||
static void bli_pblk_set_block_size( siz_t block_size, pblk_t* pblk )
|
||||
{
|
||||
pblk->block_size = block_size;
|
||||
}
|
||||
|
||||
static void bli_pblk_clear( pblk_t* pblk )
|
||||
{
|
||||
bli_pblk_set_buf( NULL, pblk );
|
||||
bli_pblk_set_block_size( 0, pblk );
|
||||
}
|
||||
|
||||
|
||||
// Pool entry query
|
||||
|
||||
static pblk_t* bli_pool_block_ptrs( pool_t* pool )
|
||||
static void* bli_pool_block_ptrs( pool_t* pool )
|
||||
{
|
||||
return pool->block_ptrs;
|
||||
}
|
||||
|
||||
static dim_t bli_pool_block_ptrs_len( pool_t* pool )
|
||||
static siz_t bli_pool_block_ptrs_len( pool_t* pool )
|
||||
{
|
||||
return pool->block_ptrs_len;
|
||||
}
|
||||
|
||||
static dim_t bli_pool_num_blocks( pool_t* pool )
|
||||
static siz_t bli_pool_num_blocks( pool_t* pool )
|
||||
{
|
||||
return pool->num_blocks;
|
||||
}
|
||||
@@ -123,7 +136,7 @@ static free_ft bli_pool_free_fp( pool_t* pool )
|
||||
return pool->free_fp;
|
||||
}
|
||||
|
||||
static dim_t bli_pool_top_index( pool_t* pool )
|
||||
static siz_t bli_pool_top_index( pool_t* pool )
|
||||
{
|
||||
return pool->top_index;
|
||||
}
|
||||
@@ -136,17 +149,17 @@ static bool_t bli_pool_is_exhausted( pool_t* pool )
|
||||
|
||||
// Pool entry modification
|
||||
|
||||
static void bli_pool_set_block_ptrs( pblk_t* block_ptrs, pool_t* pool ) \
|
||||
static void bli_pool_set_block_ptrs( void* block_ptrs, pool_t* pool ) \
|
||||
{
|
||||
pool->block_ptrs = block_ptrs;
|
||||
}
|
||||
|
||||
static void bli_pool_set_block_ptrs_len( dim_t block_ptrs_len, pool_t* pool ) \
|
||||
static void bli_pool_set_block_ptrs_len( siz_t block_ptrs_len, pool_t* pool ) \
|
||||
{
|
||||
pool->block_ptrs_len = block_ptrs_len;
|
||||
}
|
||||
|
||||
static void bli_pool_set_num_blocks( dim_t num_blocks, pool_t* pool ) \
|
||||
static void bli_pool_set_num_blocks( siz_t num_blocks, pool_t* pool ) \
|
||||
{
|
||||
pool->num_blocks = num_blocks;
|
||||
}
|
||||
@@ -171,7 +184,7 @@ static void bli_pool_set_free_fp( free_ft free_fp, pool_t* pool ) \
|
||||
pool->free_fp = free_fp;
|
||||
}
|
||||
|
||||
static void bli_pool_set_top_index( dim_t top_index, pool_t* pool ) \
|
||||
static void bli_pool_set_top_index( siz_t top_index, pool_t* pool ) \
|
||||
{
|
||||
pool->top_index = top_index;
|
||||
}
|
||||
@@ -180,70 +193,70 @@ static void bli_pool_set_top_index( dim_t top_index, pool_t* pool ) \
|
||||
|
||||
void bli_pool_init
|
||||
(
|
||||
dim_t num_blocks,
|
||||
dim_t block_ptrs_len,
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
malloc_ft malloc_fp,
|
||||
free_ft free_fp,
|
||||
pool_t* pool
|
||||
siz_t num_blocks,
|
||||
siz_t block_ptrs_len,
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
malloc_ft malloc_fp,
|
||||
free_ft free_fp,
|
||||
pool_t* restrict pool
|
||||
);
|
||||
void bli_pool_finalize
|
||||
(
|
||||
pool_t* pool
|
||||
pool_t* restrict pool
|
||||
);
|
||||
void bli_pool_reinit
|
||||
(
|
||||
dim_t num_blocks_new,
|
||||
dim_t block_ptrs_len_new,
|
||||
siz_t block_size_new,
|
||||
siz_t align_size_new,
|
||||
pool_t* pool
|
||||
siz_t num_blocks_new,
|
||||
siz_t block_ptrs_len_new,
|
||||
siz_t block_size_new,
|
||||
siz_t align_size_new,
|
||||
pool_t* restrict pool
|
||||
);
|
||||
|
||||
void bli_pool_checkout_block
|
||||
(
|
||||
siz_t req_size,
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
siz_t req_size,
|
||||
pblk_t* restrict block,
|
||||
pool_t* restrict pool
|
||||
);
|
||||
void bli_pool_checkin_block
|
||||
(
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
pblk_t* restrict block,
|
||||
pool_t* restrict pool
|
||||
);
|
||||
|
||||
void bli_pool_grow
|
||||
(
|
||||
dim_t num_blocks_add,
|
||||
pool_t* pool
|
||||
siz_t num_blocks_add,
|
||||
pool_t* restrict pool
|
||||
);
|
||||
void bli_pool_shrink
|
||||
(
|
||||
dim_t num_blocks_sub,
|
||||
pool_t* pool
|
||||
siz_t num_blocks_sub,
|
||||
pool_t* restrict pool
|
||||
);
|
||||
|
||||
void bli_pool_alloc_block
|
||||
(
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
siz_t block_size,
|
||||
siz_t align_size,
|
||||
malloc_ft malloc_fp,
|
||||
pblk_t* restrict block
|
||||
);
|
||||
void bli_pool_free_block
|
||||
(
|
||||
pblk_t* block,
|
||||
pool_t* pool
|
||||
free_ft free_fp,
|
||||
pblk_t* restrict block
|
||||
);
|
||||
|
||||
void bli_pool_print
|
||||
(
|
||||
pool_t* pool
|
||||
pool_t* restrict pool
|
||||
);
|
||||
void bli_pblk_print
|
||||
(
|
||||
pblk_t* pblk
|
||||
pblk_t* restrict pblk
|
||||
);
|
||||
|
||||
#endif
|
||||
|
||||
@@ -49,7 +49,7 @@ typedef struct rntm_s
|
||||
*/
|
||||
|
||||
//
|
||||
// -- rntm_t query -------------------------------------------------------------
|
||||
// -- rntm_t query (public API) ------------------------------------------------
|
||||
//
|
||||
|
||||
static dim_t bli_rntm_num_threads( rntm_t* rntm )
|
||||
@@ -87,6 +87,20 @@ static dim_t bli_rntm_pr_ways( rntm_t* rntm )
|
||||
return bli_rntm_ways_for( BLIS_KR, rntm );
|
||||
}
|
||||
|
||||
//
|
||||
// -- rntm_t query (internal use only) -----------------------------------------
|
||||
//
|
||||
|
||||
static pool_t* bli_rntm_sba_pool( rntm_t* rntm )
|
||||
{
|
||||
return rntm->sba_pool;
|
||||
}
|
||||
|
||||
static membrk_t* bli_rntm_membrk( rntm_t* rntm )
|
||||
{
|
||||
return rntm->membrk;
|
||||
}
|
||||
|
||||
static dim_t bli_rntm_equals( rntm_t* rntm1, rntm_t* rntm2 )
|
||||
{
|
||||
const bool_t nt = bli_rntm_num_threads( rntm1 ) == bli_rntm_num_threads( rntm2 );
|
||||
@@ -151,6 +165,16 @@ static void bli_rntm_set_ways_only( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_
|
||||
bli_rntm_set_pr_ways_only( 1, rntm );
|
||||
}
|
||||
|
||||
static void bli_rntm_set_sba_pool( pool_t* sba_pool, rntm_t* rntm )
|
||||
{
|
||||
rntm->sba_pool = sba_pool;
|
||||
}
|
||||
|
||||
static void bli_rntm_set_membrk( membrk_t* membrk, rntm_t* rntm )
|
||||
{
|
||||
rntm->membrk = membrk;
|
||||
}
|
||||
|
||||
static void bli_rntm_clear_num_threads_only( rntm_t* rntm )
|
||||
{
|
||||
bli_rntm_set_num_threads_only( -1, rntm );
|
||||
@@ -159,6 +183,10 @@ static void bli_rntm_clear_ways_only( rntm_t* rntm )
|
||||
{
|
||||
bli_rntm_set_ways_only( -1, -1, -1, -1, -1, rntm );
|
||||
}
|
||||
static void bli_rntm_clear_sba_pool( rntm_t* rntm )
|
||||
{
|
||||
bli_rntm_set_sba_pool( NULL, rntm );
|
||||
}
|
||||
|
||||
//
|
||||
// -- rntm_t modification (public API) -----------------------------------------
|
||||
@@ -196,12 +224,15 @@ static void bli_rntm_set_ways( dim_t jc, dim_t pc, dim_t ic, dim_t jr, dim_t ir,
|
||||
// will be in a good state upon return.
|
||||
|
||||
#define BLIS_RNTM_INITIALIZER { .num_threads = -1, \
|
||||
.thrloop = { -1, -1, -1, -1, -1, -1 } } \
|
||||
.thrloop = { -1, -1, -1, -1, -1, -1 }, \
|
||||
.sba_pool = NULL } \
|
||||
|
||||
static void bli_rntm_init( rntm_t* rntm )
|
||||
{
|
||||
bli_rntm_clear_num_threads_only( rntm );
|
||||
bli_rntm_clear_ways_only( rntm );
|
||||
|
||||
bli_rntm_clear_sba_pool( rntm );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
190
frame/base/bli_sba.c
Normal file
190
frame/base/bli_sba.c
Normal file
@@ -0,0 +1,190 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
// The small block allocator: an apool_t of array_t of pool_t.
|
||||
static apool_t sba;
|
||||
|
||||
apool_t* bli_sba_query( void )
|
||||
{
|
||||
return &sba;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_sba_init( void )
|
||||
{
|
||||
bli_apool_init( BLIS_MALLOC_INTL, BLIS_FREE_INTL, &sba );
|
||||
}
|
||||
|
||||
void bli_sba_finalize( void )
|
||||
{
|
||||
bli_apool_finalize( &sba );
|
||||
}
|
||||
|
||||
void* bli_sba_acquire
|
||||
(
|
||||
rntm_t* restrict rntm,
|
||||
siz_t req_size
|
||||
)
|
||||
{
|
||||
void* block;
|
||||
|
||||
#ifdef BLIS_ENABLE_SBA_POOLS
|
||||
if ( rntm == NULL )
|
||||
{
|
||||
block = bli_malloc_intl( req_size );
|
||||
}
|
||||
else
|
||||
{
|
||||
pblk_t pblk;
|
||||
|
||||
// Query the small block pool from the rntm.
|
||||
pool_t* restrict pool = bli_rntm_sba_pool( rntm );
|
||||
|
||||
// Query the block_size of the pool_t so that we can request the exact
|
||||
// size present.
|
||||
const siz_t block_size = bli_pool_block_size( pool );
|
||||
|
||||
// Sanity check: Make sure the requested size is no larger than the
|
||||
// block_size field of the pool.
|
||||
if ( block_size < req_size )
|
||||
{
|
||||
printf( "bli_sba_acquire(): ** pool block_size is %d but req_size is %d.\n",
|
||||
( int )block_size, ( int )req_size );
|
||||
bli_abort();
|
||||
}
|
||||
|
||||
// Check out a block using the block_size queried above.
|
||||
bli_pool_checkout_block( block_size, &pblk, pool );
|
||||
|
||||
// The block address is stored within the pblk_t.
|
||||
block = bli_pblk_buf( &pblk );
|
||||
}
|
||||
#else
|
||||
|
||||
block = bli_malloc_intl( req_size );
|
||||
|
||||
#endif
|
||||
|
||||
// Return the address obtained from the pblk_t.
|
||||
return block;
|
||||
}
|
||||
|
||||
void bli_sba_release
|
||||
(
|
||||
rntm_t* restrict rntm,
|
||||
void* restrict block
|
||||
)
|
||||
{
|
||||
#ifdef BLIS_ENABLE_SBA_POOLS
|
||||
if ( rntm == NULL )
|
||||
{
|
||||
bli_free_intl( block );
|
||||
}
|
||||
else
|
||||
{
|
||||
pblk_t pblk;
|
||||
|
||||
// Query the small block pool from the rntm.
|
||||
pool_t* restrict pool = bli_rntm_sba_pool( rntm );
|
||||
|
||||
// Query the block_size field from the pool. This is not super-important
|
||||
// for this particular application of the pool_t (that is, the "leaf"
|
||||
// component of the sba), but it seems like good housekeeping to maintain
|
||||
// the block_size field of the pblk_t in case its ever needed/read.
|
||||
const siz_t block_size = bli_pool_block_size( pool );
|
||||
|
||||
// Embed the block's memory address into a pblk_t, along with the
|
||||
// block_size queried from the pool.
|
||||
bli_pblk_set_buf( block, &pblk );
|
||||
bli_pblk_set_block_size( block_size, &pblk );
|
||||
|
||||
// Check the pblk_t back into the pool_t. (It's okay that the pblk_t is
|
||||
// a local variable since its contents are copied into the pool's internal
|
||||
// data structure--an array of pblk_t.)
|
||||
bli_pool_checkin_block( &pblk, pool );
|
||||
}
|
||||
#else
|
||||
|
||||
bli_free_intl( block );
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
array_t* bli_sba_checkout_array
|
||||
(
|
||||
const siz_t n_threads
|
||||
)
|
||||
{
|
||||
#ifndef BLIS_ENABLE_SBA_POOLS
|
||||
return NULL;
|
||||
#endif
|
||||
|
||||
return bli_apool_checkout_array( n_threads, &sba );
|
||||
}
|
||||
|
||||
void bli_sba_checkin_array
|
||||
(
|
||||
array_t* restrict array
|
||||
)
|
||||
{
|
||||
#ifndef BLIS_ENABLE_SBA_POOLS
|
||||
return;
|
||||
#endif
|
||||
|
||||
bli_apool_checkin_array( array, &sba );
|
||||
}
|
||||
|
||||
void bli_sba_rntm_set_pool
|
||||
(
|
||||
siz_t index,
|
||||
array_t* restrict array,
|
||||
rntm_t* restrict rntm
|
||||
)
|
||||
{
|
||||
#ifndef BLIS_ENABLE_SBA_POOLS
|
||||
bli_rntm_set_sba_pool( NULL, rntm );
|
||||
return;
|
||||
#endif
|
||||
|
||||
// Query the pool_t* in the array_t corresponding to index.
|
||||
pool_t* restrict pool = bli_apool_array_elem( index, array );
|
||||
|
||||
// Embed the pool_t* into the rntm_t.
|
||||
bli_rntm_set_sba_pool( pool, rntm );
|
||||
}
|
||||
|
||||
|
||||
75
frame/base/bli_sba.h
Normal file
75
frame/base/bli_sba.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SBA_H
|
||||
#define BLIS_SBA_H
|
||||
|
||||
apool_t* bli_sba_query( void );
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_sba_init( void );
|
||||
void bli_sba_finalize( void );
|
||||
|
||||
array_t* bli_sba_checkout_array
|
||||
(
|
||||
const siz_t n_threads
|
||||
);
|
||||
|
||||
void bli_sba_checkin_array
|
||||
(
|
||||
array_t* restrict array
|
||||
);
|
||||
|
||||
void bli_sba_rntm_set_pool
|
||||
(
|
||||
siz_t index,
|
||||
array_t* restrict array,
|
||||
rntm_t* restrict rntm
|
||||
);
|
||||
|
||||
void* bli_sba_acquire
|
||||
(
|
||||
rntm_t* restrict rntm,
|
||||
siz_t req_size
|
||||
);
|
||||
void bli_sba_release
|
||||
(
|
||||
rntm_t* restrict rntm,
|
||||
void* restrict block
|
||||
);
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
@@ -922,15 +922,32 @@ typedef enum
|
||||
//
|
||||
|
||||
// These headers must be included here (or earlier) because definitions they
|
||||
// provide are needed in the pool_t and membrk_t structs.
|
||||
// provide are needed in the pool_t and related structs.
|
||||
#include "bli_pthread.h"
|
||||
#include "bli_malloc.h"
|
||||
|
||||
// -- Array type --
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void* buf;
|
||||
|
||||
siz_t num_elem;
|
||||
siz_t elem_size;
|
||||
|
||||
//malloc_ft malloc_fp;
|
||||
//free_ft free_fp;
|
||||
|
||||
} array_t;
|
||||
|
||||
|
||||
// -- Pool block type --
|
||||
|
||||
typedef struct
|
||||
{
|
||||
void* buf;
|
||||
void* buf;
|
||||
siz_t block_size;
|
||||
|
||||
} pblk_t;
|
||||
|
||||
|
||||
@@ -938,7 +955,7 @@ typedef struct
|
||||
|
||||
typedef struct
|
||||
{
|
||||
pblk_t* block_ptrs;
|
||||
void* block_ptrs;
|
||||
dim_t block_ptrs_len;
|
||||
|
||||
dim_t top_index;
|
||||
@@ -953,7 +970,19 @@ typedef struct
|
||||
} pool_t;
|
||||
|
||||
|
||||
// -- Memory broker object type --
|
||||
// -- small block allocator: Locked pool-of-arrays-of-pools type --
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bli_pthread_mutex_t mutex;
|
||||
pool_t pool;
|
||||
|
||||
siz_t def_array_len;
|
||||
|
||||
} apool_t;
|
||||
|
||||
|
||||
// -- packing block allocator: Locked set of pools type --
|
||||
|
||||
typedef struct membrk_s
|
||||
{
|
||||
@@ -975,7 +1004,6 @@ typedef struct mem_s
|
||||
pblk_t pblk;
|
||||
packbuf_t buf_type;
|
||||
pool_t* pool;
|
||||
membrk_t* membrk;
|
||||
siz_t size;
|
||||
} mem_t;
|
||||
|
||||
@@ -1199,7 +1227,6 @@ typedef struct cntx_s
|
||||
pack_t schema_b_panel;
|
||||
pack_t schema_c_panel;
|
||||
|
||||
membrk_t* membrk;
|
||||
} cntx_t;
|
||||
|
||||
|
||||
@@ -1207,9 +1234,18 @@ typedef struct cntx_s
|
||||
|
||||
typedef struct rntm_s
|
||||
{
|
||||
// "External" fields: these may be queried by the end-user.
|
||||
dim_t num_threads;
|
||||
dim_t thrloop[ BLIS_NUM_LOOPS ];
|
||||
|
||||
// "Internal" fields: these should not be exposed to the end-user.
|
||||
|
||||
// The small block pool, which is attached in the l3 thread decorator.
|
||||
pool_t* sba_pool;
|
||||
|
||||
// The packing block allocator, which is attached in the l3 thread decorator.
|
||||
membrk_t* membrk;
|
||||
|
||||
} rntm_t;
|
||||
|
||||
|
||||
@@ -1296,28 +1332,31 @@ typedef enum
|
||||
// Buffer-specific errors
|
||||
BLIS_EXPECTED_NONNULL_OBJECT_BUFFER = (-110),
|
||||
|
||||
// Memory allocator errors
|
||||
BLIS_INVALID_PACKBUF = (-120),
|
||||
BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-122),
|
||||
BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-123),
|
||||
BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-124),
|
||||
BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-125),
|
||||
// Memory errors
|
||||
BLIS_MALLOC_RETURNED_NULL = (-120),
|
||||
|
||||
// Internal memory pool errors
|
||||
BLIS_INVALID_PACKBUF = (-130),
|
||||
BLIS_EXHAUSTED_CONTIG_MEMORY_POOL = (-131),
|
||||
BLIS_INSUFFICIENT_STACK_BUF_SIZE = (-132),
|
||||
BLIS_ALIGNMENT_NOT_POWER_OF_TWO = (-133),
|
||||
BLIS_ALIGNMENT_NOT_MULT_OF_PTR_SIZE = (-134),
|
||||
|
||||
// Object-related errors
|
||||
BLIS_EXPECTED_OBJECT_ALIAS = (-130),
|
||||
BLIS_EXPECTED_OBJECT_ALIAS = (-140),
|
||||
|
||||
// Architecture-related errors
|
||||
BLIS_INVALID_ARCH_ID = (-140),
|
||||
BLIS_INVALID_ARCH_ID = (-150),
|
||||
|
||||
// Blocksize-related errors
|
||||
BLIS_MC_DEF_NONMULTIPLE_OF_MR = (-150),
|
||||
BLIS_MC_MAX_NONMULTIPLE_OF_MR = (-151),
|
||||
BLIS_NC_DEF_NONMULTIPLE_OF_NR = (-152),
|
||||
BLIS_NC_MAX_NONMULTIPLE_OF_NR = (-153),
|
||||
BLIS_KC_DEF_NONMULTIPLE_OF_KR = (-154),
|
||||
BLIS_KC_MAX_NONMULTIPLE_OF_KR = (-155),
|
||||
BLIS_MC_DEF_NONMULTIPLE_OF_MR = (-160),
|
||||
BLIS_MC_MAX_NONMULTIPLE_OF_MR = (-161),
|
||||
BLIS_NC_DEF_NONMULTIPLE_OF_NR = (-162),
|
||||
BLIS_NC_MAX_NONMULTIPLE_OF_NR = (-163),
|
||||
BLIS_KC_DEF_NONMULTIPLE_OF_KR = (-164),
|
||||
BLIS_KC_MAX_NONMULTIPLE_OF_KR = (-165),
|
||||
|
||||
BLIS_ERROR_CODE_MAX = (-160)
|
||||
BLIS_ERROR_CODE_MAX = (-170)
|
||||
} err_t;
|
||||
|
||||
#endif
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2016, Hewlett Packard Enterprise Development LP
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -104,6 +105,9 @@ extern "C" {
|
||||
#include "bli_ind.h"
|
||||
#include "bli_membrk.h"
|
||||
#include "bli_pool.h"
|
||||
#include "bli_array.h"
|
||||
#include "bli_apool.h"
|
||||
#include "bli_sba.h"
|
||||
#include "bli_memsys.h"
|
||||
#include "bli_mem.h"
|
||||
#include "bli_part.h"
|
||||
|
||||
@@ -94,9 +94,11 @@ void PASTEMAC(opname,imeth) \
|
||||
cntx_t cntx_l; \
|
||||
if ( ind == BLIS_3MH || ind == BLIS_4MH ) { cntx_l = *cntx; cntx = &cntx_l; } \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Some induced methods execute in multiple "stages". */ \
|
||||
for ( i = 0; i < nstage; ++i ) \
|
||||
@@ -185,9 +187,11 @@ void PASTEMAC(opname,imeth) \
|
||||
cntx_t cntx_l; \
|
||||
if ( ind == BLIS_3MH || ind == BLIS_4MH ) { cntx_l = *cntx; cntx = &cntx_l; } \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Some induced methods execute in multiple "stages". */ \
|
||||
for ( i = 0; i < nstage; ++i ) \
|
||||
@@ -274,9 +278,11 @@ void PASTEMAC(opname,imeth) \
|
||||
cntx_t cntx_l; \
|
||||
if ( ind == BLIS_3MH || ind == BLIS_4MH ) { cntx_l = *cntx; cntx = &cntx_l; } \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Some induced methods execute in multiple "stages". */ \
|
||||
for ( i = 0; i < nstage; ++i ) \
|
||||
@@ -348,9 +354,11 @@ void PASTEMAC(opname,imeth) \
|
||||
_cntx_init() function. */ \
|
||||
cntx = bli_gks_query_ind_cntx( ind, dt ); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Some induced methods execute in multiple "stages". */ \
|
||||
for ( i = 0; i < nstage; ++i ) \
|
||||
@@ -408,9 +416,11 @@ void PASTEMAC(opname,imeth) \
|
||||
_cntx_init() function. */ \
|
||||
cntx = bli_gks_query_ind_cntx( ind, dt ); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
{ \
|
||||
/* NOTE: trsm cannot be implemented via any induced method that
|
||||
|
||||
@@ -56,9 +56,11 @@ void PASTEMAC(opname,imeth) \
|
||||
num_t dt = bli_obj_dt( c ); \
|
||||
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
func( alpha, a, b, beta, c, cntx, rntm ); \
|
||||
}
|
||||
@@ -90,9 +92,11 @@ void PASTEMAC(opname,imeth) \
|
||||
num_t dt = bli_obj_dt( c ); \
|
||||
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
func( side, alpha, a, b, beta, c, cntx, rntm ); \
|
||||
}
|
||||
@@ -122,9 +126,11 @@ void PASTEMAC(opname,imeth) \
|
||||
num_t dt = bli_obj_dt( c ); \
|
||||
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
func( alpha, a, beta, c, cntx, rntm ); \
|
||||
}
|
||||
@@ -153,9 +159,11 @@ void PASTEMAC(opname,imeth) \
|
||||
num_t dt = bli_obj_dt( b ); \
|
||||
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
func( side, alpha, a, b, cntx, rntm ); \
|
||||
}
|
||||
|
||||
@@ -61,9 +61,11 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
@@ -103,9 +105,11 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
@@ -139,9 +143,11 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
@@ -174,9 +180,11 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
@@ -208,9 +216,11 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Initialize a local runtime with global settings if necessary. */ \
|
||||
/* Initialize a local runtime with global settings if necessary. Note
|
||||
that in the case that a runtime is passed in, we make a local copy. */ \
|
||||
rntm_t rntm_l; \
|
||||
if ( rntm == NULL ) { rntm = &rntm_l; bli_thread_init_rntm( rntm ); } \
|
||||
if ( rntm == NULL ) { bli_thread_init_rntm( &rntm_l ); rntm = &rntm_l; } \
|
||||
else { rntm_l = *rntm; rntm = &rntm_l; } \
|
||||
\
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
|
||||
@@ -6,6 +6,7 @@
|
||||
|
||||
Copyright (C) 2018, Southern Methodist University
|
||||
Copyright (C) 2018, The University of Texas at Austin
|
||||
Copyright (C) 2018, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -42,39 +43,57 @@
|
||||
// This branch defines a pthread-like API, bli_pthread_*(), and implements it
|
||||
// in terms of Windows API calls.
|
||||
|
||||
int bli_pthread_mutex_init( bli_pthread_mutex_t* mutex,
|
||||
const bli_pthread_mutexattr_t* attr )
|
||||
int bli_pthread_mutex_init
|
||||
(
|
||||
bli_pthread_mutex_t* mutex,
|
||||
const bli_pthread_mutexattr_t* attr
|
||||
)
|
||||
{
|
||||
if ( attr ) return EINVAL;
|
||||
InitializeSRWLock( mutex );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_mutex_destroy( bli_pthread_mutex_t* mutex )
|
||||
int bli_pthread_mutex_destroy
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_mutex_lock( bli_pthread_mutex_t* mutex )
|
||||
int bli_pthread_mutex_lock
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
AcquireSRWLockExclusive( mutex );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_mutex_trylock( bli_pthread_mutex_t* mutex )
|
||||
int bli_pthread_mutex_trylock
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
return TryAcquireSRWLockExclusive( mutex ) ? 0 : EBUSY;
|
||||
}
|
||||
|
||||
int bli_pthread_mutex_unlock( bli_pthread_mutex_t* mutex )
|
||||
int bli_pthread_mutex_unlock
|
||||
(
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
ReleaseSRWLockExclusive( mutex );
|
||||
return 0;
|
||||
}
|
||||
|
||||
static BOOL bli_init_once_wrapper( bli_pthread_once_t* once,
|
||||
void* param,
|
||||
void** context)
|
||||
static BOOL bli_init_once_wrapper
|
||||
(
|
||||
bli_pthread_once_t* once,
|
||||
void* param,
|
||||
void** context
|
||||
)
|
||||
{
|
||||
( void )once;
|
||||
( void )context;
|
||||
@@ -83,33 +102,49 @@ static BOOL bli_init_once_wrapper( bli_pthread_once_t* once,
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
void bli_pthread_once( bli_pthread_once_t* once, void (*init)(void) )
|
||||
void bli_pthread_once
|
||||
(
|
||||
bli_pthread_once_t* once,
|
||||
void (*init)(void)
|
||||
)
|
||||
{
|
||||
InitOnceExecuteOnce( once, bli_init_once_wrapper, init, NULL );
|
||||
}
|
||||
|
||||
int bli_pthread_cond_init( bli_pthread_cond_t* cond,
|
||||
const bli_pthread_condattr_t* attr )
|
||||
int bli_pthread_cond_init
|
||||
(
|
||||
bli_pthread_cond_t* cond,
|
||||
const bli_pthread_condattr_t* attr
|
||||
)
|
||||
{
|
||||
if ( attr ) return EINVAL;
|
||||
InitializeConditionVariable( cond );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_cond_destroy( bli_pthread_cond_t* cond )
|
||||
int bli_pthread_cond_destroy
|
||||
(
|
||||
bli_pthread_cond_t* cond
|
||||
)
|
||||
{
|
||||
( void )cond;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_cond_wait( bli_pthread_cond_t* cond,
|
||||
bli_pthread_mutex_t* mutex )
|
||||
int bli_pthread_cond_wait
|
||||
(
|
||||
bli_pthread_cond_t* cond,
|
||||
bli_pthread_mutex_t* mutex
|
||||
)
|
||||
{
|
||||
if ( !SleepConditionVariableSRW( cond, mutex, INFINITE, 0 ) ) return EAGAIN;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_cond_broadcast( bli_pthread_cond_t* cond )
|
||||
int bli_pthread_cond_broadcast
|
||||
(
|
||||
bli_pthread_cond_t* cond
|
||||
)
|
||||
{
|
||||
WakeAllConditionVariable( cond );
|
||||
return 0;
|
||||
@@ -120,19 +155,26 @@ typedef struct
|
||||
void* (*start_routine)( void* );
|
||||
void* param;
|
||||
void** retval;
|
||||
|
||||
} bli_thread_param;
|
||||
|
||||
static DWORD bli_thread_func( void* param_ )
|
||||
static DWORD bli_thread_func
|
||||
(
|
||||
void* param_
|
||||
)
|
||||
{
|
||||
bli_thread_param* param = param_;
|
||||
*param->retval = param->start_routine( param->param );
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_create( bli_pthread_t* thread,
|
||||
const bli_pthread_attr_t* attr,
|
||||
void* (*start_routine)(void*),
|
||||
void* arg )
|
||||
int bli_pthread_create
|
||||
(
|
||||
bli_pthread_t* thread,
|
||||
const bli_pthread_attr_t* attr,
|
||||
void* (*start_routine)(void*),
|
||||
void* arg
|
||||
)
|
||||
{
|
||||
if ( attr ) return EINVAL;
|
||||
bli_thread_param param = { start_routine, arg, &thread->retval };
|
||||
@@ -141,8 +183,11 @@ int bli_pthread_create( bli_pthread_t* thread,
|
||||
return 0;
|
||||
}
|
||||
|
||||
int bli_pthread_join( bli_pthread_t thread,
|
||||
void** retval )
|
||||
int bli_pthread_join
|
||||
(
|
||||
bli_pthread_t thread,
|
||||
void** retval
|
||||
)
|
||||
{
|
||||
if ( !WaitForSingleObject( thread.handle, INFINITE ) ) return EAGAIN;
|
||||
if ( retval ) *retval = thread.retval;
|
||||
|
||||
@@ -36,18 +36,18 @@
|
||||
|
||||
void* bli_thrcomm_bcast
|
||||
(
|
||||
thrcomm_t* comm,
|
||||
dim_t id,
|
||||
void* to_send
|
||||
void* to_send,
|
||||
thrcomm_t* comm
|
||||
)
|
||||
{
|
||||
if ( comm == NULL || comm->n_threads == 1 ) return to_send;
|
||||
|
||||
if ( id == 0 ) comm->sent_object = to_send;
|
||||
|
||||
bli_thrcomm_barrier( comm, id );
|
||||
bli_thrcomm_barrier( id, comm );
|
||||
void* object = comm->sent_object;
|
||||
bli_thrcomm_barrier( comm, id );
|
||||
bli_thrcomm_barrier( id, comm );
|
||||
|
||||
return object;
|
||||
}
|
||||
@@ -71,7 +71,7 @@ void* bli_thrcomm_bcast
|
||||
|
||||
#endif
|
||||
|
||||
void bli_thrcomm_barrier_atomic( thrcomm_t* comm, dim_t t_id )
|
||||
void bli_thrcomm_barrier_atomic( dim_t t_id, thrcomm_t* comm )
|
||||
{
|
||||
// Return early if the comm is NULL or if there is only one
|
||||
// thread participating.
|
||||
|
||||
@@ -55,14 +55,14 @@ static dim_t bli_thrcomm_num_threads( thrcomm_t* comm )
|
||||
|
||||
|
||||
// Thread communicator prototypes.
|
||||
thrcomm_t* bli_thrcomm_create( dim_t n_threads );
|
||||
void bli_thrcomm_free( thrcomm_t* comm );
|
||||
void bli_thrcomm_init( thrcomm_t* comm, dim_t n_threads );
|
||||
thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads );
|
||||
void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm );
|
||||
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm );
|
||||
void bli_thrcomm_cleanup( thrcomm_t* comm );
|
||||
void bli_thrcomm_barrier( thrcomm_t* comm, dim_t thread_id );
|
||||
void* bli_thrcomm_bcast( thrcomm_t* comm, dim_t inside_id, void* to_send );
|
||||
void bli_thrcomm_barrier( dim_t thread_id, thrcomm_t* comm );
|
||||
void* bli_thrcomm_bcast( dim_t inside_id, void* to_send, thrcomm_t* comm );
|
||||
|
||||
void bli_thrcomm_barrier_atomic( thrcomm_t* comm, dim_t t_id );
|
||||
void bli_thrcomm_barrier_atomic( dim_t thread_id, thrcomm_t* comm );
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -37,28 +37,35 @@
|
||||
|
||||
#ifdef BLIS_ENABLE_OPENMP
|
||||
|
||||
thrcomm_t* bli_thrcomm_create( dim_t n_threads )
|
||||
thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrcomm_create(): " );
|
||||
#endif
|
||||
|
||||
thrcomm_t* comm = bli_malloc_intl( sizeof(thrcomm_t) );
|
||||
bli_thrcomm_init( comm, n_threads );
|
||||
thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) );
|
||||
|
||||
bli_thrcomm_init( n_threads, comm );
|
||||
|
||||
return comm;
|
||||
}
|
||||
|
||||
void bli_thrcomm_free( thrcomm_t* comm )
|
||||
void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
|
||||
bli_thrcomm_cleanup( comm );
|
||||
bli_free_intl( comm );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrcomm_free(): " );
|
||||
#endif
|
||||
|
||||
bli_sba_release( rntm, comm );
|
||||
}
|
||||
|
||||
#ifndef BLIS_TREE_BARRIER
|
||||
|
||||
void bli_thrcomm_init( thrcomm_t* comm, dim_t n_threads)
|
||||
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
comm->sent_object = NULL;
|
||||
@@ -75,7 +82,7 @@ void bli_thrcomm_cleanup( thrcomm_t* comm )
|
||||
|
||||
//'Normal' barrier for openmp
|
||||
//barrier routine taken from art of multicore programming
|
||||
void bli_thrcomm_barrier( thrcomm_t* comm, dim_t t_id )
|
||||
void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
{
|
||||
#if 0
|
||||
if ( comm == NULL || comm->n_threads == 1 )
|
||||
@@ -97,12 +104,12 @@ void bli_thrcomm_barrier( thrcomm_t* comm, dim_t t_id )
|
||||
while ( *listener == my_sense ) {}
|
||||
}
|
||||
#endif
|
||||
bli_thrcomm_barrier_atomic( comm, t_id );
|
||||
bli_thrcomm_barrier_atomic( t_id, comm );
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void bli_thrcomm_init( thrcomm_t* comm, dim_t n_threads)
|
||||
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
comm->sent_object = NULL;
|
||||
@@ -176,7 +183,7 @@ void bli_thrcomm_tree_barrier_free( barrier_t* barrier )
|
||||
return;
|
||||
}
|
||||
|
||||
void bli_thrcomm_barrier( thrcomm_t* comm, dim_t t_id )
|
||||
void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
{
|
||||
bli_thrcomm_tree_barrier( comm->barriers[t_id] );
|
||||
}
|
||||
@@ -207,6 +214,7 @@ void bli_thrcomm_tree_barrier( barrier_t* barack )
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
// Define a dummy function bli_l3_thread_entry(), which is needed in the
|
||||
// pthreads version, so that when building Windows DLLs (with OpenMP enabled
|
||||
// or no multithreading) we don't risk having an unresolved symbol.
|
||||
@@ -240,22 +248,54 @@ void bli_l3_thread_decorator
|
||||
bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );
|
||||
|
||||
// Query the total number of threads from the rntm_t object.
|
||||
dim_t n_threads = bli_rntm_num_threads( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* gl_comm = bli_thrcomm_create( n_threads );
|
||||
const dim_t n_threads = bli_rntm_num_threads( rntm );
|
||||
|
||||
#ifdef PRINT_THRINFO
|
||||
thrinfo_t** threads = bli_malloc_intl( n_threads * sizeof( thrinfo_t* ) );
|
||||
#endif
|
||||
|
||||
// NOTE: The sba was initialized in bli_init().
|
||||
|
||||
// Check out an array_t from the small block allocator. This is done
|
||||
// with an internal lock to ensure only one application thread accesses
|
||||
// the sba at a time. bli_sba_checkout_array() will also automatically
|
||||
// resize the array_t, if necessary.
|
||||
array_t* restrict array = bli_sba_checkout_array( n_threads );
|
||||
|
||||
// Access the pool_t* for thread 0 and embed it into the rntm. We do
|
||||
// this up-front only so that we have the rntm_t.sba_pool field
|
||||
// initialized and ready for the global communicator creation below.
|
||||
bli_sba_rntm_set_pool( 0, array, rntm );
|
||||
|
||||
// Set the packing block allocator field of the rntm. This will be
|
||||
// inherited by all of the child threads when they make local copies of
|
||||
// the rntm below.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
|
||||
|
||||
_Pragma( "omp parallel num_threads(n_threads)" )
|
||||
{
|
||||
dim_t tid = omp_get_thread_num();
|
||||
// Create a thread-local copy of the master thread's rntm_t. This is
|
||||
// necessary since we want each thread to be able to track its own
|
||||
// small block pool_t as it executes down the function stack.
|
||||
rntm_t rntm_l = *rntm;
|
||||
rntm_t* restrict rntm_p = &rntm_l;
|
||||
|
||||
// Query the thread's id from OpenMP.
|
||||
const dim_t tid = omp_get_thread_num();
|
||||
|
||||
// Check for a somewhat obscure OpenMP thread-mismatch issue.
|
||||
bli_l3_thread_decorator_thread_check( n_threads, tid, gl_comm, rntm );
|
||||
bli_l3_thread_decorator_thread_check( n_threads, tid, gl_comm, rntm_p );
|
||||
|
||||
// Use the thread id to access the appropriate pool_t* within the
|
||||
// array_t, and use it to set the sba_pool field within the rntm_t.
|
||||
// If the pool_t* element within the array_t is NULL, it will first
|
||||
// be allocated/initialized.
|
||||
bli_sba_rntm_set_pool( tid, array, rntm_p );
|
||||
|
||||
|
||||
obj_t a_t, b_t, c_t;
|
||||
cntl_t* cntl_use;
|
||||
@@ -272,10 +312,10 @@ void bli_l3_thread_decorator
|
||||
|
||||
// Create a default control tree for the operation, if needed.
|
||||
bli_l3_cntl_create_if( family, schema_a, schema_b,
|
||||
&a_t, &b_t, &c_t, cntl, &cntl_use );
|
||||
&a_t, &b_t, &c_t, rntm_p, cntl, &cntl_use );
|
||||
|
||||
// Create the root node of the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_create_root( tid, gl_comm, rntm, cntl_use, &thread );
|
||||
bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread );
|
||||
|
||||
func
|
||||
(
|
||||
@@ -285,21 +325,20 @@ void bli_l3_thread_decorator
|
||||
beta,
|
||||
&c_t,
|
||||
cntx,
|
||||
rntm,
|
||||
rntm_p,
|
||||
cntl_use,
|
||||
thread
|
||||
);
|
||||
|
||||
// Free the thread's local control tree.
|
||||
bli_l3_cntl_free( cntl_use, thread );
|
||||
bli_l3_cntl_free( rntm_p, cntl_use, thread );
|
||||
|
||||
#ifdef PRINT_THRINFO
|
||||
threads[tid] = thread;
|
||||
#else
|
||||
// Free the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_free( thread );
|
||||
bli_l3_thrinfo_free( rntm_p, thread );
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
// We shouldn't free the global communicator since it was already freed
|
||||
@@ -308,9 +347,14 @@ void bli_l3_thread_decorator
|
||||
|
||||
#ifdef PRINT_THRINFO
|
||||
bli_l3_thrinfo_print_paths( threads );
|
||||
bli_l3_thrinfo_free_paths( threads );
|
||||
exit(1);
|
||||
//bli_l3_thrinfo_free_paths( rntm_p, threads );
|
||||
#endif
|
||||
|
||||
// Check the array_t back into the small block allocator. Similar to the
|
||||
// check-out, this is done using a lock embedded within the sba to ensure
|
||||
// mutual exclusion.
|
||||
bli_sba_checkin_array( array );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -348,8 +392,8 @@ void bli_l3_thread_decorator_thread_check
|
||||
// if the number of threads in the current region is 1. If, for
|
||||
// example, BLIS requested 4 threads but only got 3, then we
|
||||
// abort().
|
||||
if ( tid == 0 )
|
||||
{
|
||||
//if ( tid == 0 )
|
||||
//{
|
||||
if ( n_threads_real != 1 )
|
||||
{
|
||||
bli_print_msg( "A different number of threads was "
|
||||
@@ -359,10 +403,10 @@ void bli_l3_thread_decorator_thread_check
|
||||
}
|
||||
|
||||
//n_threads = 1; // not needed since it has no effect?
|
||||
bli_thrcomm_init( gl_comm, 1 );
|
||||
bli_thrcomm_init( 1, gl_comm );
|
||||
bli_rntm_set_num_threads_only( 1, rntm );
|
||||
bli_rntm_set_ways_only( 1, 1, 1, 1, 1, rntm );
|
||||
}
|
||||
//}
|
||||
|
||||
// Synchronize all threads and continue.
|
||||
_Pragma( "omp barrier" )
|
||||
|
||||
@@ -37,32 +37,35 @@
|
||||
|
||||
#ifdef BLIS_ENABLE_PTHREADS
|
||||
|
||||
thrcomm_t* bli_thrcomm_create( dim_t n_threads )
|
||||
thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrcomm_create(): " );
|
||||
#endif
|
||||
|
||||
thrcomm_t* comm = bli_malloc_intl( sizeof(thrcomm_t) );
|
||||
bli_thrcomm_init( comm, n_threads );
|
||||
thrcomm_t* comm = bli_sba_acquire( rntm, sizeof(thrcomm_t) );
|
||||
|
||||
bli_thrcomm_init( n_threads, comm );
|
||||
|
||||
return comm;
|
||||
}
|
||||
|
||||
void bli_thrcomm_free( thrcomm_t* comm )
|
||||
void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
|
||||
bli_thrcomm_cleanup( comm );
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrcomm_free(): " );
|
||||
#endif
|
||||
|
||||
bli_free_intl( comm );
|
||||
bli_sba_release( rntm, comm );
|
||||
}
|
||||
|
||||
#ifdef BLIS_USE_PTHREAD_BARRIER
|
||||
|
||||
void bli_thrcomm_init( thrcomm_t* comm, dim_t n_threads)
|
||||
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
comm->sent_object = NULL;
|
||||
@@ -76,14 +79,14 @@ void bli_thrcomm_cleanup( thrcomm_t* comm )
|
||||
bli_pthread_barrier_destroy( &comm->barrier );
|
||||
}
|
||||
|
||||
void bli_thrcomm_barrier( thrcomm_t* comm, dim_t t_id )
|
||||
void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
{
|
||||
bli_pthread_barrier_wait( &comm->barrier );
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
void bli_thrcomm_init( thrcomm_t* comm, dim_t n_threads)
|
||||
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
comm->sent_object = NULL;
|
||||
@@ -104,7 +107,7 @@ void bli_thrcomm_cleanup( thrcomm_t* comm )
|
||||
//#endif
|
||||
}
|
||||
|
||||
void bli_thrcomm_barrier( thrcomm_t* comm, dim_t t_id )
|
||||
void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
{
|
||||
#if 0
|
||||
if ( comm == NULL || comm->n_threads == 1 ) return;
|
||||
@@ -130,7 +133,7 @@ void bli_thrcomm_barrier( thrcomm_t* comm, dim_t t_id )
|
||||
while( *listener == my_sense ) {}
|
||||
}
|
||||
#endif
|
||||
bli_thrcomm_barrier_atomic( comm, t_id );
|
||||
bli_thrcomm_barrier_atomic( t_id, comm );
|
||||
}
|
||||
|
||||
#endif
|
||||
@@ -151,8 +154,9 @@ typedef struct thread_data
|
||||
cntx_t* cntx;
|
||||
rntm_t* rntm;
|
||||
cntl_t* cntl;
|
||||
dim_t id;
|
||||
dim_t tid;
|
||||
thrcomm_t* gl_comm;
|
||||
array_t* array;
|
||||
} thread_data_t;
|
||||
|
||||
// Entry point for additional threads
|
||||
@@ -172,9 +176,22 @@ void* bli_l3_thread_entry( void* data_void )
|
||||
cntx_t* cntx = data->cntx;
|
||||
rntm_t* rntm = data->rntm;
|
||||
cntl_t* cntl = data->cntl;
|
||||
dim_t id = data->id;
|
||||
dim_t tid = data->tid;
|
||||
array_t* array = data->array;
|
||||
thrcomm_t* gl_comm = data->gl_comm;
|
||||
|
||||
// Create a thread-local copy of the master thread's rntm_t. This is
|
||||
// necessary since we want each thread to be able to track its own
|
||||
// small block pool_t as it executes down the function stack.
|
||||
rntm_t rntm_l = *rntm;
|
||||
rntm_t* restrict rntm_p = &rntm_l;
|
||||
|
||||
// Use the thread id to access the appropriate pool_t* within the
|
||||
// array_t, and use it to set the sba_pool field within the rntm_t.
|
||||
// If the pool_t* element within the array_t is NULL, it will first
|
||||
// be allocated/initialized.
|
||||
bli_sba_rntm_set_pool( tid, array, rntm_p );
|
||||
|
||||
obj_t a_t, b_t, c_t;
|
||||
cntl_t* cntl_use;
|
||||
thrinfo_t* thread;
|
||||
@@ -190,10 +207,10 @@ void* bli_l3_thread_entry( void* data_void )
|
||||
|
||||
// Create a default control tree for the operation, if needed.
|
||||
bli_l3_cntl_create_if( family, schema_a, schema_b,
|
||||
&a_t, &b_t, &c_t, cntl, &cntl_use );
|
||||
&a_t, &b_t, &c_t, rntm_p, cntl, &cntl_use );
|
||||
|
||||
// Create the root node of the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_create_root( id, gl_comm, rntm, cntl_use, &thread );
|
||||
bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread );
|
||||
|
||||
func
|
||||
(
|
||||
@@ -203,16 +220,16 @@ void* bli_l3_thread_entry( void* data_void )
|
||||
beta,
|
||||
&c_t,
|
||||
cntx,
|
||||
rntm,
|
||||
rntm_p,
|
||||
cntl_use,
|
||||
thread
|
||||
);
|
||||
|
||||
// Free the thread's local control tree.
|
||||
bli_l3_cntl_free( cntl_use, thread );
|
||||
bli_l3_cntl_free( rntm_p, cntl_use, thread );
|
||||
|
||||
// Free the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_free( thread );
|
||||
bli_l3_thrinfo_free( rntm_p, thread );
|
||||
|
||||
return NULL;
|
||||
}
|
||||
@@ -243,39 +260,66 @@ void bli_l3_thread_decorator
|
||||
bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );
|
||||
|
||||
// Query the total number of threads from the rntm_t object.
|
||||
dim_t n_threads = bli_rntm_num_threads( rntm );
|
||||
const dim_t n_threads = bli_rntm_num_threads( rntm );
|
||||
|
||||
// NOTE: The sba was initialized in bli_init().
|
||||
|
||||
// Check out an array_t from the small block allocator. This is done
|
||||
// with an internal lock to ensure only one application thread accesses
|
||||
// the sba at a time. bli_sba_checkout_array() will also automatically
|
||||
// resize the array_t, if necessary.
|
||||
array_t* restrict array = bli_sba_checkout_array( n_threads );
|
||||
|
||||
// Access the pool_t* for thread 0 and embed it into the rntm. We do
|
||||
// this up-front only so that we have the rntm_t.sba_pool field
|
||||
// initialized and ready for the global communicator creation below.
|
||||
bli_sba_rntm_set_pool( 0, array, rntm );
|
||||
|
||||
// Set the packing block allocator field of the rntm. This will be
|
||||
// inherited by all of the child threads when they make local copies of
|
||||
// the rntm below.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* gl_comm = bli_thrcomm_create( n_threads );
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
|
||||
// Allocate an array of pthread objects and auxiliary data structs to pass
|
||||
// to the thread entry functions.
|
||||
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads );
|
||||
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
bli_pthread_t* pthreads = bli_malloc_intl( sizeof( bli_pthread_t ) * n_threads );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
thread_data_t* datas = bli_malloc_intl( sizeof( thread_data_t ) * n_threads );
|
||||
|
||||
// NOTE: We must iterate backwards so that the chief thread (thread id 0)
|
||||
// can spawn all other threads before proceeding with its own computation.
|
||||
for ( dim_t id = n_threads - 1; 0 <= id; id-- )
|
||||
for ( dim_t tid = n_threads - 1; 0 <= tid; tid-- )
|
||||
{
|
||||
// Set up thread data for additional threads (beyond thread 0).
|
||||
datas[id].func = func;
|
||||
datas[id].family = family;
|
||||
datas[id].schema_a = schema_a;
|
||||
datas[id].schema_b = schema_b;
|
||||
datas[id].alpha = alpha;
|
||||
datas[id].a = a;
|
||||
datas[id].b = b;
|
||||
datas[id].beta = beta;
|
||||
datas[id].c = c;
|
||||
datas[id].cntx = cntx;
|
||||
datas[id].rntm = rntm;
|
||||
datas[id].cntl = cntl;
|
||||
datas[id].id = id;
|
||||
datas[id].gl_comm = gl_comm;
|
||||
datas[tid].func = func;
|
||||
datas[tid].family = family;
|
||||
datas[tid].schema_a = schema_a;
|
||||
datas[tid].schema_b = schema_b;
|
||||
datas[tid].alpha = alpha;
|
||||
datas[tid].a = a;
|
||||
datas[tid].b = b;
|
||||
datas[tid].beta = beta;
|
||||
datas[tid].c = c;
|
||||
datas[tid].cntx = cntx;
|
||||
datas[tid].rntm = rntm;
|
||||
datas[tid].cntl = cntl;
|
||||
datas[tid].tid = tid;
|
||||
datas[tid].gl_comm = gl_comm;
|
||||
datas[tid].array = array;
|
||||
|
||||
// Spawn additional threads for ids greater than 0.
|
||||
if ( id != 0 )
|
||||
bli_pthread_create( &pthreads[id], NULL, &bli_l3_thread_entry, &datas[id] );
|
||||
if ( tid != 0 )
|
||||
bli_pthread_create( &pthreads[tid], NULL, &bli_l3_thread_entry, &datas[tid] );
|
||||
else
|
||||
bli_l3_thread_entry( ( void* )(&datas[0]) );
|
||||
}
|
||||
@@ -285,15 +329,26 @@ void bli_l3_thread_decorator
|
||||
// (called from the thread entry function).
|
||||
|
||||
// Thread 0 waits for additional threads to finish.
|
||||
for ( dim_t id = 1; id < n_threads; id++ )
|
||||
for ( dim_t tid = 1; tid < n_threads; tid++ )
|
||||
{
|
||||
bli_pthread_join( pthreads[id], NULL );
|
||||
bli_pthread_join( pthreads[tid], NULL );
|
||||
}
|
||||
|
||||
// Check the array_t back into the small block allocator. Similar to the
|
||||
// check-out, this is done using a lock embedded within the sba to ensure
|
||||
// mutual exclusion.
|
||||
bli_sba_checkin_array( array );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
bli_free_intl( pthreads );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_l3_thread_decorator().pth: " );
|
||||
#endif
|
||||
bli_free_intl( datas );
|
||||
}
|
||||
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -38,30 +38,33 @@
|
||||
#ifndef BLIS_ENABLE_MULTITHREADING
|
||||
|
||||
// Constructors and destructors for communicators
|
||||
thrcomm_t* bli_thrcomm_create( dim_t n_threads )
|
||||
thrcomm_t* bli_thrcomm_create( rntm_t* rntm, dim_t n_threads )
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrcomm_create(): " );
|
||||
#endif
|
||||
|
||||
thrcomm_t* comm = bli_malloc_intl( sizeof( thrcomm_t ) );
|
||||
bli_thrcomm_init( comm, n_threads );
|
||||
thrcomm_t* comm = bli_sba_acquire( rntm, sizeof( thrcomm_t ) );
|
||||
|
||||
bli_thrcomm_init( n_threads, comm );
|
||||
|
||||
return comm;
|
||||
}
|
||||
|
||||
void bli_thrcomm_free( thrcomm_t* comm )
|
||||
void bli_thrcomm_free( rntm_t* rntm, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
|
||||
bli_thrcomm_cleanup( comm );
|
||||
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrcomm_free(): " );
|
||||
#endif
|
||||
|
||||
bli_free_intl( comm );
|
||||
bli_sba_release( rntm, comm );
|
||||
}
|
||||
|
||||
void bli_thrcomm_init( thrcomm_t* comm, dim_t n_threads )
|
||||
void bli_thrcomm_init( dim_t n_threads, thrcomm_t* comm )
|
||||
{
|
||||
if ( comm == NULL ) return;
|
||||
|
||||
@@ -76,7 +79,7 @@ void bli_thrcomm_cleanup( thrcomm_t* comm )
|
||||
if ( comm == NULL ) return;
|
||||
}
|
||||
|
||||
void bli_thrcomm_barrier( thrcomm_t* comm, dim_t t_id )
|
||||
void bli_thrcomm_barrier( dim_t t_id, thrcomm_t* comm )
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -112,53 +115,88 @@ void bli_l3_thread_decorator
|
||||
bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );
|
||||
|
||||
// For sequential execution, we use only one thread.
|
||||
dim_t n_threads = 1;
|
||||
dim_t id = 0;
|
||||
const dim_t n_threads = 1;
|
||||
|
||||
// NOTE: The sba was initialized in bli_init().
|
||||
|
||||
// Check out an array_t from the small block allocator. This is done
|
||||
// with an internal lock to ensure only one application thread accesses
|
||||
// the sba at a time. bli_sba_checkout_array() will also automatically
|
||||
// resize the array_t, if necessary.
|
||||
array_t* restrict array = bli_sba_checkout_array( n_threads );
|
||||
|
||||
// Access the pool_t* for thread 0 and embed it into the rntm. We do
|
||||
// this up-front only so that we can create the global comm below.
|
||||
bli_sba_rntm_set_pool( 0, array, rntm );
|
||||
|
||||
// Set the packing block allocator field of the rntm.
|
||||
bli_membrk_rntm_set_membrk( rntm );
|
||||
|
||||
// Allocate a global communicator for the root thrinfo_t structures.
|
||||
thrcomm_t* gl_comm = bli_thrcomm_create( n_threads );
|
||||
thrcomm_t* restrict gl_comm = bli_thrcomm_create( rntm, n_threads );
|
||||
|
||||
cntl_t* cntl_use;
|
||||
thrinfo_t* thread;
|
||||
|
||||
// NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't
|
||||
// need to alias objects for A, B, and C since they were already aliased
|
||||
// in bli_*_front(). However, we may add aliasing here in the future so
|
||||
// that, with all three (_single.c, _openmp.c, _pthreads.c) implementations
|
||||
// consistently providing local aliases, we can then eliminate aliasing
|
||||
// elsewhere.
|
||||
{
|
||||
// NOTE: We don't need to create another copy of the rntm_t since
|
||||
// it was already copied in one of the high-level oapi functions.
|
||||
rntm_t* restrict rntm_p = rntm;
|
||||
|
||||
// Create a default control tree for the operation, if needed.
|
||||
bli_l3_cntl_create_if( family, schema_a, schema_b,
|
||||
a, b, c, cntl, &cntl_use );
|
||||
cntl_t* cntl_use;
|
||||
thrinfo_t* thread;
|
||||
|
||||
// Create the root node of the thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_create_root( id, gl_comm, rntm, cntl_use, &thread );
|
||||
const dim_t tid = 0;
|
||||
|
||||
func
|
||||
(
|
||||
alpha,
|
||||
a,
|
||||
b,
|
||||
beta,
|
||||
c,
|
||||
cntx,
|
||||
rntm,
|
||||
cntl_use,
|
||||
thread
|
||||
);
|
||||
// Use the thread id to access the appropriate pool_t* within the
|
||||
// array_t, and use it to set the sba_pool field within the rntm_t.
|
||||
// If the pool_t* element within the array_t is NULL, it will first
|
||||
// be allocated/initialized.
|
||||
// NOTE: This is commented out because, in the single-threaded case,
|
||||
// this is redundant since it's already been done above.
|
||||
//bli_sba_rntm_set_pool( tid, array, rntm_p );
|
||||
|
||||
// Free the thread's local control tree.
|
||||
bli_l3_cntl_free( cntl_use, thread );
|
||||
// NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't
|
||||
// need to alias objects for A, B, and C since they were already aliased
|
||||
// in bli_*_front(). However, we may add aliasing here in the future so
|
||||
// that, with all three (_single.c, _openmp.c, _pthreads.c) implementations
|
||||
// consistently providing local aliases, we can then eliminate aliasing
|
||||
// elsewhere.
|
||||
|
||||
// Free the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_free( thread );
|
||||
// Create a default control tree for the operation, if needed.
|
||||
bli_l3_cntl_create_if( family, schema_a, schema_b,
|
||||
a, b, c, rntm_p, cntl, &cntl_use );
|
||||
|
||||
// Create the root node of the thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_create_root( tid, gl_comm, rntm_p, cntl_use, &thread );
|
||||
|
||||
func
|
||||
(
|
||||
alpha,
|
||||
a,
|
||||
b,
|
||||
beta,
|
||||
c,
|
||||
cntx,
|
||||
rntm_p,
|
||||
cntl_use,
|
||||
thread
|
||||
);
|
||||
|
||||
// Free the thread's local control tree.
|
||||
bli_l3_cntl_free( rntm_p, cntl_use, thread );
|
||||
|
||||
// Free the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_free( rntm_p, thread );
|
||||
}
|
||||
|
||||
// We shouldn't free the global communicator since it was already freed
|
||||
// by the global communicator's chief thread in bli_l3_thrinfo_free()
|
||||
// (called above).
|
||||
}
|
||||
|
||||
// Check the array_t back into the small block allocator. Similar to the
|
||||
// check-out, this is done using a lock embedded within the sba to ensure
|
||||
// mutual exclusion.
|
||||
bli_sba_checkin_array( array );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
@@ -46,7 +46,7 @@ static rntm_t global_rntm;
|
||||
|
||||
void bli_thread_init( void )
|
||||
{
|
||||
bli_thrcomm_init( &BLIS_SINGLE_COMM, 1 );
|
||||
bli_thrcomm_init( 1, &BLIS_SINGLE_COMM );
|
||||
bli_packm_thrinfo_init_single( &BLIS_PACKM_SINGLE_THREADED );
|
||||
bli_l3_thrinfo_init_single( &BLIS_GEMM_SINGLE_THREADED );
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@
|
||||
|
||||
thrinfo_t* bli_thrinfo_create
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrcomm_t* ocomm,
|
||||
dim_t ocomm_id,
|
||||
dim_t n_way,
|
||||
@@ -45,11 +46,11 @@ thrinfo_t* bli_thrinfo_create
|
||||
thrinfo_t* sub_node
|
||||
)
|
||||
{
|
||||
#ifdef ENABLE_MEM_DEBUG
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrinfo_create(): " );
|
||||
#endif
|
||||
|
||||
thrinfo_t* thread = bli_malloc_intl( sizeof( thrinfo_t ) );
|
||||
thrinfo_t* thread = bli_sba_acquire( rntm, sizeof( thrinfo_t ) );
|
||||
|
||||
bli_thrinfo_init
|
||||
(
|
||||
@@ -99,11 +100,47 @@ void bli_thrinfo_init_single
|
||||
);
|
||||
}
|
||||
|
||||
void bli_thrinfo_free
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
if ( thread == NULL ||
|
||||
thread == &BLIS_PACKM_SINGLE_THREADED ||
|
||||
thread == &BLIS_GEMM_SINGLE_THREADED
|
||||
) return;
|
||||
|
||||
thrinfo_t* thrinfo_sub_node = bli_thrinfo_sub_node( thread );
|
||||
|
||||
// Free the communicators, but only if the current thrinfo_t struct
|
||||
// is marked as needing them to be freed. The most common example of
|
||||
// thrinfo_t nodes NOT marked as needing their comms freed are those
|
||||
// associated with packm thrinfo_t nodes.
|
||||
if ( bli_thrinfo_needs_free_comm( thread ) )
|
||||
{
|
||||
// The ochief always frees his communicator, and the ichief frees its
|
||||
// communicator if we are at the leaf node.
|
||||
if ( bli_thread_am_ochief( thread ) )
|
||||
bli_thrcomm_free( rntm, bli_thrinfo_ocomm( thread ) );
|
||||
}
|
||||
|
||||
// Recursively free all children of the current thrinfo_t.
|
||||
bli_thrinfo_free( rntm, thrinfo_sub_node );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "bli_thrinfo_free(): " );
|
||||
#endif
|
||||
|
||||
// Free the thrinfo_t struct.
|
||||
bli_sba_release( rntm, thread );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
#include "assert.h"
|
||||
|
||||
#define BLIS_NUM_STATIC_COMMS 18
|
||||
#define BLIS_NUM_STATIC_COMMS 80
|
||||
|
||||
thrinfo_t* bli_thrinfo_create_for_cntl
|
||||
(
|
||||
@@ -118,12 +155,12 @@ thrinfo_t* bli_thrinfo_create_for_cntl
|
||||
|
||||
thrinfo_t* thread_chl;
|
||||
|
||||
bszid_t bszid_chl = bli_cntl_bszid( cntl_chl );
|
||||
const bszid_t bszid_chl = bli_cntl_bszid( cntl_chl );
|
||||
|
||||
dim_t parent_nt_in = bli_thread_num_threads( thread_par );
|
||||
dim_t parent_n_way = bli_thread_n_way( thread_par );
|
||||
dim_t parent_comm_id = bli_thread_ocomm_id( thread_par );
|
||||
dim_t parent_work_id = bli_thread_work_id( thread_par );
|
||||
const dim_t parent_nt_in = bli_thread_num_threads( thread_par );
|
||||
const dim_t parent_n_way = bli_thread_n_way( thread_par );
|
||||
const dim_t parent_comm_id = bli_thread_ocomm_id( thread_par );
|
||||
const dim_t parent_work_id = bli_thread_work_id( thread_par );
|
||||
|
||||
dim_t child_nt_in;
|
||||
dim_t child_comm_id;
|
||||
@@ -162,7 +199,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl
|
||||
// object and store it in the array element corresponding to the
|
||||
// parent's work id.
|
||||
if ( child_comm_id == 0 )
|
||||
new_comms[ parent_work_id ] = bli_thrcomm_create( child_nt_in );
|
||||
new_comms[ parent_work_id ] = bli_thrcomm_create( rntm, child_nt_in );
|
||||
|
||||
bli_thread_obarrier( thread_par );
|
||||
|
||||
@@ -170,6 +207,7 @@ thrinfo_t* bli_thrinfo_create_for_cntl
|
||||
// that was created by their chief, as identified by parent_work_id.
|
||||
thread_chl = bli_thrinfo_create
|
||||
(
|
||||
rntm,
|
||||
new_comms[ parent_work_id ],
|
||||
child_comm_id,
|
||||
child_n_way,
|
||||
@@ -259,6 +297,7 @@ thrinfo_t* bli_thrinfo_rgrow
|
||||
// freed when thread_seg, or one of its descendents, is freed.
|
||||
thread_cur = bli_thrinfo_create
|
||||
(
|
||||
rntm,
|
||||
bli_thrinfo_ocomm( thread_seg ),
|
||||
bli_thread_ocomm_id( thread_seg ),
|
||||
bli_cntl_calc_num_threads_in( rntm, cntl_cur ),
|
||||
|
||||
@@ -122,12 +122,12 @@ static void bli_thrinfo_set_sub_node( thrinfo_t* sub_node, thrinfo_t* t )
|
||||
|
||||
static void* bli_thread_obroadcast( thrinfo_t* t, void* p )
|
||||
{
|
||||
return bli_thrcomm_bcast( t->ocomm, t->ocomm_id, p );
|
||||
return bli_thrcomm_bcast( t->ocomm_id, p, t->ocomm );
|
||||
}
|
||||
|
||||
static void bli_thread_obarrier( thrinfo_t* t )
|
||||
{
|
||||
bli_thrcomm_barrier( t->ocomm, t->ocomm_id );
|
||||
bli_thrcomm_barrier( t->ocomm_id, t->ocomm );
|
||||
}
|
||||
|
||||
|
||||
@@ -137,6 +137,7 @@ static void bli_thread_obarrier( thrinfo_t* t )
|
||||
|
||||
thrinfo_t* bli_thrinfo_create
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrcomm_t* ocomm,
|
||||
dim_t ocomm_id,
|
||||
dim_t n_way,
|
||||
@@ -161,6 +162,12 @@ void bli_thrinfo_init_single
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
void bli_thrinfo_free
|
||||
(
|
||||
rntm_t* rntm,
|
||||
thrinfo_t* thread
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
thrinfo_t* bli_thrinfo_create_for_cntl
|
||||
|
||||
@@ -458,7 +458,7 @@ void GENBARNAME(cntx_init)
|
||||
|
||||
//bli_cntx_set_anti_pref( FALSE, cntx );
|
||||
|
||||
bli_cntx_set_membrk( bli_memsys_global_membrk(), cntx );
|
||||
//bli_cntx_set_membrk( bli_membrk_query(), cntx );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
@@ -320,7 +320,7 @@ void libblis_test_gemmtrsm_ukr_experiment
|
||||
// allocated.
|
||||
void* buf_ap = bli_obj_buffer( &ap );
|
||||
void* buf_bp = bli_obj_buffer( &bp );
|
||||
bli_packm_init_pack( BLIS_NO_INVERT_DIAG, BLIS_PACKED_ROW_PANELS,
|
||||
bli_packm_init_pack( BLIS_INVERT_DIAG, BLIS_PACKED_ROW_PANELS,
|
||||
BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER,
|
||||
BLIS_MR, BLIS_KR, &a, &ap, cntx );
|
||||
bli_packm_init_pack( BLIS_NO_INVERT_DIAG, BLIS_PACKED_COL_PANELS,
|
||||
@@ -351,8 +351,10 @@ void libblis_test_gemmtrsm_ukr_experiment
|
||||
// know which set of micro-kernels (lower or upper) to choose from.
|
||||
bli_obj_set_uplo( uploa, &a11p );
|
||||
|
||||
//bli_printm( "a", &a, "%4.1f", "" );
|
||||
//bli_printm( "ap", &ap, "%4.1f", "" );
|
||||
#if 0
|
||||
bli_printm( "a", &a, "%5.2f", "" );
|
||||
bli_printm( "ap", &ap, "%5.2f", "" );
|
||||
#endif
|
||||
|
||||
// Repeat the experiment n_repeats times and record results.
|
||||
for ( i = 0; i < n_repeats; ++i )
|
||||
@@ -493,6 +495,10 @@ void libblis_test_gemmtrsm_ukr_check
|
||||
|
||||
bli_gemv( &BLIS_ONE, b11, &t, &BLIS_ZERO, &v );
|
||||
|
||||
#if 0
|
||||
bli_printm( "a11", a11, "%5.2f", "" );
|
||||
#endif
|
||||
|
||||
// Restore the diagonal of a11 to its original, un-inverted state
|
||||
// (needed for trsv).
|
||||
bli_invertd( a11 );
|
||||
|
||||
@@ -84,7 +84,7 @@ int main( int argc, char** argv )
|
||||
libblis_test_thread_decorator( ¶ms, &ops );
|
||||
|
||||
// Finalize libblis.
|
||||
//bli_finalize();
|
||||
bli_finalize();
|
||||
|
||||
// Return peacefully.
|
||||
return 0;
|
||||
@@ -126,13 +126,25 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
|
||||
// Allocate an array of pthread objects and auxiliary data structs to pass
|
||||
// to the thread entry functions.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
bli_pthread_t* pthread = bli_malloc_intl( sizeof( bli_pthread_t ) * nt );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
thread_data_t* tdata = bli_malloc_intl( sizeof( thread_data_t ) * nt );
|
||||
|
||||
// Allocate a mutex for the threads to share.
|
||||
//bli_pthread_mutex_t* mutex = bli_malloc_intl( sizeof( bli_pthread_mutex_t ) );
|
||||
|
||||
// Allocate a barrier for the threads to share.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
bli_pthread_barrier_t* barrier = bli_malloc_intl( sizeof( bli_pthread_barrier_t ) );
|
||||
|
||||
// Initialize the mutex.
|
||||
@@ -175,8 +187,20 @@ void libblis_test_thread_decorator( test_params_t* params, test_ops_t* ops )
|
||||
bli_pthread_barrier_destroy( barrier );
|
||||
|
||||
// Free the pthread-related memory.
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
bli_free_intl( pthread );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
bli_free_intl( tdata );
|
||||
|
||||
#ifdef BLIS_ENABLE_MEM_TRACING
|
||||
printf( "libblis_test_thread_decorator(): " );
|
||||
#endif
|
||||
//bli_free_intl( mutex );
|
||||
bli_free_intl( barrier );
|
||||
}
|
||||
@@ -837,8 +861,9 @@ void libblis_test_output_params_struct( FILE* os, test_params_t* params )
|
||||
libblis_test_fprintf_c( os, "Max stack buffer size (bytes) %d\n", ( int )bli_info_get_stack_buf_max_size() );
|
||||
libblis_test_fprintf_c( os, "Page size (bytes) %d\n", ( int )bli_info_get_page_size() );
|
||||
libblis_test_fprintf_c( os, "\n" );
|
||||
libblis_test_fprintf_c( os, "memory pools for pack buffers\n" );
|
||||
libblis_test_fprintf_c( os, " enabled? %d\n", ( int )bli_info_get_enable_packbuf_pools() );
|
||||
libblis_test_fprintf_c( os, "memory pools\n" );
|
||||
libblis_test_fprintf_c( os, " enabled for packing blocks? %d\n", ( int )bli_info_get_enable_pba_pools() );
|
||||
libblis_test_fprintf_c( os, " enabled for small blocks? %d\n", ( int )bli_info_get_enable_sba_pools() );
|
||||
libblis_test_fprintf_c( os, "\n" );
|
||||
libblis_test_fprintf_c( os, "memory alignment (bytes) \n" );
|
||||
libblis_test_fprintf_c( os, " stack address %d\n", ( int )bli_info_get_stack_buf_align_size() );
|
||||
@@ -2589,6 +2614,7 @@ void libblis_test_mobj_create( test_params_t* params, num_t dt, trans_t trans, c
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
cntl_t* libblis_test_pobj_create( bszid_t bmult_id_m, bszid_t bmult_id_n, invdiag_t inv_diag, pack_t pack_schema, packbuf_t pack_buf, obj_t* a, obj_t* p, cntx_t* cntx )
|
||||
{
|
||||
bool_t does_inv_diag;
|
||||
@@ -2600,6 +2626,7 @@ cntl_t* libblis_test_pobj_create( bszid_t bmult_id_m, bszid_t bmult_id_n, invdia
|
||||
// Create a control tree node for the packing operation.
|
||||
cntl_t* cntl = bli_packm_cntl_create_node
|
||||
(
|
||||
NULL, // we don't need the small block allocator from the runtime.
|
||||
NULL, // func ptr is not referenced b/c we don't call via l3 _int().
|
||||
bli_packm_blk_var1,
|
||||
bmult_id_m,
|
||||
@@ -2625,7 +2652,7 @@ cntl_t* libblis_test_pobj_create( bszid_t bmult_id_m, bszid_t bmult_id_n, invdia
|
||||
// mem_t entry later on.
|
||||
return cntl;
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
void libblis_test_vobj_create( test_params_t* params, num_t dt, char storage, dim_t m, obj_t* x )
|
||||
|
||||
@@ -171,6 +171,7 @@ void libblis_test_trsm_ukr_experiment
|
||||
num_t datatype;
|
||||
|
||||
dim_t m, n;
|
||||
inc_t ldap, ldbp;
|
||||
|
||||
char sc_a = 'c';
|
||||
char sc_b = 'r';
|
||||
@@ -195,6 +196,11 @@ void libblis_test_trsm_ukr_experiment
|
||||
m = bli_cntx_get_blksz_def_dt( datatype, BLIS_MR, cntx );
|
||||
n = bli_cntx_get_blksz_def_dt( datatype, BLIS_NR, cntx );
|
||||
|
||||
// Also query PACKMR and PACKNR as the leading dimensions to ap and bp,
|
||||
// respectively.
|
||||
ldap = bli_cntx_get_blksz_max_dt( datatype, BLIS_MR, cntx );
|
||||
ldbp = bli_cntx_get_blksz_max_dt( datatype, BLIS_NR, cntx );
|
||||
|
||||
// Store the register blocksizes so that the driver can retrieve the
|
||||
// values later when printing results.
|
||||
op->dim_aux[0] = m;
|
||||
@@ -232,6 +238,7 @@ void libblis_test_trsm_ukr_experiment
|
||||
libblis_test_mobj_randomize( params, TRUE, &c );
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
#if 0
|
||||
// Create pack objects for a and b, and pack them to ap and bp,
|
||||
// respectively.
|
||||
cntl_t* cntl_a = libblis_test_pobj_create
|
||||
@@ -254,17 +261,52 @@ void libblis_test_trsm_ukr_experiment
|
||||
&b, &bp,
|
||||
cntx
|
||||
);
|
||||
#endif
|
||||
|
||||
// Create the packed objects. Use packmr and packnr as the leading
|
||||
// dimensions of ap and bp, respectively.
|
||||
bli_obj_create( datatype, m, m, 1, ldap, &ap );
|
||||
bli_obj_create( datatype, m, n, ldbp, 1, &bp );
|
||||
|
||||
// Set up the objects for packing. Calling packm_init_pack() does everything
|
||||
// except checkout a memory pool block and save its address to the obj_t's.
|
||||
// However, it does overwrite the buffer field of packed object with that of
|
||||
// the source object. So, we have to save the buffer address that was
|
||||
// allocated.
|
||||
void* buf_ap = bli_obj_buffer( &ap );
|
||||
void* buf_bp = bli_obj_buffer( &bp );
|
||||
bli_packm_init_pack( BLIS_INVERT_DIAG, BLIS_PACKED_ROW_PANELS,
|
||||
BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER,
|
||||
BLIS_MR, BLIS_KR, &a, &ap, cntx );
|
||||
bli_packm_init_pack( BLIS_NO_INVERT_DIAG, BLIS_PACKED_COL_PANELS,
|
||||
BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER,
|
||||
BLIS_KR, BLIS_NR, &b, &bp, cntx );
|
||||
bli_obj_set_buffer( buf_ap, &ap );
|
||||
bli_obj_set_buffer( buf_bp, &bp );
|
||||
|
||||
// Set the diagonal offset of ap.
|
||||
bli_obj_set_diag_offset( 0, &ap );
|
||||
|
||||
// Set the uplo field of ap since the default for packed objects is
|
||||
// BLIS_DENSE, and the _ukernel() wrapper needs this information to
|
||||
// know which set of micro-kernels (lower or upper) to choose from.
|
||||
bli_obj_set_uplo( uploa, &ap );
|
||||
|
||||
// Pack the data from the source objects.
|
||||
bli_packm_blk_var1( &a, &ap, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &b, &bp, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
#if 0
|
||||
bli_printm( "a", &a, "%5.2f", "" );
|
||||
bli_printm( "ap", &ap, "%5.2f", "" );
|
||||
#endif
|
||||
|
||||
// Repeat the experiment n_repeats times and record results.
|
||||
for ( i = 0; i < n_repeats; ++i )
|
||||
{
|
||||
// Re-pack the contents of b to bp.
|
||||
bli_packm_blk_var1( &b, &bp, cntx, cntl_b, &BLIS_PACKM_SINGLE_THREADED );
|
||||
//bli_packm_blk_var1( &b, &bp, cntx, cntl_b, &BLIS_PACKM_SINGLE_THREADED );
|
||||
bli_packm_blk_var1( &b, &bp, cntx, NULL, &BLIS_PACKM_SINGLE_THREADED );
|
||||
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
@@ -282,15 +324,17 @@ void libblis_test_trsm_ukr_experiment
|
||||
if ( bli_obj_is_complex( &b ) ) *perf *= 4.0;
|
||||
|
||||
// Perform checks.
|
||||
libblis_test_trsm_ukr_check( params, side, &a, &c, &b, resid );
|
||||
libblis_test_trsm_ukr_check( params, side, &ap, &c, &b, resid );
|
||||
|
||||
// Zero out performance and residual if output matrix is empty.
|
||||
libblis_test_check_empty_problem( &c, perf, resid );
|
||||
//libblis_test_check_empty_problem( &c, perf, resid );
|
||||
|
||||
#if 0
|
||||
// Free the control tree nodes and release their cached mem_t entries
|
||||
// back to the memory broker.
|
||||
bli_cntl_free( cntl_a, &BLIS_PACKM_SINGLE_THREADED );
|
||||
bli_cntl_free( cntl_b, &BLIS_PACKM_SINGLE_THREADED );
|
||||
bli_cntl_free( NULL, cntl_a, &BLIS_PACKM_SINGLE_THREADED );
|
||||
bli_cntl_free( NULL, cntl_b, &BLIS_PACKM_SINGLE_THREADED );
|
||||
#endif
|
||||
|
||||
// Free the test objects.
|
||||
bli_obj_free( &a );
|
||||
@@ -392,6 +436,14 @@ void libblis_test_trsm_ukr_check
|
||||
|
||||
bli_gemv( &BLIS_ONE, b, &t, &BLIS_ZERO, &v );
|
||||
|
||||
#if 0
|
||||
bli_printm( "a11", a, "%5.2f", "" );
|
||||
#endif
|
||||
|
||||
// Restore the diagonal of a11 to its original, un-inverted state
|
||||
// (needed for trsv).
|
||||
bli_invertd( a );
|
||||
|
||||
if ( bli_is_left( side ) )
|
||||
{
|
||||
bli_gemv( &BLIS_ONE, b_orig, &t, &BLIS_ZERO, &w );
|
||||
|
||||
Reference in New Issue
Block a user