From b444489f100d218bc8ef29b01ff8489c358559f9 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 3 Dec 2013 16:08:30 -0600 Subject: [PATCH] Added new "attached" scalar representation. Details: - Added infrastructure to support a new scalar representation, whereby every object contains an internal scalar that defaults to 1.0. This facilitates passing scalars around without having to house them in separate objects. These "attached" scalars are stored in the internal atom_t field of the obj_t struct, and are always stored to be the same datatype as the object to which they are attached. Level-3 variants no longer take scalar arguments, however, level-3 internal back-ends still do; this is so that the calling function can perform subproblems such as C := C - alpha * A * B on-the-fly without needing to change either of the scalars attached to A or B. - Removed scalar argument from packm_int(). - Observe and apply attached scalars in scalm_int(), and removed scalar from interface of scalm_unb_var1(). - Renamed the following functions (and corresponding invocations): bli_obj_init_scalar_copy_of() -> bli_obj_scalar_init_detached_copy_of() bli_obj_init_scalar() -> bli_obj_scalar_init_detached() bli_obj_create_scalar_with_attached_buffer() -> bli_obj_create_1x1_with_attached_buffer() bli_obj_scalar_equals() -> bli_obj_equals() - Defined new functions: bli_obj_scalar_detach() bli_obj_scalar_attach() bli_obj_scalar_apply_scalar() bli_obj_scalar_reset() bli_obj_scalar_has_nonzero_imag() bli_obj_scalar_equals() - Placed all bli_obj_scalar_* functions in a new file, bli_obj_scalar.c. - Renamed the following macros: bli_obj_scalar_buffer() -> bli_obj_buffer_for_1x1() bli_obj_is_scalar() -> bli_obj_is_1x1() - Defined new macros to set and copy internal scalars between objects: bli_obj_set_internal_scalar() bli_obj_copy_internal_scalar() - In level-3 internal back-ends, added conditional blocks where alpha and beta are checked for non-unit-ness. 
Those values for alpha and beta are applied to the scalars attached to aliases of A/B/C, as appropriate, before being passed into the variant specified by the control tree. - In level-3 blocked variants, pass BLIS_ONE into subproblems instead of alpha and/or beta. - In level-3 macro-kernels, changed how scalars are obtained. Now, scalars attached to A and B are multiplied together to obtain alpha, while beta is obtained directly from C. - In level-3 front-ends, removed old function calls meant to provide future support for mixed domain/precision. These can be added back later once that functionality is given proper treatment. Also, removed the creation of copy-casts of alpha and beta since typecasting of scalars is now implicitly handled in the internal back-ends when alpha and beta are applied to the attached scalars. --- frame/0/getsc/bli_getsc.c | 2 +- frame/1/axpyv/bli_axpyv.c | 2 +- frame/1/dotxv/bli_dotxv_unb_var1.c | 4 +- frame/1/scal2v/bli_scal2v.c | 2 +- frame/1/scalv/bli_scalv.c | 2 +- frame/1/scalv/bli_scalv_int.c | 2 +- frame/1/setv/bli_setv.c | 2 +- frame/1d/axpyd/bli_axpyd.c | 2 +- frame/1d/scal2d/bli_scal2d.c | 2 +- frame/1d/scald/bli_scald.c | 2 +- frame/1d/setd/bli_setd.c | 2 +- frame/1f/axpyf/bli_axpyf_unb_var1.c | 2 +- frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c | 4 +- frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c | 4 +- frame/1f/dotxf/bli_dotxf_unb_var1.c | 4 +- frame/1m/axpym/bli_axpym.c | 2 +- frame/1m/packm/bli_packm_blk_var2.c | 33 ++-- frame/1m/packm/bli_packm_blk_var2.h | 5 +- frame/1m/packm/bli_packm_blk_var3.c | 31 ++-- frame/1m/packm/bli_packm_blk_var3.h | 5 +- frame/1m/packm/bli_packm_check.c | 54 ++++-- frame/1m/packm/bli_packm_check.h | 11 +- frame/1m/packm/bli_packm_init.c | 2 +- frame/1m/packm/bli_packm_int.c | 51 +++-- frame/1m/packm/bli_packm_int.h | 3 +- frame/1m/packm/bli_packm_unb_var1.c | 31 ++-- frame/1m/packm/bli_packm_unb_var1.h | 5 +- frame/1m/packm/old/bli_packm_blk_var1.c | 2 +- frame/1m/packm/other/bli_packm_blk_var2.c | 
2 +- frame/1m/scal2m/bli_scal2m.c | 2 +- frame/1m/scalm/bli_scalm.c | 18 +- frame/1m/scalm/bli_scalm_int.c | 21 ++- frame/1m/scalm/bli_scalm_unb_var1.c | 22 ++- frame/1m/scalm/bli_scalm_unb_var1.h | 3 +- frame/1m/setm/bli_setm.c | 2 +- frame/2/gemv/bli_gemv.c | 8 +- frame/2/gemv/bli_gemv_blk_var1.c | 3 +- frame/2/gemv/bli_gemv_blk_var2.c | 3 +- frame/2/gemv/bli_gemv_unb_var1.c | 4 +- frame/2/gemv/bli_gemv_unb_var2.c | 4 +- frame/2/gemv/bli_gemv_unf_var1.c | 4 +- frame/2/gemv/bli_gemv_unf_var2.c | 4 +- frame/2/ger/bli_ger.c | 4 +- frame/2/ger/bli_ger_blk_var1.c | 3 +- frame/2/ger/bli_ger_blk_var2.c | 3 +- frame/2/ger/bli_ger_int.c | 2 +- frame/2/ger/bli_ger_unb_var1.c | 2 +- frame/2/ger/bli_ger_unb_var2.c | 2 +- frame/2/hemv/bli_hemv.c | 8 +- frame/2/hemv/bli_hemv_blk_var1.c | 3 +- frame/2/hemv/bli_hemv_blk_var2.c | 3 +- frame/2/hemv/bli_hemv_blk_var3.c | 3 +- frame/2/hemv/bli_hemv_blk_var4.c | 3 +- frame/2/hemv/bli_hemv_unb_var1.c | 4 +- frame/2/hemv/bli_hemv_unb_var2.c | 4 +- frame/2/hemv/bli_hemv_unb_var3.c | 4 +- frame/2/hemv/bli_hemv_unb_var4.c | 4 +- frame/2/hemv/bli_hemv_unf_var1.c | 4 +- frame/2/hemv/bli_hemv_unf_var1a.c | 4 +- frame/2/hemv/bli_hemv_unf_var3.c | 4 +- frame/2/hemv/bli_hemv_unf_var3a.c | 4 +- frame/2/her/bli_her.c | 4 +- frame/2/her/bli_her_blk_var1.c | 3 +- frame/2/her/bli_her_blk_var2.c | 3 +- frame/2/her2/bli_her2.c | 6 +- frame/2/her2/bli_her2_blk_var1.c | 3 +- frame/2/her2/bli_her2_blk_var2.c | 3 +- frame/2/her2/bli_her2_blk_var3.c | 3 +- frame/2/her2/bli_her2_blk_var4.c | 3 +- frame/2/her2/bli_her2_int.c | 4 +- frame/2/her2/bli_her2_unb_var1.c | 2 +- frame/2/her2/bli_her2_unb_var2.c | 2 +- frame/2/her2/bli_her2_unb_var3.c | 2 +- frame/2/her2/bli_her2_unb_var4.c | 2 +- frame/2/her2/bli_her2_unf_var1.c | 2 +- frame/2/her2/bli_her2_unf_var4.c | 2 +- frame/2/symv/bli_symv.c | 8 +- frame/2/syr/bli_syr.c | 4 +- frame/2/syr2/bli_syr2.c | 4 +- frame/2/trmv/bli_trmv.c | 4 +- frame/2/trmv/bli_trmv_l_blk_var1.c | 3 +- 
frame/2/trmv/bli_trmv_l_blk_var2.c | 3 +- frame/2/trmv/bli_trmv_u_blk_var1.c | 3 +- frame/2/trmv/bli_trmv_u_blk_var2.c | 3 +- frame/2/trmv/bli_trmv_unb_var1.c | 2 +- frame/2/trmv/bli_trmv_unb_var2.c | 2 +- frame/2/trmv/bli_trmv_unf_var1.c | 2 +- frame/2/trmv/bli_trmv_unf_var2.c | 2 +- frame/2/trsv/bli_trsv.c | 4 +- frame/2/trsv/bli_trsv_l_blk_var1.c | 3 +- frame/2/trsv/bli_trsv_l_blk_var2.c | 3 +- frame/2/trsv/bli_trsv_u_blk_var1.c | 3 +- frame/2/trsv/bli_trsv_u_blk_var2.c | 3 +- frame/2/trsv/bli_trsv_unb_var1.c | 2 +- frame/2/trsv/bli_trsv_unb_var2.c | 2 +- frame/2/trsv/bli_trsv_unf_var1.c | 2 +- frame/2/trsv/bli_trsv_unf_var2.c | 2 +- frame/3/gemm/bli_gemm.c | 38 +--- frame/3/gemm/bli_gemm_blk_var1.c | 25 +-- frame/3/gemm/bli_gemm_blk_var1.h | 4 +- frame/3/gemm/bli_gemm_blk_var2.c | 25 +-- frame/3/gemm/bli_gemm_blk_var2.h | 4 +- frame/3/gemm/bli_gemm_blk_var3.c | 39 ++-- frame/3/gemm/bli_gemm_blk_var3.h | 4 +- frame/3/gemm/bli_gemm_blk_var4.c | 41 ++--- frame/3/gemm/bli_gemm_blk_var4.h | 4 +- frame/3/gemm/bli_gemm_int.c | 30 ++- frame/3/gemm/bli_gemm_ker_var2.c | 26 ++- frame/3/gemm/bli_gemm_ker_var2.h | 4 +- frame/3/gemm/bli_gemm_ker_var5.c | 27 ++- frame/3/gemm/bli_gemm_ker_var5.h | 4 +- frame/3/gemm/bli_gemm_target.c | 14 -- frame/3/gemm/bli_gemm_target.h | 2 - frame/3/gemm/other/bli_gemm_ker_var2.c | 24 +-- frame/3/hemm/bli_hemm.c | 38 +--- frame/3/her2k/bli_her2k.c | 71 +++---- frame/3/her2k/bli_her2k_blk_var1f.c | 48 +++-- frame/3/her2k/bli_her2k_blk_var1f.h | 5 +- frame/3/her2k/bli_her2k_blk_var2f.c | 38 ++-- frame/3/her2k/bli_her2k_blk_var2f.h | 5 +- frame/3/her2k/bli_her2k_blk_var3f.c | 52 +++--- frame/3/her2k/bli_her2k_blk_var3f.h | 5 +- frame/3/her2k/bli_her2k_int.c | 53 ++++-- frame/3/her2k/bli_her2k_l_ker_var2.c | 23 ++- frame/3/her2k/bli_her2k_l_ker_var2.h | 5 +- frame/3/her2k/bli_her2k_target.c | 14 -- frame/3/her2k/bli_her2k_target.h | 2 - frame/3/her2k/bli_her2k_u_ker_var2.c | 23 ++- frame/3/her2k/bli_her2k_u_ker_var2.h | 5 +- 
frame/3/herk/bli_herk.c | 38 +--- frame/3/herk/bli_herk_blk_var1f.c | 25 +-- frame/3/herk/bli_herk_blk_var1f.h | 4 +- frame/3/herk/bli_herk_blk_var2f.c | 25 +-- frame/3/herk/bli_herk_blk_var2f.h | 4 +- frame/3/herk/bli_herk_blk_var3f.c | 39 ++-- frame/3/herk/bli_herk_blk_var3f.h | 4 +- frame/3/herk/bli_herk_int.c | 30 ++- frame/3/herk/bli_herk_l_ker_var2.c | 29 ++- frame/3/herk/bli_herk_l_ker_var2.h | 4 +- frame/3/herk/bli_herk_target.c | 6 - frame/3/herk/bli_herk_target.h | 2 - frame/3/herk/bli_herk_u_ker_var2.c | 29 ++- frame/3/herk/bli_herk_u_ker_var2.h | 4 +- frame/3/symm/bli_symm.c | 38 +--- frame/3/syr2k/bli_syr2k.c | 63 ++----- frame/3/syrk/bli_syrk.c | 38 +--- frame/3/trmm/bli_trmm.c | 21 +-- frame/3/trmm/bli_trmm_blk_var1.c | 25 +-- frame/3/trmm/bli_trmm_blk_var1.h | 4 +- frame/3/trmm/bli_trmm_blk_var2b.c | 25 +-- frame/3/trmm/bli_trmm_blk_var2b.h | 4 +- frame/3/trmm/bli_trmm_blk_var2f.c | 25 +-- frame/3/trmm/bli_trmm_blk_var2f.h | 4 +- frame/3/trmm/bli_trmm_blk_var3b.c | 25 +-- frame/3/trmm/bli_trmm_blk_var3b.h | 4 +- frame/3/trmm/bli_trmm_blk_var3f.c | 25 +-- frame/3/trmm/bli_trmm_blk_var3f.h | 4 +- frame/3/trmm/bli_trmm_int.c | 30 ++- frame/3/trmm/bli_trmm_ll_ker_var2.c | 27 ++- frame/3/trmm/bli_trmm_ll_ker_var2.h | 4 +- frame/3/trmm/bli_trmm_lu_ker_var2.c | 27 ++- frame/3/trmm/bli_trmm_lu_ker_var2.h | 4 +- frame/3/trmm/bli_trmm_rl_ker_var2.c | 27 ++- frame/3/trmm/bli_trmm_rl_ker_var2.h | 4 +- frame/3/trmm/bli_trmm_ru_ker_var2.c | 27 ++- frame/3/trmm/bli_trmm_ru_ker_var2.h | 4 +- frame/3/trmm/bli_trmm_target.c | 5 +- frame/3/trmm/bli_trmm_target.h | 3 +- frame/3/trmm3/bli_trmm3.c | 39 +--- frame/3/trsm/bli_trsm.c | 21 +-- frame/3/trsm/bli_trsm_blk_var1b.c | 18 +- frame/3/trsm/bli_trsm_blk_var1b.h | 4 +- frame/3/trsm/bli_trsm_blk_var1f.c | 18 +- frame/3/trsm/bli_trsm_blk_var1f.h | 4 +- frame/3/trsm/bli_trsm_blk_var2b.c | 25 +-- frame/3/trsm/bli_trsm_blk_var2b.h | 4 +- frame/3/trsm/bli_trsm_blk_var2f.c | 25 +-- frame/3/trsm/bli_trsm_blk_var2f.h | 4 +- 
frame/3/trsm/bli_trsm_blk_var3b.c | 37 ++-- frame/3/trsm/bli_trsm_blk_var3b.h | 4 +- frame/3/trsm/bli_trsm_blk_var3f.c | 37 ++-- frame/3/trsm/bli_trsm_blk_var3f.h | 4 +- frame/3/trsm/bli_trsm_int.c | 36 +++- frame/3/trsm/bli_trsm_ll_ker_var2.c | 13 +- frame/3/trsm/bli_trsm_ll_ker_var2.h | 4 +- frame/3/trsm/bli_trsm_lu_ker_var2.c | 13 +- frame/3/trsm/bli_trsm_lu_ker_var2.h | 4 +- frame/3/trsm/bli_trsm_rl_ker_var2.c | 13 +- frame/3/trsm/bli_trsm_rl_ker_var2.h | 4 +- frame/3/trsm/bli_trsm_ru_ker_var2.c | 13 +- frame/3/trsm/bli_trsm_ru_ker_var2.h | 4 +- frame/base/bli_obj.c | 72 ++------ frame/base/bli_obj.h | 20 +- frame/base/bli_obj_scalar.c | 174 ++++++++++++++++++ frame/base/bli_obj_scalar.h | 59 ++++++ frame/base/bli_query.c | 21 ++- frame/base/bli_query.h | 4 +- frame/include/bli_obj_macro_defs.h | 17 +- frame/include/bli_param_macro_defs.h | 4 +- frame/include/blis.h | 1 + .../core2-sse3/1f/bli_dotxf_opt_var1.c.alt | 4 +- testsuite/src/test_addm.c | 16 +- testsuite/src/test_addv.c | 14 +- testsuite/src/test_axpy2v.c | 10 +- testsuite/src/test_axpyf.c | 8 +- testsuite/src/test_axpym.c | 4 +- testsuite/src/test_axpyv.c | 4 +- testsuite/src/test_copym.c | 2 +- testsuite/src/test_copyv.c | 2 +- testsuite/src/test_dotaxpyv.c | 10 +- testsuite/src/test_dotv.c | 10 +- testsuite/src/test_dotxaxpyf.c | 12 +- testsuite/src/test_dotxf.c | 10 +- testsuite/src/test_dotxv.c | 18 +- testsuite/src/test_fnormm.c | 10 +- testsuite/src/test_fnormv.c | 8 +- testsuite/src/test_gemm.c | 11 +- testsuite/src/test_gemm_ukr.c | 18 +- testsuite/src/test_gemmtrsm_ukr.c | 16 +- testsuite/src/test_gemv.c | 10 +- testsuite/src/test_ger.c | 8 +- testsuite/src/test_hemm.c | 10 +- testsuite/src/test_hemv.c | 8 +- testsuite/src/test_her.c | 8 +- testsuite/src/test_her2.c | 10 +- testsuite/src/test_her2k.c | 12 +- testsuite/src/test_herk.c | 10 +- testsuite/src/test_scal2m.c | 4 +- testsuite/src/test_scal2v.c | 4 +- testsuite/src/test_scalm.c | 6 +- testsuite/src/test_scalv.c | 6 +- 
testsuite/src/test_setm.c | 4 +- testsuite/src/test_setv.c | 4 +- testsuite/src/test_subm.c | 16 +- testsuite/src/test_subv.c | 14 +- testsuite/src/test_symm.c | 10 +- testsuite/src/test_symv.c | 8 +- testsuite/src/test_syr.c | 8 +- testsuite/src/test_syr2.c | 8 +- testsuite/src/test_syr2k.c | 10 +- testsuite/src/test_syrk.c | 10 +- testsuite/src/test_trmm.c | 8 +- testsuite/src/test_trmm3.c | 10 +- testsuite/src/test_trmv.c | 6 +- testsuite/src/test_trsm.c | 8 +- testsuite/src/test_trsm_ukr.c | 10 +- testsuite/src/test_trsv.c | 8 +- 247 files changed, 1461 insertions(+), 1645 deletions(-) create mode 100644 frame/base/bli_obj_scalar.c create mode 100644 frame/base/bli_obj_scalar.h diff --git a/frame/0/getsc/bli_getsc.c b/frame/0/getsc/bli_getsc.c index ad8c1cd28..ffd93dd05 100644 --- a/frame/0/getsc/bli_getsc.c +++ b/frame/0/getsc/bli_getsc.c @@ -56,7 +56,7 @@ void bli_getsc( obj_t* chi, // If chi is a constant object, default to using the dcomplex // value within since we don't know if the caller needs just the // real or the real and imaginary parts. - void* buf_chi = bli_obj_scalar_buffer( dt_def, *chi ); + void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi ); FUNCPTR_T f; diff --git a/frame/1/axpyv/bli_axpyv.c b/frame/1/axpyv/bli_axpyv.c index 45388df15..23f6a8b08 100644 --- a/frame/1/axpyv/bli_axpyv.c +++ b/frame/1/axpyv/bli_axpyv.c @@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of alpha. */ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ alpha, \ &alpha_local ); \ diff --git a/frame/1/dotxv/bli_dotxv_unb_var1.c b/frame/1/dotxv/bli_dotxv_unb_var1.c index e529e08e8..da9c92c4b 100644 --- a/frame/1/dotxv/bli_dotxv_unb_var1.c +++ b/frame/1/dotxv/bli_dotxv_unb_var1.c @@ -93,11 +93,11 @@ void bli_dotxv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. 
This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of rho. dt_beta = dt_rho; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/1/scal2v/bli_scal2v.c b/frame/1/scal2v/bli_scal2v.c index 1984c500b..91cc6c163 100644 --- a/frame/1/scal2v/bli_scal2v.c +++ b/frame/1/scal2v/bli_scal2v.c @@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of beta. */ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ beta, \ &beta_local ); \ diff --git a/frame/1/scalv/bli_scalv.c b/frame/1/scalv/bli_scalv.c index f2c9e794d..915714027 100644 --- a/frame/1/scalv/bli_scalv.c +++ b/frame/1/scalv/bli_scalv.c @@ -57,7 +57,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of beta. */ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ beta, \ &beta_local ); \ diff --git a/frame/1/scalv/bli_scalv_int.c b/frame/1/scalv/bli_scalv_int.c index 8adea7589..10ab4fb51 100644 --- a/frame/1/scalv/bli_scalv_int.c +++ b/frame/1/scalv/bli_scalv_int.c @@ -64,7 +64,7 @@ void bli_scalv_int( obj_t* beta, if ( bli_obj_has_zero_dim( *x ) ) return; // Return early if the beta scalar equals one. - if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return; + if ( bli_obj_equals( beta, &BLIS_ONE ) ) return; // Extract the variant number and implementation type. 
n = cntl_var_num( cntl ); diff --git a/frame/1/setv/bli_setv.c b/frame/1/setv/bli_setv.c index 23c59a51d..f6676c471 100644 --- a/frame/1/setv/bli_setv.c +++ b/frame/1/setv/bli_setv.c @@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of beta. */ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ beta, \ &beta_local ); \ diff --git a/frame/1d/axpyd/bli_axpyd.c b/frame/1d/axpyd/bli_axpyd.c index 956987690..541b9a543 100644 --- a/frame/1d/axpyd/bli_axpyd.c +++ b/frame/1d/axpyd/bli_axpyd.c @@ -53,7 +53,7 @@ void bli_axpyd( obj_t* alpha, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, alpha, &alpha_local ); diff --git a/frame/1d/scal2d/bli_scal2d.c b/frame/1d/scal2d/bli_scal2d.c index fb61b6ebd..9f9bb533d 100644 --- a/frame/1d/scal2d/bli_scal2d.c +++ b/frame/1d/scal2d/bli_scal2d.c @@ -53,7 +53,7 @@ void bli_scal2d( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1d/scald/bli_scald.c b/frame/1d/scald/bli_scald.c index a491e2859..80aa13d84 100644 --- a/frame/1d/scald/bli_scald.c +++ b/frame/1d/scald/bli_scald.c @@ -52,7 +52,7 @@ void bli_scald( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. 
- bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1d/setd/bli_setd.c b/frame/1d/setd/bli_setd.c index 7bf2c623a..0fe088ec8 100644 --- a/frame/1d/setd/bli_setd.c +++ b/frame/1d/setd/bli_setd.c @@ -52,7 +52,7 @@ void bli_setd( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1f/axpyf/bli_axpyf_unb_var1.c b/frame/1f/axpyf/bli_axpyf_unb_var1.c index 9b15d8ee4..7442a4696 100644 --- a/frame/1f/axpyf/bli_axpyf_unb_var1.c +++ b/frame/1f/axpyf/bli_axpyf_unb_var1.c @@ -93,7 +93,7 @@ void bli_axpyf_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c index e8a4fbeef..dd15bce19 100644 --- a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c +++ b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c @@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. 
dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c index 8b2a3ecda..cb1526623 100644 --- a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c +++ b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c @@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/1f/dotxf/bli_dotxf_unb_var1.c b/frame/1f/dotxf/bli_dotxf_unb_var1.c index ba7ac47cd..a706b7f85 100644 --- a/frame/1f/dotxf/bli_dotxf_unb_var1.c +++ b/frame/1f/dotxf/bli_dotxf_unb_var1.c @@ -98,11 +98,11 @@ void bli_dotxf_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/1m/axpym/bli_axpym.c b/frame/1m/axpym/bli_axpym.c index 192624fa5..7adc5442a 100644 --- a/frame/1m/axpym/bli_axpym.c +++ b/frame/1m/axpym/bli_axpym.c @@ -53,7 +53,7 @@ void bli_axpym( obj_t* alpha, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, alpha, &alpha_local ); diff --git a/frame/1m/packm/bli_packm_blk_var2.c b/frame/1m/packm/bli_packm_blk_var2.c index 50db6c2c8..d12f65b2e 100644 --- a/frame/1m/packm/bli_packm_blk_var2.c +++ b/frame/1m/packm/bli_packm_blk_var2.c @@ -46,7 +46,7 @@ typedef void (*FUNCPTR_T)( dim_t n, dim_t m_max, dim_t n_max, - void* beta, + void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p, dim_t pd_p, inc_t ps_p @@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2); -void bli_packm_blk_var2( obj_t* beta, - obj_t* c, +void bli_packm_blk_var2( obj_t* c, obj_t* p ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -82,10 +81,16 @@ void bli_packm_blk_var2( obj_t* beta, dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_kappa; FUNCPTR_T f; + // This variant assumes that the micro-kernel will always apply the + // alpha scalar of the higher-level operation. Thus, we use BLIS_ONE + // for kappa so that the underlying packm implementation does not + // scale during packing. + buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); + // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_cp]; @@ -100,7 +105,7 @@ void bli_packm_blk_var2( obj_t* beta, n_p, m_max_p, n_max_p, - buf_beta, + buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, pd_p, ps_p ); @@ -120,16 +125,16 @@ void PASTEMAC(ch,varname )( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ ) \ { \ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict kappa_cast = kappa; \ + ctype* restrict c_cast = c; \ + ctype* restrict p_cast = p; \ + ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict c_begin; \ ctype* restrict p_begin; \ \ @@ -338,7 +343,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc10, \ p10_dim, \ p10_len, \ - beta_cast, \ + kappa_cast, \ c10, incc10, ldc10, \ p10, ldp ); \ \ @@ -347,7 +352,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc12, \ p12_dim, \ p12_len, \ - beta_cast, \ + kappa_cast, \ c12, incc12, ldc12, \ p12, ldp ); \ \ @@ -358,7 +363,7 @@ void PASTEMAC(ch,varname )( \ conjc, \ p11_m, \ p11_n, \ - beta_cast, \ + kappa_cast, \ c11, rs_c, cs_c, \ p11, rs_p11, cs_p11 ); \ \ @@ -412,7 +417,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc10, \ panel_dim_i, \ panel_len, \ - beta_cast, \ + kappa_cast, \ c10, incc10, ldc10, \ p_begin, ldp ); \ \ diff --git a/frame/1m/packm/bli_packm_blk_var2.h b/frame/1m/packm/bli_packm_blk_var2.h index 8022f7ff6..578150c89 100644 --- a/frame/1m/packm/bli_packm_blk_var2.h +++ b/frame/1m/packm/bli_packm_blk_var2.h @@ -32,8 +32,7 @@ */ -void bli_packm_blk_var2( obj_t* beta, - obj_t* c, +void bli_packm_blk_var2( obj_t* c, obj_t* p ); @@ -50,7 +49,7 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ diff --git 
a/frame/1m/packm/bli_packm_blk_var3.c b/frame/1m/packm/bli_packm_blk_var3.c index 52be1a5a4..f6a9ac727 100644 --- a/frame/1m/packm/bli_packm_blk_var3.c +++ b/frame/1m/packm/bli_packm_blk_var3.c @@ -49,7 +49,7 @@ typedef void (*FUNCPTR_T)( dim_t n, dim_t m_max, dim_t n_max, - void* beta, + void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p, dim_t pd_p, inc_t ps_p @@ -58,8 +58,7 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,packm_blk_var3); -void bli_packm_blk_var3( obj_t* beta, - obj_t* c, +void bli_packm_blk_var3( obj_t* c, obj_t* p ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -88,10 +87,16 @@ void bli_packm_blk_var3( obj_t* beta, dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_kappa; FUNCPTR_T f; + // This variant assumes that the micro-kernel will always apply the + // alpha scalar of the higher-level operation. Thus, we use BLIS_ONE + // for kappa so that the underlying packm implementation does not + // scale during packing. + buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); + // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_cp]; @@ -109,7 +114,7 @@ void bli_packm_blk_var3( obj_t* beta, n_p, m_max_p, n_max_p, - buf_beta, + buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, pd_p, ps_p ); @@ -132,16 +137,16 @@ void PASTEMAC(ch,varname )( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ ) \ { \ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict kappa_cast = kappa; \ + ctype* restrict c_cast = c; \ + ctype* restrict p_cast = p; \ + ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict c_begin; \ ctype* restrict p_begin; \ \ @@ -317,7 +322,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc, \ panel_dim_i, \ panel_len_i, \ - beta_cast, \ + kappa_cast, \ c_use, incc, ldc, \ p_use, ldp ); \ \ @@ -328,7 +333,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC2(ch,ch,setd_unb_var1)( diagoffp, \ *m_panel_use, \ *n_panel_use, \ - beta_cast, \ + kappa_cast, \ p_use, rs_p, cs_p ); \ } \ \ @@ -378,7 +383,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc, \ panel_dim_i, \ panel_len_i, \ - beta_cast, \ + kappa_cast, \ c_use, incc, ldc, \ p_use, ldp ); \ \ diff --git a/frame/1m/packm/bli_packm_blk_var3.h b/frame/1m/packm/bli_packm_blk_var3.h index dd0ca1a09..ff7ef2793 100644 --- a/frame/1m/packm/bli_packm_blk_var3.h +++ b/frame/1m/packm/bli_packm_blk_var3.h @@ -32,8 +32,7 @@ */ -void bli_packm_blk_var3( obj_t* beta, - obj_t* c, +void bli_packm_blk_var3( obj_t* c, obj_t* p ); @@ -53,7 +52,7 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ diff --git a/frame/1m/packm/bli_packm_check.c b/frame/1m/packm/bli_packm_check.c index da3b686cf..41b5302f4 100644 --- a/frame/1m/packm/bli_packm_check.c 
+++ b/frame/1m/packm/bli_packm_check.c @@ -34,33 +34,49 @@ #include "blis.h" -void bli_packm_check( obj_t* beta, - obj_t* c, - obj_t* p, - packm_t* cntl ) + +void bli_packm_init_check( obj_t* a, + obj_t* p, + packm_t* cntl ) { err_t e_val; // Check object datatypes. - e_val = bli_check_noninteger_object( beta ); + e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); - e_val = bli_check_floating_object( c ); - bli_check_error_code( e_val ); + // Check control tree pointer. - // Check object dimensions. - - e_val = bli_check_scalar_object( beta ); - bli_check_error_code( e_val ); - - // We don't check for conformal dimensions between c and p because - // p has not yet been initialized. - - // Check control tree pointer - - // NOTE: We can't check the control tree until we stop interpreting a - // NULL value (in bli_packm_int()) as a request to skip the operation. + // NOTE: We can't check the control tree because we interpret a NULL + // value (in bli_packm_int()) as a request to skip the operation. + //e_val = bli_check_valid_cntl( ( void* )cntl ); + //bli_check_error_code( e_val ); +} + +void bli_packm_int_check( obj_t* a, + obj_t* p, + packm_t* cntl ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( p ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_conformal_dims( a, p ); + bli_check_error_code( e_val ); + + // Check control tree pointer. + + // NOTE: We can't check the control tree because we interpret a NULL + // value (in bli_packm_int()) as a request to skip the operation. 
//e_val = bli_check_valid_cntl( ( void* )cntl ); //bli_check_error_code( e_val ); } diff --git a/frame/1m/packm/bli_packm_check.h b/frame/1m/packm/bli_packm_check.h index e4ce7292c..cc950b302 100644 --- a/frame/1m/packm/bli_packm_check.h +++ b/frame/1m/packm/bli_packm_check.h @@ -32,7 +32,10 @@ */ -void bli_packm_check( obj_t* beta, - obj_t* c, - obj_t* p, - packm_t* cntl ); +void bli_packm_init_check( obj_t* a, + obj_t* p, + packm_t* cntl ); + +void bli_packm_int_check( obj_t* a, + obj_t* p, + packm_t* cntl ); diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 9f10bd7aa..cc4ede33a 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -56,7 +56,7 @@ void bli_packm_init( obj_t* a, // Check parameters. if ( bli_error_checking_is_enabled() ) - bli_packm_check( &BLIS_ONE, a, p, cntl ); + bli_packm_init_check( a, p, cntl ); // First check if we are to skip this operation because the control tree // is NULL, and if so, simply alias the object to its packed counterpart. diff --git a/frame/1m/packm/bli_packm_int.c b/frame/1m/packm/bli_packm_int.c index 6d06fb98c..6aca671c5 100644 --- a/frame/1m/packm/bli_packm_int.c +++ b/frame/1m/packm/bli_packm_int.c @@ -36,8 +36,7 @@ #define FUNCPTR_T packm_fp -typedef void (*FUNCPTR_T)( obj_t* beta, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* p ); static FUNCPTR_T vars[6][3] = @@ -51,20 +50,17 @@ static FUNCPTR_T vars[6][3] = { NULL, NULL, NULL, }, }; -void bli_packm_int( obj_t* beta, - obj_t* a, +void bli_packm_int( obj_t* a, obj_t* p, packm_t* cntl ) { - obj_t* beta_use; - varnum_t n; impl_t i; FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) - bli_packm_check( beta, a, p, cntl ); + bli_packm_int_check( a, p, cntl ); // Sanity check; A should never have a zero dimension. If we must support // it, then we should fold it into the next alias-and-early-exit block. 
@@ -106,13 +102,35 @@ void bli_packm_int( obj_t* beta, return; } - // Notice that a beta parameter is always passed in. This value is allowed - // to be non-unit even when no scaling is prescribed. If the control tree - // indicates no scaling, then make sure that BLIS_ONE is passed into the - // packm implementation. - //if ( cntl_does_scale( cntl ) ) beta_use = beta; - //else beta_use = &BLIS_ONE; - beta_use = &BLIS_ONE; +/* + // The value for kappa we use will depend on whether the scalar + // attached to A has a nonzero imaginary component. If it does, + // then we will apply the scalar during packing to facilitate + // implementing complex domain micro-kernels in terms of their + // real domain counterparts. (In the aforementioned situation, + // applying a real scalar is easy, but applying a complex one is + // harder, so we avoid the need altogether with the code below.) + if ( bli_obj_scalar_has_nonzero_imag( a ) ) + { + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); + + // Detach the scalar. + bli_obj_scalar_detach( a, &kappa ); + + // Reset the attached scalar (to 1.0). + bli_obj_scalar_reset( a ); + + kappa_p = &kappa; + } + else + { + // If the internal scalar of A has only a real component, then + // we will apply it later (in the micro-kernel), and so we will + // use BLIS_ONE to indicate no scaling during packing. + kappa_p = &BLIS_ONE; + } +*/ + // Extract the variant number and implementation type. n = cntl_var_num( cntl ); @@ -121,9 +139,8 @@ void bli_packm_int( obj_t* beta, // Index into the variant array to extract the correct function pointer. f = vars[n][i]; - // Invoke the variant with beta_use. - f( beta_use, - a, + // Invoke the variant with kappa_use. 
+ f( a, p ); } diff --git a/frame/1m/packm/bli_packm_int.h b/frame/1m/packm/bli_packm_int.h index 40ec4ed7d..bd1174b24 100644 --- a/frame/1m/packm/bli_packm_int.h +++ b/frame/1m/packm/bli_packm_int.h @@ -32,8 +32,7 @@ */ -void bli_packm_int( obj_t* beta, - obj_t* c, +void bli_packm_int( obj_t* a, obj_t* p, packm_t* cntl ); diff --git a/frame/1m/packm/bli_packm_unb_var1.c b/frame/1m/packm/bli_packm_unb_var1.c index be0627f39..5d9626aa1 100644 --- a/frame/1m/packm/bli_packm_unb_var1.c +++ b/frame/1m/packm/bli_packm_unb_var1.c @@ -47,7 +47,7 @@ typedef void (*FUNCPTR_T)( dim_t n, dim_t m_max, dim_t n_max, - void* beta, + void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p ); @@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1); -void bli_packm_unb_var1( obj_t* beta, - obj_t* c, +void bli_packm_unb_var1( obj_t* c, obj_t* p ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -81,7 +80,7 @@ void bli_packm_unb_var1( obj_t* beta, inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_kappa; FUNCPTR_T f; @@ -89,6 +88,12 @@ void bli_packm_unb_var1( obj_t* beta, if ( bli_obj_is_dense( *p ) ) densify = TRUE; else densify = FALSE; + // This variant assumes that the computational kernel will always apply + // the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE + // for kappa so that the underlying packm implementation does not scale + // during packing. + buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); + // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_cp]; @@ -104,7 +109,7 @@ void bli_packm_unb_var1( obj_t* beta, n_p, m_max_p, n_max_p, - buf_beta, + buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p ); } @@ -124,20 +129,20 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p \ ) \ { \ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict kappa_cast = kappa; \ + ctype* restrict c_cast = c; \ + ctype* restrict p_cast = p; \ + ctype* restrict zero = PASTEMAC(ch,0); \ \ /* We begin by packing the region indicated by the parameters. If matrix c is dense (either because the structure is general or because the structure has already been "densified"), this ends - up being the only action we take. Note that if beta is unit, + up being the only action we take. Note that if kappa is unit, the data is simply copied (rather than scaled by one). 
*/ \ PASTEMAC3(ch,ch,ch,scal2m)( diagoffc, \ diagc, \ @@ -145,7 +150,7 @@ void PASTEMAC(ch,varname)( \ transc, \ m, \ n, \ - beta_cast, \ + kappa_cast, \ c_cast, rs_c, cs_c, \ p_cast, rs_p, cs_p ); \ \ @@ -184,7 +189,7 @@ void PASTEMAC(ch,varname)( \ transc, \ m, \ n, \ - beta_cast, \ + kappa_cast, \ c_cast, rs_c, cs_c, \ p_cast, rs_p, cs_p ); \ } \ diff --git a/frame/1m/packm/bli_packm_unb_var1.h b/frame/1m/packm/bli_packm_unb_var1.h index 5da6224ff..9b7cec060 100644 --- a/frame/1m/packm/bli_packm_unb_var1.h +++ b/frame/1m/packm/bli_packm_unb_var1.h @@ -32,8 +32,7 @@ */ -void bli_packm_unb_var1( obj_t* beta, - obj_t* c, +void bli_packm_unb_var1( obj_t* c, obj_t* p ); @@ -51,7 +50,7 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p \ ); diff --git a/frame/1m/packm/old/bli_packm_blk_var1.c b/frame/1m/packm/old/bli_packm_blk_var1.c index 7a0d3bfae..4d1207f9a 100644 --- a/frame/1m/packm/old/bli_packm_blk_var1.c +++ b/frame/1m/packm/old/bli_packm_blk_var1.c @@ -83,7 +83,7 @@ void bli_packm_blk_var1( obj_t* beta, inc_t cs_p = bli_obj_col_stride( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta ); FUNCPTR_T f; diff --git a/frame/1m/packm/other/bli_packm_blk_var2.c b/frame/1m/packm/other/bli_packm_blk_var2.c index cdbaddc77..194b50c23 100644 --- a/frame/1m/packm/other/bli_packm_blk_var2.c +++ b/frame/1m/packm/other/bli_packm_blk_var2.c @@ -83,7 +83,7 @@ void bli_packm_blk_var2( obj_t* beta, dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta ); FUNCPTR_T f; diff --git a/frame/1m/scal2m/bli_scal2m.c b/frame/1m/scal2m/bli_scal2m.c index 62b1f368c..c697c9b86 100644 --- a/frame/1m/scal2m/bli_scal2m.c +++ 
b/frame/1m/scal2m/bli_scal2m.c @@ -53,7 +53,7 @@ void bli_scal2m( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1m/scalm/bli_scalm.c b/frame/1m/scalm/bli_scalm.c index a2a4a59c9..e53252629 100644 --- a/frame/1m/scalm/bli_scalm.c +++ b/frame/1m/scalm/bli_scalm.c @@ -43,28 +43,12 @@ extern scalm_t* scalm_cntl; void bli_scalm( obj_t* beta, obj_t* x ) { - num_t dt_x; - obj_t beta_local; - if ( bli_error_checking_is_enabled() ) bli_scalm_check( beta, x ); - // Use the datatype of x as the target type for beta (since we do - // not assume mixed domain/type support is enabled). - dt_x = bli_obj_datatype( *x ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_x, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - bli_scalm_unb_var1( &beta_local, x ); -/* - bli_scalm_int( &beta_local, + bli_scalm_int( beta, x, scalm_cntl ); -*/ } diff --git a/frame/1m/scalm/bli_scalm_int.c b/frame/1m/scalm/bli_scalm_int.c index caf520892..9b168e89a 100644 --- a/frame/1m/scalm/bli_scalm_int.c +++ b/frame/1m/scalm/bli_scalm_int.c @@ -36,8 +36,7 @@ #define FUNCPTR_T scalm_fp -typedef void (*FUNCPTR_T)( obj_t* beta, - obj_t* x ); +typedef void (*FUNCPTR_T)( obj_t* x ); static FUNCPTR_T vars[1][3] = { @@ -49,6 +48,7 @@ void bli_scalm_int( obj_t* beta, obj_t* x, scalm_t* cntl ) { + obj_t x_local; varnum_t n; impl_t i; FUNCPTR_T f; @@ -63,8 +63,18 @@ void bli_scalm_int( obj_t* beta, // Return early if one of the matrix operands has a zero dimension. if ( bli_obj_has_zero_dim( *x ) ) return; - // Return early if the beta scalar equals one. - if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return; + // Return early if both beta and the scalar attached to x are unit. 
+ if ( bli_obj_equals( beta, &BLIS_ONE ) && + bli_obj_scalar_equals( x, &BLIS_ONE ) ) return; + + // Alias x to x_local so we can apply beta if it is non-unit. + bli_obj_alias_to( *x, x_local ); + + // If beta is non-unit, apply it to the scalar attached to x. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &x_local ); + } // Extract the variant number and implementation type. n = cntl_var_num( cntl ); @@ -74,7 +84,6 @@ void bli_scalm_int( obj_t* beta, f = vars[n][i]; // Invoke the variant. - f( beta, - x ); + f( &x_local ); } diff --git a/frame/1m/scalm/bli_scalm_unb_var1.c b/frame/1m/scalm/bli_scalm_unb_var1.c index 0a97b0d60..a4fa042ed 100644 --- a/frame/1m/scalm/bli_scalm_unb_var1.c +++ b/frame/1m/scalm/bli_scalm_unb_var1.c @@ -59,12 +59,10 @@ static FUNCPTR_T GENARRAY2_MIN(ftypes,scalm_unb_var1); #endif -void bli_scalm_unb_var1( obj_t* beta, - obj_t* x ) +void bli_scalm_unb_var1( obj_t* x ) { num_t dt_x = bli_obj_datatype( *x ); - conj_t conjbeta = bli_obj_conj_status( *beta ); doff_t diagoffx = bli_obj_diag_offset( *x ); uplo_t uplox = bli_obj_uplo( *x ); @@ -76,21 +74,25 @@ void bli_scalm_unb_var1( obj_t* beta, inc_t cs_x = bli_obj_col_stride( *x ); void* buf_beta; - num_t dt_beta; FUNCPTR_T f; - // If beta is a scalar constant, use dt_x to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_x, dt_beta, buf_beta ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to x. + buf_beta = bli_obj_internal_scalar_buffer( *x ); // Index into the type combination array to extract the correct // function pointer. - f = ftypes[dt_beta][dt_x]; + // NOTE: We use dt_x for both beta and x because beta was obtained + // from the attached scalar of x, which is guaranteed to be of the + // same datatype as x. 
+ f = ftypes[dt_x][dt_x]; // Invoke the function. - f( conjbeta, + // NOTE: We unconditionally pass in BLIS_NO_CONJUGATE for beta + // because it would have already been conjugated by the front-end. + f( BLIS_NO_CONJUGATE, diagoffx, uplox, m, diff --git a/frame/1m/scalm/bli_scalm_unb_var1.h b/frame/1m/scalm/bli_scalm_unb_var1.h index c723e1a52..25912fccf 100644 --- a/frame/1m/scalm/bli_scalm_unb_var1.h +++ b/frame/1m/scalm/bli_scalm_unb_var1.h @@ -32,8 +32,7 @@ */ -void bli_scalm_unb_var1( obj_t* beta, - obj_t* x ); +void bli_scalm_unb_var1( obj_t* x ); #undef GENTPROT2 diff --git a/frame/1m/setm/bli_setm.c b/frame/1m/setm/bli_setm.c index 217f7780a..b996ac751 100644 --- a/frame/1m/setm/bli_setm.c +++ b/frame/1m/setm/bli_setm.c @@ -52,7 +52,7 @@ void bli_setm( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/2/gemv/bli_gemv.c b/frame/2/gemv/bli_gemv.c index d8377e3f2..6f2180f55 100644 --- a/frame/2/gemv/bli_gemv.c +++ b/frame/2/gemv/bli_gemv.c @@ -78,7 +78,7 @@ void bli_gemv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -89,7 +89,7 @@ void bli_gemv( obj_t* alpha, // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. 
dt_beta = dt_targ_y; - bli_obj_init_scalar_copy_of( dt_beta, + bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); @@ -188,8 +188,8 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m_x * incx; \ rs_y = incy; cs_y = m_y * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/gemv/bli_gemv_blk_var1.c b/frame/2/gemv/bli_gemv_blk_var1.c index d04b8683d..130e45729 100644 --- a/frame/2/gemv/bli_gemv_blk_var1.c +++ b/frame/2/gemv/bli_gemv_blk_var1.c @@ -76,8 +76,7 @@ void bli_gemv_blk_var1( obj_t* alpha, cntl_sub_packv_y( cntl ) ); // Copy/pack A1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &y1, diff --git a/frame/2/gemv/bli_gemv_blk_var2.c b/frame/2/gemv/bli_gemv_blk_var2.c index 44ca60630..e6f203d61 100644 --- a/frame/2/gemv/bli_gemv_blk_var2.c +++ b/frame/2/gemv/bli_gemv_blk_var2.c @@ -81,8 +81,7 @@ void bli_gemv_blk_var2( obj_t* alpha, cntl_sub_packv_x( cntl ) ); // Copy/pack A1, x1 (if needed). - bli_packm_int( alpha, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/gemv/bli_gemv_unb_var1.c b/frame/2/gemv/bli_gemv_unb_var1.c index 069dfa44b..bb30dcaab 100644 --- a/frame/2/gemv/bli_gemv_unb_var1.c +++ b/frame/2/gemv/bli_gemv_unb_var1.c @@ -99,11 +99,11 @@ void bli_gemv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/gemv/bli_gemv_unb_var2.c b/frame/2/gemv/bli_gemv_unb_var2.c index 6bfaf74af..51e379a5d 100644 --- a/frame/2/gemv/bli_gemv_unb_var2.c +++ b/frame/2/gemv/bli_gemv_unb_var2.c @@ -99,11 +99,11 @@ void bli_gemv_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/gemv/bli_gemv_unf_var1.c b/frame/2/gemv/bli_gemv_unf_var1.c index a1580fc39..7bcd97774 100644 --- a/frame/2/gemv/bli_gemv_unf_var1.c +++ b/frame/2/gemv/bli_gemv_unf_var1.c @@ -99,11 +99,11 @@ void bli_gemv_unf_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. 
dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/gemv/bli_gemv_unf_var2.c b/frame/2/gemv/bli_gemv_unf_var2.c index 6b95f704f..ecf18b4b4 100644 --- a/frame/2/gemv/bli_gemv_unf_var2.c +++ b/frame/2/gemv/bli_gemv_unf_var2.c @@ -99,11 +99,11 @@ void bli_gemv_unf_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/ger/bli_ger.c b/frame/2/ger/bli_ger.c index 4003837f0..10108705e 100644 --- a/frame/2/ger/bli_ger.c +++ b/frame/2/ger/bli_ger.c @@ -75,7 +75,7 @@ void bli_ger( obj_t* alpha, // the type union of the target datatypes of x and y to prevent any // unnecessary loss of information during the computation. 
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -148,7 +148,7 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m_x * incx; \ rs_y = incy; cs_y = m_y * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \ diff --git a/frame/2/ger/bli_ger_blk_var1.c b/frame/2/ger/bli_ger_blk_var1.c index 2b7c6a20c..27662a50a 100644 --- a/frame/2/ger/bli_ger_blk_var1.c +++ b/frame/2/ger/bli_ger_blk_var1.c @@ -75,8 +75,7 @@ void bli_ger_blk_var1( obj_t* alpha, cntl_sub_packv_x( cntl ) ); // Copy/pack A1, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/ger/bli_ger_blk_var2.c b/frame/2/ger/bli_ger_blk_var2.c index 1a977c453..65adc0573 100644 --- a/frame/2/ger/bli_ger_blk_var2.c +++ b/frame/2/ger/bli_ger_blk_var2.c @@ -75,8 +75,7 @@ void bli_ger_blk_var2( obj_t* alpha, cntl_sub_packv_y( cntl ) ); // Copy/pack A1, y1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &y1, diff --git a/frame/2/ger/bli_ger_int.c b/frame/2/ger/bli_ger_int.c index 59a0b3fbb..cb9273320 100644 --- a/frame/2/ger/bli_ger_int.c +++ b/frame/2/ger/bli_ger_int.c @@ -95,7 +95,7 @@ void bli_ger_int( conj_t conjx, bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( y_local ); - bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ), + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ), BLIS_CONJUGATE, alpha, &alpha_local ); diff --git a/frame/2/ger/bli_ger_unb_var1.c b/frame/2/ger/bli_ger_unb_var1.c index d4190eec4..123b8c39c 100644 --- a/frame/2/ger/bli_ger_unb_var1.c +++ b/frame/2/ger/bli_ger_unb_var1.c @@ -94,7 +94,7 @@ void bli_ger_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/ger/bli_ger_unb_var2.c b/frame/2/ger/bli_ger_unb_var2.c index 2efe70c94..b5ffdcd86 100644 --- a/frame/2/ger/bli_ger_unb_var2.c +++ b/frame/2/ger/bli_ger_unb_var2.c @@ -94,7 +94,7 @@ void bli_ger_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/2/hemv/bli_hemv.c b/frame/2/hemv/bli_hemv.c index cae2ffb71..20a787b1c 100644 --- a/frame/2/hemv/bli_hemv.c +++ b/frame/2/hemv/bli_hemv.c @@ -78,7 +78,7 @@ void bli_hemv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -89,7 +89,7 @@ void bli_hemv( obj_t* alpha, // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. dt_beta = dt_targ_y; - bli_obj_init_scalar_copy_of( dt_beta, + bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); @@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/hemv/bli_hemv_blk_var1.c b/frame/2/hemv/bli_hemv_blk_var1.c index b20e45edc..97dac835c 100644 --- a/frame/2/hemv/bli_hemv_blk_var1.c +++ b/frame/2/hemv/bli_hemv_blk_var1.c @@ -106,8 +106,7 @@ void bli_hemv_blk_var1( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_blk_var2.c b/frame/2/hemv/bli_hemv_blk_var2.c index d319520fe..669b142ab 100644 --- a/frame/2/hemv/bli_hemv_blk_var2.c +++ b/frame/2/hemv/bli_hemv_blk_var2.c @@ -109,8 +109,7 @@ void bli_hemv_blk_var2( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_blk_var3.c b/frame/2/hemv/bli_hemv_blk_var3.c index 30914d4b6..39a0bf4ff 100644 --- a/frame/2/hemv/bli_hemv_blk_var3.c +++ b/frame/2/hemv/bli_hemv_blk_var3.c @@ -106,8 +106,7 @@ void bli_hemv_blk_var3( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_blk_var4.c b/frame/2/hemv/bli_hemv_blk_var4.c index 789b64336..e3365be55 100644 --- a/frame/2/hemv/bli_hemv_blk_var4.c +++ b/frame/2/hemv/bli_hemv_blk_var4.c @@ -109,8 +109,7 @@ void bli_hemv_blk_var4( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_unb_var1.c b/frame/2/hemv/bli_hemv_unb_var1.c index 9a3ad87a7..a4c6d471e 100644 --- a/frame/2/hemv/bli_hemv_unb_var1.c +++ b/frame/2/hemv/bli_hemv_unb_var1.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var1( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unb_var2.c b/frame/2/hemv/bli_hemv_unb_var2.c index 4e70d5d24..7c6131763 100644 --- a/frame/2/hemv/bli_hemv_unb_var2.c +++ b/frame/2/hemv/bli_hemv_unb_var2.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var2( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unb_var3.c b/frame/2/hemv/bli_hemv_unb_var3.c index 5eb85b03e..4ea68d8ba 100644 --- a/frame/2/hemv/bli_hemv_unb_var3.c +++ b/frame/2/hemv/bli_hemv_unb_var3.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var3( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. 
dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unb_var4.c b/frame/2/hemv/bli_hemv_unb_var4.c index 974e0fd16..3a7d61706 100644 --- a/frame/2/hemv/bli_hemv_unb_var4.c +++ b/frame/2/hemv/bli_hemv_unb_var4.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var4( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unf_var1.c b/frame/2/hemv/bli_hemv_unf_var1.c index 0f22ede56..70e27ef86 100644 --- a/frame/2/hemv/bli_hemv_unf_var1.c +++ b/frame/2/hemv/bli_hemv_unf_var1.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var1( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/2/hemv/bli_hemv_unf_var1a.c b/frame/2/hemv/bli_hemv_unf_var1a.c index e04e44ea9..e7baf0e2f 100644 --- a/frame/2/hemv/bli_hemv_unf_var1a.c +++ b/frame/2/hemv/bli_hemv_unf_var1a.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var1a( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unf_var3.c b/frame/2/hemv/bli_hemv_unf_var3.c index 50ad162a7..218a0d048 100644 --- a/frame/2/hemv/bli_hemv_unf_var3.c +++ b/frame/2/hemv/bli_hemv_unf_var3.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var3( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); #if 0 obj_t x_copy, y_copy; diff --git a/frame/2/hemv/bli_hemv_unf_var3a.c b/frame/2/hemv/bli_hemv_unf_var3a.c index 0c1cea2de..079dd42c4 100644 --- a/frame/2/hemv/bli_hemv_unf_var3a.c +++ b/frame/2/hemv/bli_hemv_unf_var3a.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var3a( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); #if 0 obj_t x_copy, y_copy; diff --git a/frame/2/her/bli_her.c b/frame/2/her/bli_her.c index c376f10d6..2f2bfdb85 100644 --- a/frame/2/her/bli_her.c +++ b/frame/2/her/bli_her.c @@ -68,7 +68,7 @@ void bli_her( obj_t* alpha, // Create object to hold a copy-cast of alpha. dt_alpha = dt_targ_x; - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -151,7 +151,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt_r, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/2/her/bli_her_blk_var1.c b/frame/2/her/bli_her_blk_var1.c index 4e3c80ebd..7a5cd3aed 100644 --- a/frame/2/her/bli_her_blk_var1.c +++ b/frame/2/her/bli_her_blk_var1.c @@ -90,8 +90,7 @@ void bli_her_blk_var1( conj_t conjh, cntl_sub_packv_x1( cntl ) ); // Copy/pack C11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her/bli_her_blk_var2.c b/frame/2/her/bli_her_blk_var2.c index e440e4972..3e534e5cb 100644 --- a/frame/2/her/bli_her_blk_var2.c +++ b/frame/2/her/bli_her_blk_var2.c @@ -90,8 +90,7 @@ void bli_her_blk_var2( conj_t conjh, cntl_sub_packv_x1( cntl ) ); // Copy/pack C11, x1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2.c b/frame/2/her2/bli_her2.c index e33c3a5e9..09929ab5c 100644 --- a/frame/2/her2/bli_her2.c +++ b/frame/2/her2/bli_her2.c @@ -75,13 +75,13 @@ void bli_her2( obj_t* alpha, // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Also create a conjugated copy of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_CONJUGATE, alpha, &alpha_conj_local ); @@ -171,7 +171,7 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ diff --git a/frame/2/her2/bli_her2_blk_var1.c b/frame/2/her2/bli_her2_blk_var1.c index c3cc94285..9a3ec5e7f 100644 --- a/frame/2/her2/bli_her2_blk_var1.c +++ b/frame/2/her2/bli_her2_blk_var1.c @@ -101,8 +101,7 @@ void bli_her2_blk_var1( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_blk_var2.c b/frame/2/her2/bli_her2_blk_var2.c index 39bbec4ed..4daab4759 100644 --- a/frame/2/her2/bli_her2_blk_var2.c +++ b/frame/2/her2/bli_her2_blk_var2.c @@ -104,8 +104,7 @@ void bli_her2_blk_var2( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_blk_var3.c b/frame/2/her2/bli_her2_blk_var3.c index 1fed9dc6f..058a951a3 100644 --- a/frame/2/her2/bli_her2_blk_var3.c +++ b/frame/2/her2/bli_her2_blk_var3.c @@ -104,8 +104,7 @@ void bli_her2_blk_var3( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_blk_var4.c b/frame/2/her2/bli_her2_blk_var4.c index dc989abea..e432c57a1 100644 --- a/frame/2/her2/bli_her2_blk_var4.c +++ b/frame/2/her2/bli_her2_blk_var4.c @@ -101,8 +101,7 @@ void bli_her2_blk_var4( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_int.c b/frame/2/her2/bli_her2_int.c index e40dc3c13..5b25f4d1f 100644 --- a/frame/2/her2/bli_her2_int.c +++ b/frame/2/her2/bli_her2_int.c @@ -93,11 +93,11 @@ void bli_her2_int( conj_t conjh, bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( y_local ); - bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ), + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ), BLIS_CONJUGATE, alpha, &alpha_local ); - bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha_conj ), + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha_conj ), BLIS_CONJUGATE, alpha_conj, &alpha_conj_local ); diff --git a/frame/2/her2/bli_her2_unb_var1.c b/frame/2/her2/bli_her2_unb_var1.c index 355231a89..1276aac37 100644 --- a/frame/2/her2/bli_her2_unb_var1.c +++ b/frame/2/her2/bli_her2_unb_var1.c @@ -96,7 +96,7 @@ void bli_her2_unb_var1( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. 
dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unb_var2.c b/frame/2/her2/bli_her2_unb_var2.c index e08e15e00..4d2ef3b1e 100644 --- a/frame/2/her2/bli_her2_unb_var2.c +++ b/frame/2/her2/bli_her2_unb_var2.c @@ -96,7 +96,7 @@ void bli_her2_unb_var2( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unb_var3.c b/frame/2/her2/bli_her2_unb_var3.c index d51462757..5b5a0051b 100644 --- a/frame/2/her2/bli_her2_unb_var3.c +++ b/frame/2/her2/bli_her2_unb_var3.c @@ -96,7 +96,7 @@ void bli_her2_unb_var3( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unb_var4.c b/frame/2/her2/bli_her2_unb_var4.c index 024387e0f..f461dd5fd 100644 --- a/frame/2/her2/bli_her2_unb_var4.c +++ b/frame/2/her2/bli_her2_unb_var4.c @@ -96,7 +96,7 @@ void bli_her2_unb_var4( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/2/her2/bli_her2_unf_var1.c b/frame/2/her2/bli_her2_unf_var1.c index c8e94ac12..bb172260e 100644 --- a/frame/2/her2/bli_her2_unf_var1.c +++ b/frame/2/her2/bli_her2_unf_var1.c @@ -96,7 +96,7 @@ void bli_her2_unf_var1( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unf_var4.c b/frame/2/her2/bli_her2_unf_var4.c index 6437816ec..e10f9eca7 100644 --- a/frame/2/her2/bli_her2_unf_var4.c +++ b/frame/2/her2/bli_her2_unf_var4.c @@ -96,7 +96,7 @@ void bli_her2_unf_var4( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/symv/bli_symv.c b/frame/2/symv/bli_symv.c index be40e4913..2e267aa29 100644 --- a/frame/2/symv/bli_symv.c +++ b/frame/2/symv/bli_symv.c @@ -78,7 +78,7 @@ void bli_symv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -89,7 +89,7 @@ void bli_symv( obj_t* alpha, // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. 
dt_beta = dt_targ_y; - bli_obj_init_scalar_copy_of( dt_beta, + bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); @@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/syr/bli_syr.c b/frame/2/syr/bli_syr.c index 2e0a55c94..8750c4031 100644 --- a/frame/2/syr/bli_syr.c +++ b/frame/2/syr/bli_syr.c @@ -70,7 +70,7 @@ void bli_syr( obj_t* alpha, // the type union of the target datatypes of x and c to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_c ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -152,7 +152,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/2/syr2/bli_syr2.c b/frame/2/syr2/bli_syr2.c index 4debf609e..1b2e061b7 100644 --- a/frame/2/syr2/bli_syr2.c +++ b/frame/2/syr2/bli_syr2.c @@ -74,7 +74,7 @@ void bli_syr2( obj_t* alpha, // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. 
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -164,7 +164,7 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ diff --git a/frame/2/trmv/bli_trmv.c b/frame/2/trmv/bli_trmv.c index 6d85e042a..e2b968719 100644 --- a/frame/2/trmv/bli_trmv.c +++ b/frame/2/trmv/bli_trmv.c @@ -70,7 +70,7 @@ void bli_trmv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -150,7 +150,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/trmv/bli_trmv_l_blk_var1.c b/frame/2/trmv/bli_trmv_l_blk_var1.c index 1e5ee9488..fab932343 100644 --- a/frame/2/trmv/bli_trmv_l_blk_var1.c +++ b/frame/2/trmv/bli_trmv_l_blk_var1.c @@ -80,8 +80,7 @@ void bli_trmv_l_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_l_blk_var2.c b/frame/2/trmv/bli_trmv_l_blk_var2.c index 3c8eb3265..cf7fa65dd 100644 --- a/frame/2/trmv/bli_trmv_l_blk_var2.c +++ b/frame/2/trmv/bli_trmv_l_blk_var2.c @@ -80,8 +80,7 @@ void bli_trmv_l_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_u_blk_var1.c b/frame/2/trmv/bli_trmv_u_blk_var1.c index 88a3ff182..7a4c8ac44 100644 --- a/frame/2/trmv/bli_trmv_u_blk_var1.c +++ b/frame/2/trmv/bli_trmv_u_blk_var1.c @@ -80,8 +80,7 @@ void bli_trmv_u_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_u_blk_var2.c b/frame/2/trmv/bli_trmv_u_blk_var2.c index 9011d5cb5..b1ed17b12 100644 --- a/frame/2/trmv/bli_trmv_u_blk_var2.c +++ b/frame/2/trmv/bli_trmv_u_blk_var2.c @@ -80,8 +80,7 @@ void bli_trmv_u_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_unb_var1.c b/frame/2/trmv/bli_trmv_unb_var1.c index 36d648258..bb3dc6892 100644 --- a/frame/2/trmv/bli_trmv_unb_var1.c +++ b/frame/2/trmv/bli_trmv_unb_var1.c @@ -88,7 +88,7 @@ void bli_trmv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trmv/bli_trmv_unb_var2.c b/frame/2/trmv/bli_trmv_unb_var2.c index 4833e3fca..6f6bb17b6 100644 --- a/frame/2/trmv/bli_trmv_unb_var2.c +++ b/frame/2/trmv/bli_trmv_unb_var2.c @@ -88,7 +88,7 @@ void bli_trmv_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trmv/bli_trmv_unf_var1.c b/frame/2/trmv/bli_trmv_unf_var1.c index 1fa625774..4f5234576 100644 --- a/frame/2/trmv/bli_trmv_unf_var1.c +++ b/frame/2/trmv/bli_trmv_unf_var1.c @@ -88,7 +88,7 @@ void bli_trmv_unf_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trmv/bli_trmv_unf_var2.c b/frame/2/trmv/bli_trmv_unf_var2.c index a2ea30e45..a806ee00c 100644 --- a/frame/2/trmv/bli_trmv_unf_var2.c +++ b/frame/2/trmv/bli_trmv_unf_var2.c @@ -88,7 +88,7 @@ void bli_trmv_unf_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv.c b/frame/2/trsv/bli_trsv.c index 2be95de33..9c2924a44 100644 --- a/frame/2/trsv/bli_trsv.c +++ b/frame/2/trsv/bli_trsv.c @@ -70,7 +70,7 @@ void bli_trsv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -147,7 +147,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/trsv/bli_trsv_l_blk_var1.c b/frame/2/trsv/bli_trsv_l_blk_var1.c index 947cce0c0..b842a774c 100644 --- a/frame/2/trsv/bli_trsv_l_blk_var1.c +++ b/frame/2/trsv/bli_trsv_l_blk_var1.c @@ -85,8 +85,7 @@ void bli_trsv_l_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_l_blk_var2.c b/frame/2/trsv/bli_trsv_l_blk_var2.c index 910969c98..5f924bef6 100644 --- a/frame/2/trsv/bli_trsv_l_blk_var2.c +++ b/frame/2/trsv/bli_trsv_l_blk_var2.c @@ -85,8 +85,7 @@ void bli_trsv_l_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_u_blk_var1.c b/frame/2/trsv/bli_trsv_u_blk_var1.c index b16162826..ffc533642 100644 --- a/frame/2/trsv/bli_trsv_u_blk_var1.c +++ b/frame/2/trsv/bli_trsv_u_blk_var1.c @@ -85,8 +85,7 @@ void bli_trsv_u_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_u_blk_var2.c b/frame/2/trsv/bli_trsv_u_blk_var2.c index 145b2717e..8b03d9625 100644 --- a/frame/2/trsv/bli_trsv_u_blk_var2.c +++ b/frame/2/trsv/bli_trsv_u_blk_var2.c @@ -85,8 +85,7 @@ void bli_trsv_u_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_unb_var1.c b/frame/2/trsv/bli_trsv_unb_var1.c index 3166dc3be..8888df017 100644 --- a/frame/2/trsv/bli_trsv_unb_var1.c +++ b/frame/2/trsv/bli_trsv_unb_var1.c @@ -88,7 +88,7 @@ void bli_trsv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv_unb_var2.c b/frame/2/trsv/bli_trsv_unb_var2.c index 5bd60675e..b8ea6996f 100644 --- a/frame/2/trsv/bli_trsv_unb_var2.c +++ b/frame/2/trsv/bli_trsv_unb_var2.c @@ -88,7 +88,7 @@ void bli_trsv_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. 
This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv_unf_var1.c b/frame/2/trsv/bli_trsv_unf_var1.c index 2e31c741c..b00c877b1 100644 --- a/frame/2/trsv/bli_trsv_unf_var1.c +++ b/frame/2/trsv/bli_trsv_unf_var1.c @@ -88,7 +88,7 @@ void bli_trsv_unf_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv_unf_var2.c b/frame/2/trsv/bli_trsv_unf_var2.c index 7619cb529..10c923686 100644 --- a/frame/2/trsv/bli_trsv_unf_var2.c +++ b/frame/2/trsv/bli_trsv_unf_var2.c @@ -88,7 +88,7 @@ void bli_trsv_unf_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/gemm/bli_gemm.c b/frame/3/gemm/bli_gemm.c index c9ff2fb33..2277fd1c7 100644 --- a/frame/3/gemm/bli_gemm.c +++ b/frame/3/gemm/bli_gemm.c @@ -48,21 +48,16 @@ void bli_gemm( obj_t* alpha, obj_t* c ) { gemm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -86,29 +81,6 @@ void bli_gemm( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_gemm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. cntl = gemm_cntl; @@ -122,10 +94,10 @@ void bli_gemm( obj_t* alpha, #endif // Invoke the internal back-end. 
- bli_gemm_int( &alpha_local, + bli_gemm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -159,8 +131,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ bli_set_dims_with_trans( transb, k, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/gemm/bli_gemm_blk_var1.c b/frame/3/gemm/bli_gemm_blk_var1.c index 3e9a6d30b..9c381c5dd 100644 --- a/frame/3/gemm/bli_gemm_blk_var1.c +++ b/frame/3/gemm/bli_gemm_blk_var1.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_gemm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_gemm_blk_var1( obj_t* alpha, m_trans = bli_obj_length_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_gemm_blk_var1( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -93,21 +90,19 @@ void bli_gemm_blk_var1( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). 
- bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_gemm( cntl ) ); diff --git a/frame/3/gemm/bli_gemm_blk_var1.h b/frame/3/gemm/bli_gemm_blk_var1.h index 65bf31e81..048468284 100644 --- a/frame/3/gemm/bli_gemm_blk_var1.h +++ b/frame/3/gemm/bli_gemm_blk_var1.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_blk_var2.c b/frame/3/gemm/bli_gemm_blk_var2.c index 1c1cda237..7a64d92c9 100644 --- a/frame/3/gemm/bli_gemm_blk_var2.c +++ b/frame/3/gemm/bli_gemm_blk_var2.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_gemm_blk_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_gemm_blk_var2( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_gemm_blk_var2( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -93,21 +90,19 @@ void bli_gemm_blk_var2( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). 
+ bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_gemm( cntl ) ); diff --git a/frame/3/gemm/bli_gemm_blk_var2.h b/frame/3/gemm/bli_gemm_blk_var2.h index 363b21409..2f07e8365 100644 --- a/frame/3/gemm/bli_gemm_blk_var2.h +++ b/frame/3/gemm/bli_gemm_blk_var2.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_blk_var3.c b/frame/3/gemm/bli_gemm_blk_var3.c index 74fcf8007..db8a009b5 100644 --- a/frame/3/gemm/bli_gemm_blk_var3.c +++ b/frame/3/gemm/bli_gemm_blk_var3.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_gemm_blk_var3( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var3( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { obj_t a1, a1_pack; obj_t b1, b1_pack; obj_t c_pack; - obj_t* beta_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_gemm_blk_var3( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_gemm_blk_var3( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -94,28 +90,29 @@ void bli_gemm_blk_var3( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). 
+ bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // beta only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) beta_use = beta; - else beta_use = &BLIS_ONE; - // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta_use, + &BLIS_ONE, &c_pack, cntl_sub_gemm( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal beta scalar on matrix C is non-zero, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + // And since c_pack is a local obj_t, we can simply overwrite the + // internal beta scalar with BLIS_ONE once it has been used in the + // first iteration. + if ( i == 0 ) bli_obj_scalar_reset( &c_pack ); } // Unpack C (if C was packed). diff --git a/frame/3/gemm/bli_gemm_blk_var3.h b/frame/3/gemm/bli_gemm_blk_var3.h index 6f555e0cc..63a3b2d75 100644 --- a/frame/3/gemm/bli_gemm_blk_var3.h +++ b/frame/3/gemm/bli_gemm_blk_var3.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var3( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var3( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_blk_var4.c b/frame/3/gemm/bli_gemm_blk_var4.c index 485732e6e..c08403cf3 100644 --- a/frame/3/gemm/bli_gemm_blk_var4.c +++ b/frame/3/gemm/bli_gemm_blk_var4.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_gemm_blk_var4( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var4( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_gemm_blk_var4( obj_t* alpha, m_trans = bli_obj_length_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -92,11 +90,11 @@ void bli_gemm_blk_var4( obj_t* alpha, bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Partition along the n dimension. for ( j = 0; j < n_trans; j += bn_inc ) @@ -113,14 +111,14 @@ void bli_gemm_blk_var4( obj_t* alpha, bli_acquire_mpart_l2r( BLIS_SUBPART1, j, bn_inc, &c1_pack, &c1_pack_inc ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, &b_inc, &b_pack_inc, cntl_sub_packm_b( cntl ) ); + // Pack B1 (if instructed). + bli_packm_int( &b_inc, &b_pack_inc, cntl_sub_packm_b( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b_pack_inc, - beta, + &BLIS_ONE, &c1_pack_inc, cntl_sub_gemm( cntl ) ); } @@ -152,29 +150,24 @@ void bli_gemm_blk_var4( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, - &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, - &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_gemm( cntl ) ); // Unpack C1 (if C1 was packed). 
- bli_unpackm_int( &c1_pack, - &c1, + bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) ); } diff --git a/frame/3/gemm/bli_gemm_blk_var4.h b/frame/3/gemm/bli_gemm_blk_var4.h index a86b1672d..a05f3d865 100644 --- a/frame/3/gemm/bli_gemm_blk_var4.h +++ b/frame/3/gemm/bli_gemm_blk_var4.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var4( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var4( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_int.c b/frame/3/gemm/bli_gemm_int.c index ebcff79dc..d395d5c10 100644 --- a/frame/3/gemm/bli_gemm_int.c +++ b/frame/3/gemm/bli_gemm_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T gemm_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); @@ -61,6 +59,8 @@ void bli_gemm_int( obj_t* alpha, obj_t* c, gemm_t* cntl ) { + obj_t a_local; + obj_t b_local; obj_t c_local; varnum_t n; impl_t i; @@ -81,6 +81,10 @@ void bli_gemm_int( obj_t* alpha, return; } + // Alias A and B in case we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *b, b_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -95,6 +99,20 @@ void bli_gemm_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha is non-unit, typecast and apply it to the scalar attached + // to B. + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &b_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar attached + // to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); @@ -103,10 +121,8 @@ void bli_gemm_int( obj_t* alpha, f = vars[n][i]; // Invoke the variant. 
- f( alpha, - a, - b, - beta, + f( &a_local, + &b_local, &c_local, cntl ); } diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index 924c18bf2..d5d7f773d 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2); -void bli_gemm_ker_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -77,10 +75,10 @@ void bli_gemm_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; @@ -103,15 +101,15 @@ void bli_gemm_ker_var2( obj_t* alpha, } */ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/3/gemm/bli_gemm_ker_var2.h b/frame/3/gemm/bli_gemm_ker_var2.h index 0d8cb8c3c..db195332b 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.h +++ b/frame/3/gemm/bli_gemm_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_gemm_ker_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_ker_var5.c b/frame/3/gemm/bli_gemm_ker_var5.c index 96e1b9232..785e3d97c 100644 --- a/frame/3/gemm/bli_gemm_ker_var5.c +++ b/frame/3/gemm/bli_gemm_ker_var5.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var5); -void bli_gemm_ker_var5( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var5( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -77,10 +75,10 @@ void bli_gemm_ker_var5( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; @@ -103,15 +101,16 @@ void bli_gemm_ker_var5( obj_t* alpha, } */ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. 
+ bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/gemm/bli_gemm_ker_var5.h b/frame/3/gemm/bli_gemm_ker_var5.h index 48b0d58ce..eaa506d97 100644 --- a/frame/3/gemm/bli_gemm_ker_var5.h +++ b/frame/3/gemm/bli_gemm_ker_var5.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_gemm_ker_var5( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var5( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_target.c b/frame/3/gemm/bli_gemm_target.c index e70ee7262..fc9519c04 100644 --- a/frame/3/gemm/bli_gemm_target.c +++ b/frame/3/gemm/bli_gemm_target.c @@ -37,8 +37,6 @@ void bli_gemm_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ) { num_t dt_targ_a; @@ -110,18 +108,6 @@ void bli_gemm_set_targ_exec_datatypes( obj_t* a, bli_obj_toggle_trans( *b ); } - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. - *dt_alpha = bli_obj_target_datatype( *a ); - - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. - *dt_beta = bli_obj_datatype( *c ); - // For now disable packing of C. 
*pack_c = FALSE; } diff --git a/frame/3/gemm/bli_gemm_target.h b/frame/3/gemm/bli_gemm_target.h index 93c27cd31..4416ff955 100644 --- a/frame/3/gemm/bli_gemm_target.h +++ b/frame/3/gemm/bli_gemm_target.h @@ -35,8 +35,6 @@ void bli_gemm_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ); void bli_gemm_get_target_datatypes( obj_t* a, diff --git a/frame/3/gemm/other/bli_gemm_ker_var2.c b/frame/3/gemm/other/bli_gemm_ker_var2.c index f97913df9..5fd8c7db2 100644 --- a/frame/3/gemm/other/bli_gemm_ker_var2.c +++ b/frame/3/gemm/other/bli_gemm_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2); -void bli_gemm_ker_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -78,10 +76,10 @@ void bli_gemm_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; @@ -104,15 +102,13 @@ void bli_gemm_ker_var2( obj_t* alpha, } */ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. 
- bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/hemm/bli_hemm.c b/frame/3/hemm/bli_hemm.c index 1973b24ab..38eecaa5a 100644 --- a/frame/3/hemm/bli_hemm.c +++ b/frame/3/hemm/bli_hemm.c @@ -47,21 +47,16 @@ void bli_hemm( side_t side, obj_t* c ) { gemm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_hemm_check( side, alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -90,37 +85,14 @@ void bli_hemm( side_t side, bli_obj_swap( a_local, b_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_gemm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. cntl = hemm_cntl; // Invoke the internal back-end. 
- bli_gemm_int( &alpha_local, + bli_gemm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -155,8 +127,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dim_with_side( side, m, n, mn_a ); \ bli_set_dims_with_trans( transb, m, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/her2k/bli_her2k.c b/frame/3/her2k/bli_her2k.c index cf70f396b..51c801526 100644 --- a/frame/3/her2k/bli_her2k.c +++ b/frame/3/her2k/bli_her2k.c @@ -47,24 +47,19 @@ void bli_her2k( obj_t* alpha, obj_t* c ) { //her2k_t* cntl; - obj_t alpha_local; - obj_t alpha_conj_local; - obj_t beta_local; + obj_t alpha_conj; obj_t c_local; obj_t a_local; obj_t bh_local; obj_t b_local; obj_t ah_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_her2k_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -85,6 +80,12 @@ void bli_her2k( obj_t* alpha, bli_obj_induce_trans( ah_local ); bli_obj_toggle_conj( ah_local ); + // Initialize a conjugated copy of alpha. + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), + BLIS_CONJUGATE, + alpha, + &alpha_conj ); + // An optimization: If C is row-stored, transpose the entire operation // so as to allow the macro-kernel more favorable access patterns // through C. 
(The effect of the transposition of A and A' is negligible @@ -102,66 +103,36 @@ void bli_her2k( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_her2k_set_targ_exec_datatypes( &a_local, - &bh_local, - &b_local, - &ah_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of conj(alpha). - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_CONJUGATE, - alpha, - &alpha_conj_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - -/* +#if 0 // Choose the control tree. cntl = her2k_cntl; // Invoke the internal back-end. - bli_her2k_int( &alpha_local, + bli_her2k_int( alpha, &a_local, &bh_local, - &alpha_conj_local, + &alpha_conj, &b_local, &ah_local, - &beta_local, + beta, &c_local, cntl ); -*/ - - bli_herk_int( &alpha_local, +#else + // Invoke herk twice, using beta only the first time. 
+ bli_herk_int( alpha, &a_local, &bh_local, - &beta_local, + beta, &c_local, herk_cntl ); - bli_herk_int( &alpha_conj_local, + + bli_herk_int( &alpha_conj, &b_local, &ah_local, &BLIS_ONE, &c_local, herk_cntl ); - +#endif } // @@ -194,8 +165,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ bli_set_dims_with_trans( transb, m, k, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt_r, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/her2k/bli_her2k_blk_var1f.c b/frame/3/her2k/bli_her2k_blk_var1f.c index 6463193c2..18ab8d1f4 100644 --- a/frame/3/her2k/bli_her2k_blk_var1f.c +++ b/frame/3/her2k/bli_her2k_blk_var1f.c @@ -34,13 +34,10 @@ #include "blis.h" -void bli_her2k_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var1f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { @@ -65,7 +62,7 @@ void bli_her2k_blk_var1f( obj_t* alpha, m_trans = bli_obj_length_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -77,9 +74,8 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( bh, &bh_pack, cntl_sub_packm_b( cntl ) ); - // Pack B' and scale by alpha (if instructed). - bli_packm_int( alpha, - bh, &bh_pack, + // Pack B' (if instructed). + bli_packm_int( bh, &bh_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -101,21 +97,19 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). 
- bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &a1_pack, &bh_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_herk( cntl ) ); @@ -129,6 +123,11 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_obj_release_pack( &a1_pack ); bli_obj_release_pack( &bh_pack ); + // This variant executes two rank-k updates. Therefore, if the + // internal beta scalar on matrix C is non-unit, we must use it only + // for the first rank-k update (and then BLIS_ONE for the other). + bli_obj_scalar_reset( c ); + // // Perform second rank-k update: C = C + conj(alpha) * B * A'. // @@ -137,9 +136,8 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); - // Pack A' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - ah, &ah_pack, + // Pack A' (if instructed). + bli_packm_int( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -161,18 +159,16 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem.
- bli_herk_int( alpha_conj, + bli_herk_int( &BLIS_ONE, &b1_pack, &ah_pack, &BLIS_ONE, diff --git a/frame/3/her2k/bli_her2k_blk_var1f.h b/frame/3/her2k/bli_her2k_blk_var1f.h index b96caf99b..f766ac7dd 100644 --- a/frame/3/her2k/bli_her2k_blk_var1f.h +++ b/frame/3/her2k/bli_her2k_blk_var1f.h @@ -32,13 +32,10 @@ */ -void bli_her2k_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var1f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_blk_var2f.c b/frame/3/her2k/bli_her2k_blk_var2f.c index 2c53aad77..64285dc52 100644 --- a/frame/3/her2k/bli_her2k_blk_var2f.c +++ b/frame/3/her2k/bli_her2k_blk_var2f.c @@ -34,13 +34,10 @@ #include "blis.h" -void bli_her2k_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var2f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { @@ -72,7 +69,7 @@ void bli_her2k_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -82,14 +79,12 @@ void bli_her2k_blk_var2f( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack B and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - b, &b_pack, + // Pack B (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -124,29 +119,26 @@ void bli_her2k_blk_var2f( obj_t* alpha, bli_packm_init( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1' and scale by alpha (if instructed). - bli_packm_int( alpha, - &bh1, &bh1_pack, + // Pack B1' (if instructed). 
+ bli_packm_int( &bh1, &bh1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &ah1, &ah1_pack, + // Pack A1' (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1S, &c1S_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); // Perform her2k subproblem. - bli_her2k_int( alpha, + bli_her2k_int( &BLIS_ONE, &aS_pack, &bh1_pack, - alpha_conj, + &BLIS_ONE, &bS_pack, &ah1_pack, - beta, + &BLIS_ONE, &c1S_pack, cntl_sub_her2k( cntl ) ); diff --git a/frame/3/her2k/bli_her2k_blk_var2f.h b/frame/3/her2k/bli_her2k_blk_var2f.h index e1cbcdb6b..eeb939d0a 100644 --- a/frame/3/her2k/bli_her2k_blk_var2f.h +++ b/frame/3/her2k/bli_her2k_blk_var2f.h @@ -32,13 +32,10 @@ */ -void bli_her2k_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var2f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_blk_var3f.c b/frame/3/her2k/bli_her2k_blk_var3f.c index 58a9699c7..f143ae2a7 100644 --- a/frame/3/her2k/bli_her2k_blk_var3f.c +++ b/frame/3/her2k/bli_her2k_blk_var3f.c @@ -34,13 +34,10 @@ #include "blis.h" -void bli_her2k_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var3f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { @@ -49,7 +46,6 @@ void bli_her2k_blk_var3f( obj_t* alpha, obj_t b1, b1_pack; obj_t ah1, ah1_pack; obj_t c_pack; - obj_t* beta_use; dim_t i; dim_t b_alg; @@ -66,7 +62,7 @@ void bli_her2k_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -74,9 +70,8 @@ void bli_her2k_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -98,14 +93,12 @@ void bli_her2k_blk_var3f( obj_t* alpha, bli_packm_init( &bh1, &bh1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1' and scale by alpha (if instructed). - bli_packm_int( alpha, - &bh1, &bh1_pack, + // Pack B1' (if instructed). + bli_packm_int( &bh1, &bh1_pack, cntl_sub_packm_b( cntl ) ); // Acquire partitions for B1 and A1'. @@ -120,31 +113,32 @@ void bli_her2k_blk_var3f( obj_t* alpha, bli_packm_init( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - // Pack A1' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &ah1, &ah1_pack, + // Pack A1' (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-2k updates, we must use - // beta only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) beta_use = beta; - else beta_use = &BLIS_ONE; - // Perform herk subproblem. - bli_her2k_int( alpha, + bli_her2k_int( &BLIS_ONE, &a1_pack, &bh1_pack, - alpha_conj, + &BLIS_ONE, &b1_pack, &ah1_pack, - beta_use, + &BLIS_ONE, &c_pack, cntl_sub_her2k( cntl ) ); + + // This variant executes multiple rank-2k updates. 
Therefore, if the + // internal beta scalar on matrix C is non-unit, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + // And since c_pack is a local obj_t, we can simply overwrite the + // internal beta scalar with BLIS_ONE once it has been used in the + // first iteration. + if ( i == 0 ) bli_obj_scalar_reset( &c_pack ); } // Unpack C (if C was packed). diff --git a/frame/3/her2k/bli_her2k_blk_var3f.h b/frame/3/her2k/bli_her2k_blk_var3f.h index 0dbac4fc5..5fbdf68bf 100644 --- a/frame/3/her2k/bli_her2k_blk_var3f.h +++ b/frame/3/her2k/bli_her2k_blk_var3f.h @@ -32,13 +32,10 @@ */ -void bli_her2k_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var3f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_int.c b/frame/3/her2k/bli_her2k_int.c index db6dcd9b2..794040efa 100644 --- a/frame/3/her2k/bli_her2k_int.c +++ b/frame/3/her2k/bli_her2k_int.c @@ -36,13 +36,10 @@ #define FUNCPTR_T her2k_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); @@ -66,16 +63,20 @@ static FUNCPTR_T vars[2][4][3] = } }; -void bli_her2k_int( obj_t* alpha, +void bli_her2k_int( obj_t* alpha_abh, obj_t* a, obj_t* bh, - obj_t* alpha_conj, + obj_t* alpha_bah, obj_t* b, obj_t* ah, obj_t* beta, obj_t* c, her2k_t* cntl ) { + obj_t a_local; + obj_t bh_local; + obj_t b_local; + obj_t ah_local; obj_t c_local; varnum_t n; impl_t i; @@ -84,7 +85,7 @@ void bli_her2k_int( obj_t* alpha, // Check parameters. if ( bli_error_checking_is_enabled() ) - bli_her2k_int_check( alpha, a, bh, alpha_conj, b, ah, beta, c, cntl ); + bli_her2k_int_check( alpha_abh, a, bh, alpha_bah, b, ah, beta, c, cntl ); // If C has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *c ) ) return; @@ -99,6 +100,12 @@ void bli_her2k_int( obj_t* alpha, return; } + // Alias A, B', B, and A' in case we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *bh, bh_local ); + bli_obj_alias_to( *b, b_local ); + bli_obj_alias_to( *ah, ah_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -113,6 +120,27 @@ void bli_her2k_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha_abh is non-unit, typecast and apply it to the scalar + // attached to B'. + if ( !bli_obj_equals( alpha_abh, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha_abh, &bh_local ); + } + + // If alpha_bah is non-unit, typecast and apply it to the scalar + // attached to A'. + if ( !bli_obj_equals( alpha_bah, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha_bah, &ah_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar + // attached to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set a bool based on the uplo field of c. if ( bli_obj_root_is_lower( c_local ) ) uplo = 0; else uplo = 1; @@ -125,13 +153,10 @@ void bli_her2k_int( obj_t* alpha, f = vars[uplo][n][i]; // Invoke the variant. - f( alpha, - a, - bh, - alpha_conj, - b, - ah, - beta, + f( &a_local, + &bh_local, + &b_local, + &ah_local, &c_local, cntl ); } diff --git a/frame/3/her2k/bli_her2k_l_ker_var2.c b/frame/3/her2k/bli_her2k_l_ker_var2.c index fd11555c9..35a7b3eb1 100644 --- a/frame/3/her2k/bli_her2k_l_ker_var2.c +++ b/frame/3/her2k/bli_her2k_l_ker_var2.c @@ -34,16 +34,15 @@ #include "blis.h" -void bli_her2k_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_l_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { + obj_t c_local; + // Implement her2k kernel in terms of two calls to the corresponding // herk kernel. 
@@ -51,18 +50,18 @@ void bli_her2k_l_ker_var2( obj_t* alpha, // only want to apply beta once. (And beta might be unit anyway if this // is not the first iteration of variant 3.) - bli_herk_l_ker_var2( alpha, - a, + bli_obj_alias_to( *c, c_local ); + + bli_herk_l_ker_var2( a, bh, - beta, - c, + &c_local, NULL ); - bli_herk_l_ker_var2( alpha_conj, - b, + bli_obj_scalar_reset( &c_local ); + + bli_herk_l_ker_var2( b, ah, - &BLIS_ONE, - c, + &c_local, NULL ); } diff --git a/frame/3/her2k/bli_her2k_l_ker_var2.h b/frame/3/her2k/bli_her2k_l_ker_var2.h index 493cc98f7..e28711abb 100644 --- a/frame/3/her2k/bli_her2k_l_ker_var2.h +++ b/frame/3/her2k/bli_her2k_l_ker_var2.h @@ -35,13 +35,10 @@ // // Prototype object-based interface. // -void bli_her2k_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_l_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_target.c b/frame/3/her2k/bli_her2k_target.c index affde1309..b92d1c9e8 100644 --- a/frame/3/her2k/bli_her2k_target.c +++ b/frame/3/her2k/bli_her2k_target.c @@ -39,8 +39,6 @@ void bli_her2k_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ) { num_t dt_targ_a; @@ -81,18 +79,6 @@ void bli_her2k_set_targ_exec_datatypes( obj_t* a, bli_obj_set_execution_datatype( dt_exec, *ah ); bli_obj_set_execution_datatype( dt_exec, *c ); - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. - *dt_alpha = bli_obj_target_datatype( *a ); - - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. 
- *dt_beta = bli_obj_datatype( *c ); - // For now disable packing of C. *pack_c = FALSE; } diff --git a/frame/3/her2k/bli_her2k_target.h b/frame/3/her2k/bli_her2k_target.h index 4d06be8c7..2568b2f11 100644 --- a/frame/3/her2k/bli_her2k_target.h +++ b/frame/3/her2k/bli_her2k_target.h @@ -37,7 +37,5 @@ void bli_her2k_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ); diff --git a/frame/3/her2k/bli_her2k_u_ker_var2.c b/frame/3/her2k/bli_her2k_u_ker_var2.c index 3228e0072..b9c98ad80 100644 --- a/frame/3/her2k/bli_her2k_u_ker_var2.c +++ b/frame/3/her2k/bli_her2k_u_ker_var2.c @@ -34,16 +34,15 @@ #include "blis.h" -void bli_her2k_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_u_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { + obj_t c_local; + // Implement her2k kernel in terms of two calls to the corresponding // herk kernel. @@ -51,18 +50,18 @@ void bli_her2k_u_ker_var2( obj_t* alpha, // only want to apply beta once. (And beta might be unit anyway if this // is not the first iteration of variant 3.) - bli_herk_u_ker_var2( alpha, - a, + bli_obj_alias_to( *c, c_local ); + + bli_herk_u_ker_var2( a, bh, - beta, - c, + &c_local, NULL ); - bli_herk_u_ker_var2( alpha_conj, - b, + bli_obj_scalar_reset( &c_local ); + + bli_herk_u_ker_var2( b, ah, - &BLIS_ONE, - c, + &c_local, NULL ); } diff --git a/frame/3/her2k/bli_her2k_u_ker_var2.h b/frame/3/her2k/bli_her2k_u_ker_var2.h index b8b66252f..d6934a67f 100644 --- a/frame/3/her2k/bli_her2k_u_ker_var2.h +++ b/frame/3/her2k/bli_her2k_u_ker_var2.h @@ -35,13 +35,10 @@ // // Prototype object-based interface. 
// -void bli_her2k_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_u_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/herk/bli_herk.c b/frame/3/herk/bli_herk.c index 4969e796c..a8b465ce5 100644 --- a/frame/3/herk/bli_herk.c +++ b/frame/3/herk/bli_herk.c @@ -45,21 +45,16 @@ void bli_herk( obj_t* alpha, obj_t* c ) { herk_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t ah_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_herk_check( alpha, a, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -87,37 +82,14 @@ void bli_herk( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_herk_set_targ_exec_datatypes( &a_local, - &ah_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. cntl = herk_cntl; // Invoke the internal back-end. 
- bli_herk_int( &alpha_local, + bli_herk_int( alpha, &a_local, &ah_local, - &beta_local, + beta, &c_local, cntl ); } @@ -148,8 +120,8 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt_r, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt_r, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/3/herk/bli_herk_blk_var1f.c b/frame/3/herk/bli_herk_blk_var1f.c index dc137e1ce..ffa1fc8f1 100644 --- a/frame/3/herk/bli_herk_blk_var1f.c +++ b/frame/3/herk/bli_herk_blk_var1f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_herk_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var1f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -58,7 +56,7 @@ void bli_herk_blk_var1f( obj_t* alpha, m_trans = bli_obj_length_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_herk_blk_var1f( obj_t* alpha, bli_packm_init( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); - // Pack A' and scale by alpha (if instructed). - bli_packm_int( alpha, - ah, &ah_pack, + // Pack A' (if instructed). + bli_packm_int( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -90,21 +87,19 @@ void bli_herk_blk_var1f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). 
+ bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &a1_pack, &ah_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_herk( cntl ) ); diff --git a/frame/3/herk/bli_herk_blk_var1f.h b/frame/3/herk/bli_herk_blk_var1f.h index 94900b772..a33247a52 100644 --- a/frame/3/herk/bli_herk_blk_var1f.h +++ b/frame/3/herk/bli_herk_blk_var1f.h @@ -32,10 +32,8 @@ */ -void bli_herk_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var1f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_blk_var2f.c b/frame/3/herk/bli_herk_blk_var2f.c index df39ce55d..55edea9af 100644 --- a/frame/3/herk/bli_herk_blk_var2f.c +++ b/frame/3/herk/bli_herk_blk_var2f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_herk_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var2f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -65,7 +63,7 @@ void bli_herk_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -73,9 +71,8 @@ void bli_herk_blk_var2f( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -104,21 +101,19 @@ void bli_herk_blk_var2f( obj_t* alpha, bli_packm_init( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1' and scale by alpha (if instructed). - bli_packm_int( alpha, - &ah1, &ah1_pack, + // Pack A1' (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1S, &c1S_pack, + // Pack C1 (if instructed). 
+ bli_packm_int( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &aS_pack, &ah1_pack, - beta, + &BLIS_ONE, &c1S_pack, cntl_sub_herk( cntl ) ); diff --git a/frame/3/herk/bli_herk_blk_var2f.h b/frame/3/herk/bli_herk_blk_var2f.h index 1a5034242..97eeed211 100644 --- a/frame/3/herk/bli_herk_blk_var2f.h +++ b/frame/3/herk/bli_herk_blk_var2f.h @@ -32,10 +32,8 @@ */ -void bli_herk_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var2f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_blk_var3f.c b/frame/3/herk/bli_herk_blk_var3f.c index a54e341b9..2c0cafa92 100644 --- a/frame/3/herk/bli_herk_blk_var3f.c +++ b/frame/3/herk/bli_herk_blk_var3f.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_herk_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var3f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ) { obj_t a1, a1_pack; obj_t ah1, ah1_pack; obj_t c_pack; - obj_t* beta_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_herk_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_herk_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -91,28 +87,29 @@ void bli_herk_blk_var3f( obj_t* alpha, bli_packm_init( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). 
- bli_packm_int( alpha, - &ah1, &ah1_pack, + // Pack B1 (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // beta only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) beta_use = beta; - else beta_use = &BLIS_ONE; - // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &a1_pack, &ah1_pack, - beta_use, + &BLIS_ONE, &c_pack, cntl_sub_herk( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal beta scalar on matrix C is non-zero, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + // And since c_pack is a local obj_t, we can simply overwrite the + // internal beta scalar with BLIS_ONE once it has been used in the + // first iteration. + if ( i == 0 ) bli_obj_scalar_reset( &c_pack ); } // Unpack C (if C was packed). diff --git a/frame/3/herk/bli_herk_blk_var3f.h b/frame/3/herk/bli_herk_blk_var3f.h index 5e96900f7..726bf2115 100644 --- a/frame/3/herk/bli_herk_blk_var3f.h +++ b/frame/3/herk/bli_herk_blk_var3f.h @@ -32,10 +32,8 @@ */ -void bli_herk_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var3f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_int.c b/frame/3/herk/bli_herk_int.c index afeef8d32..9a643aa20 100644 --- a/frame/3/herk/bli_herk_int.c +++ b/frame/3/herk/bli_herk_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T herk_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); @@ -70,6 +68,8 @@ void bli_herk_int( obj_t* alpha, obj_t* c, herk_t* cntl ) { + obj_t a_local; + obj_t ah_local; obj_t c_local; varnum_t n; impl_t i; @@ -91,6 +91,10 @@ void bli_herk_int( obj_t* alpha, return; } + // Alias A and A' in case we need to update attached scalars. 
+ bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *ah, ah_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -105,6 +109,20 @@ void bli_herk_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha is non-unit, typecast and apply it to the scalar + // attached to A'. + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &ah_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar + // attached to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set a bool based on the uplo field of C's root object. if ( bli_obj_root_is_lower( c_local ) ) uplo = 0; else uplo = 1; @@ -117,10 +135,8 @@ void bli_herk_int( obj_t* alpha, f = vars[uplo][n][i]; // Invoke the variant. - f( alpha, - a, - ah, - beta, + f( &a_local, + &ah_local, &c_local, cntl ); } diff --git a/frame/3/herk/bli_herk_l_ker_var2.c b/frame/3/herk/bli_herk_l_ker_var2.c index be40383a6..ff104f372 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.c +++ b/frame/3/herk/bli_herk_l_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,herk_l_ker_var2); -void bli_herk_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_l_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -80,15 +78,15 @@ void bli_herk_l_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - +/* // Handle the special case where c and a are complex and b is real. // Note that this is the ONLY case allowed by the inner kernel whereby // the datatypes of a and b differ. 
In this situation, the execution @@ -104,16 +102,17 @@ void bli_herk_l_ker_var2( obj_t* alpha, cs_a *= 2; ps_a *= 2; } +*/ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/herk/bli_herk_l_ker_var2.h b/frame/3/herk/bli_herk_l_ker_var2.h index cf09d804d..2a36e5b37 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.h +++ b/frame/3/herk/bli_herk_l_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_herk_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_l_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_target.c b/frame/3/herk/bli_herk_target.c index a04ee34b2..21e63bc60 100644 --- a/frame/3/herk/bli_herk_target.c +++ b/frame/3/herk/bli_herk_target.c @@ -37,8 +37,6 @@ void bli_herk_set_targ_exec_datatypes( obj_t* a, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ) { num_t dt_targ_a; @@ -69,10 +67,6 @@ void bli_herk_set_targ_exec_datatypes( obj_t* a, bli_obj_set_execution_datatype( dt_exec, *ah ); bli_obj_set_execution_datatype( dt_exec, *c ); - *dt_alpha = bli_obj_target_datatype( *a ); - - *dt_beta = bli_obj_datatype( *c ); - // For now disable packing of C. *pack_c = FALSE; } diff --git a/frame/3/herk/bli_herk_target.h b/frame/3/herk/bli_herk_target.h index 0610c690a..689fc521c 100644 --- a/frame/3/herk/bli_herk_target.h +++ b/frame/3/herk/bli_herk_target.h @@ -35,8 +35,6 @@ void bli_herk_set_targ_exec_datatypes( obj_t* a, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ); /* diff --git a/frame/3/herk/bli_herk_u_ker_var2.c b/frame/3/herk/bli_herk_u_ker_var2.c index fc332bd11..b3c65145d 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.c +++ b/frame/3/herk/bli_herk_u_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,herk_u_ker_var2); -void bli_herk_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_u_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -80,15 +78,15 @@ void bli_herk_u_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - +/* // Handle the special case where c and a are complex and b is real. 
// Note that this is the ONLY case allowed by the inner kernel whereby // the datatypes of a and b differ. In this situation, the execution @@ -104,16 +102,17 @@ void bli_herk_u_ker_var2( obj_t* alpha, cs_a *= 2; ps_a *= 2; } +*/ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/herk/bli_herk_u_ker_var2.h b/frame/3/herk/bli_herk_u_ker_var2.h index 1d8da175b..c72764146 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.h +++ b/frame/3/herk/bli_herk_u_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_herk_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_u_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/symm/bli_symm.c b/frame/3/symm/bli_symm.c index e95f71b8f..fe56aeb59 100644 --- a/frame/3/symm/bli_symm.c +++ b/frame/3/symm/bli_symm.c @@ -47,21 +47,16 @@ void bli_symm( side_t side, obj_t* c ) { gemm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_symm_check( side, alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -89,38 +84,15 @@ void bli_symm( side_t side, bli_obj_swap( a_local, b_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_gemm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. We can just use hemm since the algorithm // is nearly identical to that of symm. cntl = hemm_cntl; // Invoke the internal back-end. 
- bli_gemm_int( &alpha_local, + bli_gemm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -155,8 +127,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dim_with_side( side, m, n, mn_a ); \ bli_set_dims_with_trans( transb, m, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/syr2k/bli_syr2k.c b/frame/3/syr2k/bli_syr2k.c index 15fd5f761..e2be579cd 100644 --- a/frame/3/syr2k/bli_syr2k.c +++ b/frame/3/syr2k/bli_syr2k.c @@ -47,23 +47,18 @@ void bli_syr2k( obj_t* alpha, obj_t* c ) { her2k_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t c_local; obj_t a_local; obj_t bt_local; obj_t b_local; obj_t at_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syr2k_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -91,59 +86,37 @@ void bli_syr2k( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_her2k_set_targ_exec_datatypes( &a_local, - &bt_local, - &b_local, - &at_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. 
- bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. We can just use her2k since the algorithm // is nearly identical to that of syr2k. cntl = her2k_cntl; +#if 1 // Invoke the internal back-end. - bli_her2k_int( &alpha_local, + bli_her2k_int( alpha, &a_local, &bt_local, - &alpha_local, + alpha, &b_local, &at_local, - &beta_local, + beta, &c_local, cntl ); -/* - bli_herk_int( &alpha_local, - a, - &bt, - &beta_local, +#else + // Invoke herk twice, using beta only the first time. + bli_herk_int( alpha, + &a_local, + &bt_local, + beta, &c_local, herk_cntl ); - bli_herk_int( &alpha_local, - b, - &at, + + bli_herk_int( alpha, + &b_local, + &at_local, &BLIS_ONE, &c_local, herk_cntl ); -*/ +#endif } // @@ -175,8 +148,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ bli_set_dims_with_trans( transb, m, k, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/syrk/bli_syrk.c b/frame/3/syrk/bli_syrk.c index af3956ccb..b879378c1 100644 --- a/frame/3/syrk/bli_syrk.c +++ b/frame/3/syrk/bli_syrk.c @@ -45,21 +45,16 @@ void bli_syrk( obj_t* alpha, obj_t* c ) { herk_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t at_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syrk_check( alpha, a, beta, c ); // If alpha is zero, scale by beta and return. 
- if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -83,38 +78,15 @@ void bli_syrk( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_herk_set_targ_exec_datatypes( &a_local, - &at_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. We can just use herk since the algorithm // is nearly identical to that of syrk. cntl = herk_cntl; // Invoke the internal back-end. - bli_herk_int( &alpha_local, + bli_herk_int( alpha, &a_local, &at_local, - &beta_local, + beta, &c_local, cntl ); } @@ -144,8 +116,8 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/3/trmm/bli_trmm.c b/frame/3/trmm/bli_trmm.c index 9c5331eee..23b18bf04 100644 --- a/frame/3/trmm/bli_trmm.c +++ b/frame/3/trmm/bli_trmm.c @@ -46,18 +46,16 @@ void bli_trmm( side_t side, obj_t* b ) { trmm_t* cntl; - obj_t alpha_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_trmm_check( side, alpha, a, b ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( alpha, b ); return; @@ -127,25 +125,12 @@ void bli_trmm( side_t side, bli_obj_set_as_root( b_local ); bli_obj_set_as_root( c_local ); - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_trmm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - // Choose the control tree. if ( bli_is_left( side ) ) cntl = trmm_l_cntl; else cntl = trmm_r_cntl; // Invoke the internal back-end. - bli_trmm_int( &alpha_local, + bli_trmm_int( alpha, &a_local, &b_local, &BLIS_ZERO, @@ -179,7 +164,7 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dim_with_side( side, m, n, mn_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, n, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/trmm/bli_trmm_blk_var1.c b/frame/3/trmm/bli_trmm_blk_var1.c index aa09f105f..63a925b64 100644 --- a/frame/3/trmm/bli_trmm_blk_var1.c +++ b/frame/3/trmm/bli_trmm_blk_var1.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -69,7 +67,7 @@ void bli_trmm_blk_var1( obj_t* alpha, bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -77,9 +75,8 @@ void bli_trmm_blk_var1( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -101,21 +98,19 @@ void bli_trmm_blk_var1( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trmm( cntl ) ); diff --git a/frame/3/trmm/bli_trmm_blk_var1.h b/frame/3/trmm/bli_trmm_blk_var1.h index 1a0fc6cae..c60acb355 100644 --- a/frame/3/trmm/bli_trmm_blk_var1.h +++ b/frame/3/trmm/bli_trmm_blk_var1.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var2b.c b/frame/3/trmm/bli_trmm_blk_var2b.c index afe3bba90..c2dff76c9 100644 --- a/frame/3/trmm/bli_trmm_blk_var2b.c +++ b/frame/3/trmm/bli_trmm_blk_var2b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var2b( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var2b( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var2b( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trmm( cntl ) ); diff --git a/frame/3/trmm/bli_trmm_blk_var2b.h b/frame/3/trmm/bli_trmm_blk_var2b.h index 1e04ed383..3679aa0c3 100644 --- a/frame/3/trmm/bli_trmm_blk_var2b.h +++ b/frame/3/trmm/bli_trmm_blk_var2b.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var2f.c b/frame/3/trmm/bli_trmm_blk_var2f.c index 3c39b6405..74be4554e 100644 --- a/frame/3/trmm/bli_trmm_blk_var2f.c +++ b/frame/3/trmm/bli_trmm_blk_var2f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var2f( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var2f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trmm( cntl ) ); diff --git a/frame/3/trmm/bli_trmm_blk_var2f.h b/frame/3/trmm/bli_trmm_blk_var2f.h index 1fae4527a..b45d54256 100644 --- a/frame/3/trmm/bli_trmm_blk_var2f.h +++ b/frame/3/trmm/bli_trmm_blk_var2f.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var3b.c b/frame/3/trmm/bli_trmm_blk_var3b.c index e84256a30..4a8ea4549 100644 --- a/frame/3/trmm/bli_trmm_blk_var3b.c +++ b/frame/3/trmm/bli_trmm_blk_var3b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var3b( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var3b( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var3b( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trmm( cntl ) ); } diff --git a/frame/3/trmm/bli_trmm_blk_var3b.h b/frame/3/trmm/bli_trmm_blk_var3b.h index 8da841847..124daf2c7 100644 --- a/frame/3/trmm/bli_trmm_blk_var3b.h +++ b/frame/3/trmm/bli_trmm_blk_var3b.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var3f.c b/frame/3/trmm/bli_trmm_blk_var3f.c index ce44ead93..8b4df246e 100644 --- a/frame/3/trmm/bli_trmm_blk_var3f.c +++ b/frame/3/trmm/bli_trmm_blk_var3f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var3f( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trmm( cntl ) ); } diff --git a/frame/3/trmm/bli_trmm_blk_var3f.h b/frame/3/trmm/bli_trmm_blk_var3f.h index 72c8e11f9..0f15f9a48 100644 --- a/frame/3/trmm/bli_trmm_blk_var3f.h +++ b/frame/3/trmm/bli_trmm_blk_var3f.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_int.c b/frame/3/trmm/bli_trmm_int.c index 17552e5f5..7f11d8513 100644 --- a/frame/3/trmm/bli_trmm_int.c +++ b/frame/3/trmm/bli_trmm_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T trmm_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); @@ -92,6 +90,8 @@ void bli_trmm_int( obj_t* alpha, obj_t* c, trmm_t* cntl ) { + obj_t a_local; + obj_t b_local; obj_t c_local; bool_t side, uplo; varnum_t n; @@ -113,6 +113,10 @@ void bli_trmm_int( obj_t* alpha, return; } + // Alias A and B in case 
we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *b, b_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -127,6 +131,20 @@ void bli_trmm_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha is non-unit, typecast and apply it to the scalar attached + // to B. + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &b_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar attached + // to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). @@ -152,10 +170,8 @@ void bli_trmm_int( obj_t* alpha, f = vars[side][uplo][n][i]; // Invoke the variant. - f( alpha, - a, - b, - beta, + f( &a_local, + &b_local, &c_local, cntl ); } diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index e85ba7a73..7ba1e10be 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_ll_ker_var2); -void bli_trmm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_ll_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at 
the beta offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.h b/frame/3/trmm/bli_trmm_ll_ker_var2.h index 93fb695e0..08ebc465a 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.h +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trmm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.c b/frame/3/trmm/bli_trmm_lu_ker_var2.c index d86673980..66bdea8b6 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.c +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_lu_ker_var2); -void bli_trmm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_lu_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.h b/frame/3/trmm/bli_trmm_lu_ker_var2.h index 434376097..6bfe27810 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.h +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_trmm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index f697811f6..c727b5748 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_rl_ker_var2); -void bli_trmm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_rl_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. 
+ bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.h b/frame/3/trmm/bli_trmm_rl_ker_var2.h index 3c237dd70..95e4d3ec3 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.h +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_trmm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 1723f7916..e8cc82abd 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_ru_ker_var2); -void bli_trmm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_ru_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the beta offset. 
- bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.h b/frame/3/trmm/bli_trmm_ru_ker_var2.h index dff6d906c..4537ca12d 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.h +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trmm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_target.c b/frame/3/trmm/bli_trmm_target.c index 18734df4b..c3ec24e65 100644 --- a/frame/3/trmm/bli_trmm_target.c +++ b/frame/3/trmm/bli_trmm_target.c @@ -36,8 +36,7 @@ void bli_trmm_set_targ_exec_datatypes( obj_t* a, obj_t* b, - obj_t* c, - num_t* dt_alpha ) + obj_t* c ) { num_t dt_targ_a; num_t dt_targ_b; @@ -58,8 +57,6 @@ void bli_trmm_set_targ_exec_datatypes( obj_t* a, bli_obj_set_execution_datatype( dt_exec, *a ); bli_obj_set_execution_datatype( dt_exec, *b ); bli_obj_set_execution_datatype( dt_exec, *c ); - - *dt_alpha = bli_obj_target_datatype( *a ); } /* diff --git a/frame/3/trmm/bli_trmm_target.h b/frame/3/trmm/bli_trmm_target.h index 182d7a823..7fb2bc463 100644 --- a/frame/3/trmm/bli_trmm_target.h +++ b/frame/3/trmm/bli_trmm_target.h @@ -34,8 +34,7 @@ void bli_trmm_set_targ_exec_datatypes( obj_t* a, obj_t* b, - obj_t* c, - num_t* dt_alpha ); + obj_t* c ); /* void bli_trmm_get_target_datatypes( obj_t* a, diff --git a/frame/3/trmm3/bli_trmm3.c b/frame/3/trmm3/bli_trmm3.c index 1baa4fb43..6b8090b41 100644 --- a/frame/3/trmm3/bli_trmm3.c +++ b/frame/3/trmm3/bli_trmm3.c @@ -48,23 +48,16 @@ void bli_trmm3( side_t side, obj_t* c ) { trmm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_targ_a; - //num_t dt_targ_b; - //num_t dt_targ_c; - num_t dt_alpha; - num_t dt_beta; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trmm3_check( side, alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. 
- if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -83,12 +76,6 @@ void bli_trmm3( side_t side, bli_obj_set_as_root( b_local ); bli_obj_set_as_root( c_local ); - // For now, assume the storage datatypes are the desired target - // datatypes. - dt_targ_a = bli_obj_datatype( *a ); - //dt_targ_b = bli_obj_datatype( *b ); - //dt_targ_c = bli_obj_datatype( *c ); - // We assume trmm is implemented with a block-panel kernel, thus, we will // only directly support the BLIS_LEFT case. We handle the BLIS_RIGHT case // by transposing the operation. @@ -117,31 +104,15 @@ void bli_trmm3( side_t side, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local ); } - // Create an object to hold a copy-cast of alpha. Notice that we use - // the target datatype of matrix A. - dt_alpha = dt_targ_a; - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. Notice that we use - // the datatype of C. - dt_beta = bli_obj_datatype( *c ); - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - // Choose the control tree. if ( bli_is_left( side ) ) cntl = trmm_l_cntl; else cntl = trmm_r_cntl; // Invoke the internal back-end. 
- bli_trmm_int( &alpha_local, + bli_trmm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -177,8 +148,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dim_with_side( side, m, n, mn_a ); \ bli_set_dims_with_trans( transb, m, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/trsm/bli_trsm.c b/frame/3/trsm/bli_trsm.c index db37d652b..69d544341 100644 --- a/frame/3/trsm/bli_trsm.c +++ b/frame/3/trsm/bli_trsm.c @@ -46,18 +46,16 @@ void bli_trsm( side_t side, obj_t* b ) { trsm_t* cntl; - obj_t alpha_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trsm_check( side, alpha, a, b ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( alpha, b ); return; @@ -127,25 +125,12 @@ void bli_trsm( side_t side, bli_obj_set_as_root( b_local ); bli_obj_set_as_root( c_local ); - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_trmm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - // Choose the control tree. if ( bli_is_left( side ) ) cntl = trsm_l_cntl; else cntl = trsm_r_cntl; // Invoke the internal back-end. 
- bli_trsm_int( &alpha_local, + bli_trsm_int( alpha, &a_local, &b_local, &BLIS_ZERO, @@ -179,7 +164,7 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dim_with_side( side, m, n, mn_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, n, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/trsm/bli_trsm_blk_var1b.c b/frame/3/trsm/bli_trsm_blk_var1b.c index 10bfabffd..16c1973b5 100644 --- a/frame/3/trsm/bli_trsm_blk_var1b.c +++ b/frame/3/trsm/bli_trsm_blk_var1b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var1b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -68,9 +66,8 @@ void bli_trsm_blk_var1b( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B1 (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the remaining portion of the m dimension. @@ -92,16 +89,15 @@ void bli_trsm_blk_var1b( obj_t* alpha, bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1, cntl_sub_trsm( cntl ) ); } diff --git a/frame/3/trsm/bli_trsm_blk_var1b.h b/frame/3/trsm/bli_trsm_blk_var1b.h index 5c62a375e..614ee0e20 100644 --- a/frame/3/trsm/bli_trsm_blk_var1b.h +++ b/frame/3/trsm/bli_trsm_blk_var1b.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var1b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var1f.c b/frame/3/trsm/bli_trsm_blk_var1f.c index 188f33421..540de42c0 100644 --- a/frame/3/trsm/bli_trsm_blk_var1f.c +++ b/frame/3/trsm/bli_trsm_blk_var1f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -67,9 +65,8 @@ void bli_trsm_blk_var1f( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B1 (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the remaining portion of the m dimension. @@ -89,16 +86,15 @@ void bli_trsm_blk_var1f( obj_t* alpha, bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1, cntl_sub_trsm( cntl ) ); } diff --git a/frame/3/trsm/bli_trsm_blk_var1f.h b/frame/3/trsm/bli_trsm_blk_var1f.h index 9f53fc234..ccc799f0f 100644 --- a/frame/3/trsm/bli_trsm_blk_var1f.h +++ b/frame/3/trsm/bli_trsm_blk_var1f.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var2b.c b/frame/3/trsm/bli_trsm_blk_var2b.c index a51a6eed8..9ecadc744 100644 --- a/frame/3/trsm/bli_trsm_blk_var2b.c +++ b/frame/3/trsm/bli_trsm_blk_var2b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trsm_blk_var2b( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trsm_blk_var2b( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trsm_blk_var2b( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trsm( cntl ) ); diff --git a/frame/3/trsm/bli_trsm_blk_var2b.h b/frame/3/trsm/bli_trsm_blk_var2b.h index 5a5f00d66..26f52d759 100644 --- a/frame/3/trsm/bli_trsm_blk_var2b.h +++ b/frame/3/trsm/bli_trsm_blk_var2b.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var2f.c b/frame/3/trsm/bli_trsm_blk_var2f.c index bfd59ff6d..05da54b0e 100644 --- a/frame/3/trsm/bli_trsm_blk_var2f.c +++ b/frame/3/trsm/bli_trsm_blk_var2f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trsm_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trsm_blk_var2f( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trsm_blk_var2f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trsm( cntl ) ); diff --git a/frame/3/trsm/bli_trsm_blk_var2f.h b/frame/3/trsm/bli_trsm_blk_var2f.h index eed4040b0..823233b15 100644 --- a/frame/3/trsm/bli_trsm_blk_var2f.h +++ b/frame/3/trsm/bli_trsm_blk_var2f.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var3b.c b/frame/3/trsm/bli_trsm_blk_var3b.c index 6176fdda6..2ba3c3532 100644 --- a/frame/3/trsm/bli_trsm_blk_var3b.c +++ b/frame/3/trsm/bli_trsm_blk_var3b.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_trsm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { obj_t a1, a1_pack; obj_t b1, b1_pack; obj_t c_pack; - obj_t* alpha_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_trsm_blk_var3b( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_trsm_blk_var3b( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -91,28 +87,27 @@ void bli_trsm_blk_var3b( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). 
+ bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // alpha only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) alpha_use = alpha; - else alpha_use = &BLIS_ONE; - // Perform trsm subproblem. - bli_trsm_int( alpha_use, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trsm( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal alpha scalar on matrix A/B is non-unit, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + if ( i == 0 ) { bli_obj_scalar_reset( a ); + bli_obj_scalar_reset( b ); } } // Unpack C (if C was packed). diff --git a/frame/3/trsm/bli_trsm_blk_var3b.h b/frame/3/trsm/bli_trsm_blk_var3b.h index c37fbd498..83f6b74f4 100644 --- a/frame/3/trsm/bli_trsm_blk_var3b.h +++ b/frame/3/trsm/bli_trsm_blk_var3b.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var3f.c b/frame/3/trsm/bli_trsm_blk_var3f.c index 596e63426..e93a67e3e 100644 --- a/frame/3/trsm/bli_trsm_blk_var3f.c +++ b/frame/3/trsm/bli_trsm_blk_var3f.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_trsm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { obj_t a1, a1_pack; obj_t b1, b1_pack; obj_t c_pack; - obj_t* alpha_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_trsm_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_trsm_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed).
- bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -91,28 +87,27 @@ void bli_trsm_blk_var3f( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // alpha only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) alpha_use = alpha; - else alpha_use = &BLIS_ONE; - // Perform trsm subproblem. - bli_trsm_int( alpha_use, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trsm( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal alpha scalar on matrix A/B is non-unit, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + if ( i == 0 ) { bli_obj_scalar_reset( a ); + bli_obj_scalar_reset( b ); } } // Unpack C (if C was packed).
diff --git a/frame/3/trsm/bli_trsm_blk_var3f.h b/frame/3/trsm/bli_trsm_blk_var3f.h index 203f13b15..dbfafab47 100644 --- a/frame/3/trsm/bli_trsm_blk_var3f.h +++ b/frame/3/trsm/bli_trsm_blk_var3f.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_int.c b/frame/3/trsm/bli_trsm_int.c index e3970c5bb..3e8d7dd3f 100644 --- a/frame/3/trsm/bli_trsm_int.c +++ b/frame/3/trsm/bli_trsm_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T trsm_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); @@ -92,6 +90,8 @@ void bli_trsm_int( obj_t* alpha, obj_t* c, trsm_t* cntl ) { + obj_t a_local; + obj_t b_local; obj_t c_local; bool_t side, uplo; varnum_t n; @@ -113,6 +113,10 @@ void bli_trsm_int( obj_t* alpha, return; } + // Alias A and B in case we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *b, b_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -127,6 +131,12 @@ void bli_trsm_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If beta is non-unit, apply it to the scalar attached to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). @@ -135,6 +145,13 @@ void bli_trsm_int( obj_t* alpha, side = 0; if ( bli_obj_root_is_lower( *a ) ) uplo = 0; else uplo = 1; + + // If alpha is non-unit, typecast and apply it to the scalar + // attached to B (the non-triangular matrix). 
+ if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &b_local ); + } } else // if ( bli_obj_root_is_triangular( *b ) ) { @@ -142,6 +159,13 @@ void bli_trsm_int( obj_t* alpha, // Set a bool based on the uplo field of A's root object. if ( bli_obj_root_is_lower( *b ) ) uplo = 0; else uplo = 1; + + // If alpha is non-unit, typecast and apply it to the scalar + // attached to A (the non-triangular matrix). + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &a_local ); + } } // Extract the variant number and implementation type. @@ -152,10 +176,8 @@ void bli_trsm_int( obj_t* alpha, f = vars[side][uplo][n][i]; // Invoke the variant. - f( alpha, - a, - b, - beta, + f( &a_local, + &b_local, &c_local, cntl ); } diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 520d8d714..74788ebeb 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_ll_ker_var2); -void bli_trsm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_ll_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to B. + buf_alpha = bli_obj_internal_scalar_buffer( *b ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.h b/frame/3/trsm/bli_trsm_ll_ker_var2.h index f87001583..35f4bf0cf 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.h +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_trsm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.c b/frame/3/trsm/bli_trsm_lu_ker_var2.c index 4cd0a20f6..35b77b365 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.c +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_lu_ker_var2); -void bli_trsm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_lu_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to B. + buf_alpha = bli_obj_internal_scalar_buffer( *b ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.h b/frame/3/trsm/bli_trsm_lu_ker_var2.h index 6317cc528..d864328be 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.h +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trsm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index 20aa0700c..8396be9f4 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_rl_ker_var2); -void bli_trsm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_rl_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to A. + buf_alpha = bli_obj_internal_scalar_buffer( *a ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.h b/frame/3/trsm/bli_trsm_rl_ker_var2.h index 7929b8fed..fc676dfb0 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.h +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface.
// -void bli_trsm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index 580f1e18d..ef3a37606 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_ru_ker_var2); -void bli_trsm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_ru_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to A. + buf_alpha = bli_obj_internal_scalar_buffer( *a ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.h b/frame/3/trsm/bli_trsm_ru_ker_var2.h index 1cfeddf9e..9d4295ebb 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.h +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface.
// -void bli_trsm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index 15d303c56..9d76e108d 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -66,7 +66,7 @@ void bli_obj_create_without_buffer( num_t dt, { siz_t elem_size; mem_t* pack_mem; - //mem_t* cast_mem; + void* s; if ( bli_error_checking_is_enabled() ) bli_obj_create_without_buffer_check( dt, m, n, obj ); @@ -99,9 +99,15 @@ void bli_obj_create_without_buffer( num_t dt, bli_obj_set_diag_offset( 0, *obj ); pack_mem = bli_obj_pack_mem( *obj ); - //cast_mem = bli_obj_cast_mem( *obj ); bli_mem_set_buffer( NULL, pack_mem ); - //bli_mem_set_buffer( NULL, cast_mem ); + + // Set the internal scalar to 1.0. + s = bli_obj_internal_scalar_buffer( *obj ); + + if ( bli_is_float( dt ) ) bli_sset1s( *(( float* )s) ) + else if ( bli_is_double( dt ) ) bli_dset1s( *(( double* )s) ) + else if ( bli_is_scomplex( dt ) ) bli_cset1s( *(( scomplex* )s) ) + else if ( bli_is_dcomplex( dt ) ) bli_zset1s( *(( dcomplex* )s) ) } void bli_obj_alloc_buffer( inc_t rs, @@ -210,56 +216,17 @@ void bli_obj_attach_buffer( void* p, bli_obj_set_incs( rs, cs, *obj ); } -void bli_obj_attach_internal_buffer( obj_t* obj ) -{ - void* p; - - // Query the address of the object's internal scalar buffer. - p = bli_obj_internal_scalar_buffer( *obj ); - - // Update the object. - bli_obj_set_buffer( p, *obj ); - bli_obj_set_incs( 1, 1, *obj ); -} - -void bli_obj_init_scalar( num_t dt, - obj_t* b ) -{ - // Initialize b without a buffer and then attach its internal buffer. - bli_obj_create_without_buffer( dt, 1, 1, b ); - bli_obj_attach_internal_buffer( b ); -} - -void bli_obj_init_scalar_copy_of( num_t dt, - conj_t conj, - obj_t* a, - obj_t* b ) -{ - obj_t a_local; - - // Make a local copy of scalar a so we can apply the conj parameter. 
- bli_obj_alias_to( *a, a_local ); - bli_obj_apply_conj( conj, a_local ); - - // Initialize b without a buffer and then attach its internal buffer. - bli_obj_create_without_buffer( dt, 1, 1, b ); - bli_obj_attach_internal_buffer( b ); - - // Copy the scalar value in a to object b, conjugating if needed. - bli_copysc( &a_local, b ); -} - -void bli_obj_create_scalar( num_t dt, - obj_t* obj ) +void bli_obj_create_1x1( num_t dt, + obj_t* obj ) { bli_obj_create_without_buffer( dt, 1, 1, obj ); bli_obj_alloc_buffer( 1, 1, obj ); } -void bli_obj_create_scalar_with_attached_buffer( num_t dt, - void* p, - obj_t* obj ) +void bli_obj_create_1x1_with_attached_buffer( num_t dt, + void* p, + obj_t* obj ) { bli_obj_create_without_buffer( dt, 1, 1, obj ); @@ -274,8 +241,9 @@ void bli_obj_free( obj_t* obj ) // Don't dereference obj if it is NULL. if ( obj != NULL ) { - // Idiot safety: Don't try to free the buffer field if it currently - // refers to the internal scalar buffer. + // Idiot safety: Don't try to free the buffer field if the object + // is a detached scalar (ie: if the buffer pointer refers to the + // address of the internal scalar buffer). if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) ) bli_free( bli_obj_buffer( *obj ) ); } @@ -387,7 +355,7 @@ void bli_adjust_strides( dim_t m, // Interpret rs = cs = 0 as request for column storage. if ( *rs == 0 && *cs == 0 ) { - // First we handle the scalar case explicitly. + // First we handle the 1x1 scalar case explicitly. if ( m == 1 && n == 1 ) { *rs = 1; @@ -412,7 +380,7 @@ void bli_adjust_strides( dim_t m, // single vector (but could also be a request for a 1xn matrix in // column-major order or an mx1 matrix in row-major order). In BLIS, // we have decided to "reserve" the case where rs = cs = 1 for - // scalars only. + // 1x1 scalars only. 
if ( m > 1 && n == 1 ) { // Set the column stride to indicate that this is a column vector @@ -431,7 +399,7 @@ void bli_adjust_strides( dim_t m, *rs = n; } - // Nothing needs to be done for the scalar case where m == n == 1. + // Nothing needs to be done for the 1x1 scalar case where m == n == 1. } } diff --git a/frame/base/bli_obj.h b/frame/base/bli_obj.h index b44ab9d48..77c62b531 100644 --- a/frame/base/bli_obj.h +++ b/frame/base/bli_obj.h @@ -63,22 +63,12 @@ void bli_obj_attach_buffer( void* p, inc_t cs, obj_t* obj ); -void bli_obj_attach_internal_buffer( obj_t* obj ); +void bli_obj_create_1x1( num_t dt, + obj_t* obj ); -void bli_obj_init_scalar( num_t dt, - obj_t* b ); - -void bli_obj_init_scalar_copy_of( num_t dt, - conj_t conj, - obj_t* a, - obj_t* b ); - -void bli_obj_create_scalar( num_t dt, - obj_t* obj ); - -void bli_obj_create_scalar_with_attached_buffer( num_t dt, - void* p, - obj_t* obj ); +void bli_obj_create_1x1_with_attached_buffer( num_t dt, + void* p, + obj_t* obj ); void bli_obj_free( obj_t* obj ); diff --git a/frame/base/bli_obj_scalar.c b/frame/base/bli_obj_scalar.c new file mode 100644 index 000000000..1052f5154 --- /dev/null +++ b/frame/base/bli_obj_scalar.c @@ -0,0 +1,174 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2013, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + + +void bli_obj_scalar_init_detached( num_t dt, + obj_t* beta ) +{ + void* p; + + // Initialize beta without a buffer and then attach its internal buffer. + bli_obj_create_without_buffer( dt, 1, 1, beta ); + + // Query the address of the object's internal scalar buffer. + p = bli_obj_internal_scalar_buffer( *beta ); + + // Update the object. + bli_obj_set_buffer( p, *beta ); + bli_obj_set_incs( 1, 1, *beta ); +} + +void bli_obj_scalar_init_detached_copy_of( num_t dt, + conj_t conj, + obj_t* alpha, + obj_t* beta ) +{ + obj_t alpha_local; + + // Make a local copy of alpha so we can apply the conj parameter. + bli_obj_alias_to( *alpha, alpha_local ); + bli_obj_apply_conj( conj, alpha_local ); + + // Initialize beta without a buffer and then attach its internal buffer. + bli_obj_scalar_init_detached( dt, beta ); + + // Copy the scalar value in alpha to object beta, conjugating and/or
+ bli_copysc( &alpha_local, beta ); +} + +void bli_obj_scalar_detach( obj_t* a, + obj_t* alpha ) +{ + num_t dt_a = bli_obj_datatype( *a ); + + // Initialize alpha to be a bufferless internal scalar of the same + // datatype as A. + bli_obj_scalar_init_detached( dt_a, alpha ); + + // Copy the internal scalar in A to alpha. + bli_obj_copy_internal_scalar( *a, *alpha ); +} + +void bli_obj_scalar_attach( conj_t conj, + obj_t* alpha, + obj_t* a ) +{ + obj_t alpha_cast; + + // Make a copy-cast of alpha of the same datatype as A. This step + // gives us the opportunity to conjugate and/or typecast alpha. + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), + conj, + alpha, + &alpha_cast ); + + // Copy the internal scalar in alpha_cast to A. + bli_obj_copy_internal_scalar( alpha_cast, *a ); +} + +void bli_obj_scalar_apply_scalar( obj_t* alpha, + obj_t* a ) +{ + obj_t alpha_cast; + obj_t scalar_a; + + // Make a copy-cast of alpha of the same datatype as A. This step + // gives us the opportunity to typecast alpha. + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), + BLIS_NO_CONJUGATE, + alpha, + &alpha_cast ); + // Detach the scalar from A. + bli_obj_scalar_detach( a, &scalar_a ); + + // Scale the detached scalar by alpha. + bli_mulsc( &alpha_cast, &scalar_a ); + + // Copy the internal scalar in scalar_a to A. 
+ bli_obj_copy_internal_scalar( scalar_a, *a ); +} + +void bli_obj_scalar_reset( obj_t* a ) +{ + num_t dt = bli_obj_datatype( *a ); + void* scalar_a = bli_obj_internal_scalar_buffer( *a ); + void* one = bli_obj_buffer_for_const( dt, BLIS_ONE ); + + if ( bli_is_float( dt ) ) *(( float* )scalar_a) = *(( float* )one); + else if ( bli_is_double( dt ) ) *(( double* )scalar_a) = *(( double* )one); + else if ( bli_is_scomplex( dt ) ) *(( scomplex* )scalar_a) = *(( scomplex* )one); + else if ( bli_is_dcomplex( dt ) ) *(( dcomplex* )scalar_a) = *(( dcomplex* )one); + + // Alternate implementation: + //bli_obj_scalar_attach( BLIS_NO_CONJUGATE, &BLIS_ONE, a ); +} + +bool_t bli_obj_scalar_has_nonzero_imag( obj_t* a ) +{ + bool_t r_val = FALSE; + num_t dt = bli_obj_datatype( *a ); + void* scalar_a = bli_obj_internal_scalar_buffer( *a ); + + if ( bli_is_real( dt ) ) + { + r_val = FALSE; + } + else if ( bli_is_scomplex( dt ) ) + { + r_val = ( bli_cimag( *(( scomplex* )scalar_a) ) != 0.0F ); + } + else if ( bli_is_dcomplex( dt ) ) + { + r_val = ( bli_zimag( *(( dcomplex* )scalar_a) ) != 0.0 ); + } + + return r_val; +} + +bool_t bli_obj_scalar_equals( obj_t* a, + obj_t* beta ) +{ + obj_t scalar_a; + bool_t r_val; + + bli_obj_scalar_detach( a, &scalar_a ); + + r_val = bli_obj_equals( &scalar_a, beta ); + + return r_val; +} + diff --git a/frame/base/bli_obj_scalar.h b/frame/base/bli_obj_scalar.h new file mode 100644 index 000000000..f8f1c682f --- /dev/null +++ b/frame/base/bli_obj_scalar.h @@ -0,0 +1,59 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2013, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_obj_scalar_init_detached( num_t dt, + obj_t* beta ); + +void bli_obj_scalar_init_detached_copy_of( num_t dt, + conj_t conj, + obj_t* alpha, + obj_t* beta ); + +void bli_obj_scalar_detach( obj_t* a, + obj_t* alpha ); + +void bli_obj_scalar_attach( conj_t conj, + obj_t* alpha, + obj_t* a ); + +void bli_obj_scalar_apply_scalar( obj_t* alpha, + obj_t* a ); + +void bli_obj_scalar_reset( obj_t* a ); + +bool_t bli_obj_scalar_has_nonzero_imag( obj_t* a ); + +bool_t bli_obj_scalar_equals( obj_t* a, + obj_t* beta ); + diff --git a/frame/base/bli_query.c b/frame/base/bli_query.c index af52a6c98..06fa89909 100644 --- a/frame/base/bli_query.c +++ b/frame/base/bli_query.c @@ -34,8 +34,8 @@ #include "blis.h" -bool_t bli_obj_scalar_equals( obj_t* a, - obj_t* b ) +bool_t bli_obj_equals( obj_t* a, + obj_t* b ) { bool_t r_val = FALSE; num_t dt_a; @@ -43,6 +43,11 @@ bool_t bli_obj_scalar_equals( obj_t* a, num_t dt; void* buf_a; void* buf_b; + + // The function is not yet implemented for vectors and matrices. 
+ if ( !bli_obj_is_1x1( *a ) || + !bli_obj_is_1x1( *b ) ) + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); /* bli_printm( "a:", a, "%9.2e", "" ); bli_printm( "b:", b, "%9.2e", "" ); @@ -58,8 +63,8 @@ bli_printm( "b:", b, "%9.2e", "" ); if ( dt_b == BLIS_CONSTANT ) dt = dt_a; else dt = dt_b; - buf_a = bli_obj_scalar_buffer( dt, *a ); - buf_b = bli_obj_scalar_buffer( dt, *b ); + buf_a = bli_obj_buffer_for_1x1( dt, *a ); + buf_b = bli_obj_buffer_for_1x1( dt, *b ); /* printf( "dt: %u\n", dt ); printf( "dt_a: %u\n", dt_a ); @@ -79,10 +84,10 @@ printf( "bufb: %p\n", buf_b ); scomplex* bp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *b ); dcomplex* bp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *b ); - r_val = r_val || ( *ap_s == *bp_s ); - r_val = r_val || ( *ap_d == *bp_d ); - r_val = r_val || ( ap_c->real == bp_c->real && ap_c->imag == bp_c->imag ); - r_val = r_val || ( ap_z->real == bp_z->real && ap_z->imag == bp_z->imag ); + r_val = r_val || bli_seqa( ap_s, bp_s ); + r_val = r_val || bli_deqa( ap_d, bp_d ); + r_val = r_val || bli_ceqa( ap_c, bp_c ); + r_val = r_val || bli_zeqa( ap_z, bp_z ); } else if ( dt == BLIS_FLOAT ) r_val = bli_seqa( buf_a, buf_b ); else if ( dt == BLIS_DOUBLE ) r_val = bli_deqa( buf_a, buf_b ); diff --git a/frame/base/bli_query.h b/frame/base/bli_query.h index cd22a4d86..777c69046 100644 --- a/frame/base/bli_query.h +++ b/frame/base/bli_query.h @@ -32,6 +32,6 @@ */ -bool_t bli_obj_scalar_equals( obj_t* a, - obj_t* b ); +bool_t bli_obj_equals( obj_t* a, + obj_t* b ); diff --git a/frame/include/bli_obj_macro_defs.h b/frame/include/bli_obj_macro_defs.h index fdfbc7ad7..0de62a47f 100644 --- a/frame/include/bli_obj_macro_defs.h +++ b/frame/include/bli_obj_macro_defs.h @@ -485,7 +485,7 @@ bli_obj_width_stored( obj ) #define bli_obj_vector_inc( x ) \ \ - ( bli_obj_is_scalar( x ) ? 1 : \ + ( bli_obj_is_1x1( x ) ? 1 : \ ( bli_obj_length( x ) == 1 ? 
bli_obj_col_stride( x ) \ : bli_obj_row_stride( x ) ) \ ) @@ -508,7 +508,7 @@ bli_obj_width_stored( obj ) ( bli_obj_length( obj ) == 0 || \ bli_obj_width( obj ) == 0 ) -#define bli_obj_is_scalar( x ) \ +#define bli_obj_is_1x1( x ) \ \ ( bli_obj_length( x ) == 1 && \ bli_obj_width( x ) == 1 ) @@ -695,6 +695,17 @@ bli_obj_width_stored( obj ) \ &((obj).scalar) +// Bufferless scalar field modification + +#define bli_obj_set_internal_scalar( val, obj ) \ +{ \ + (obj).scalar = val; \ +} + +#define bli_obj_copy_internal_scalar( a, b ) \ +{ \ + (b).scalar = (a).scalar; \ +} // Element size query @@ -897,7 +908,7 @@ bli_obj_width_stored( obj ) (obj).offm * (obj).rs ) \ ) -#define bli_obj_scalar_buffer( dt, obj ) \ +#define bli_obj_buffer_for_1x1( dt, obj ) \ \ ( void* )( bli_obj_is_const( obj ) ? ( bli_obj_buffer_for_const( dt, obj ) ) \ : ( bli_obj_buffer_at_off( obj ) ) \ diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 2ea6e47b6..4ef2fd6f6 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -543,7 +543,7 @@ if ( bli_obj_is_const( *(obj_scalar) ) ) \ { \ dt_scalar = dt_aux; \ - buf_scalar = bli_obj_scalar_buffer( dt_scalar, *(obj_scalar) ); \ + buf_scalar = bli_obj_buffer_for_1x1( dt_scalar, *(obj_scalar) ); \ } \ else \ { \ @@ -558,7 +558,7 @@ { \ { \ dt_scalar = dt_aux; \ - buf_scalar = bli_obj_scalar_buffer( dt_scalar, *(obj_scalar) ); \ + buf_scalar = bli_obj_buffer_for_1x1( dt_scalar, *(obj_scalar) ); \ } \ } diff --git a/frame/include/blis.h b/frame/include/blis.h index a2ea09ab1..849e1f0ec 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -84,6 +84,7 @@ extern "C" { #include "bli_init.h" #include "bli_malloc.h" #include "bli_obj.h" +#include "bli_obj_scalar.h" #include "bli_mem.h" #include "bli_part.h" #include "bli_query.h" diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt index 
e5c0f517b..b4812a8e4 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt @@ -94,11 +94,11 @@ void bli_dotxf_opt_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of rho. dt_beta = dt_rho; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/testsuite/src/test_addm.c b/testsuite/src/test_addm.c index 60beab086..194e80bc6 100644 --- a/testsuite/src/test_addm.c +++ b/testsuite/src/test_addm.c @@ -137,8 +137,8 @@ void libblis_test_addm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -239,13 +239,13 @@ void libblis_test_addm_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aplusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); - bli_obj_init_scalar( dt_real, &n_r ); + bli_obj_scalar_init_detached( dt, &aplusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &n_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormm( y, &norm_r ); diff --git a/testsuite/src/test_addv.c b/testsuite/src/test_addv.c index 49a5752b3..248d15868 100644 --- a/testsuite/src/test_addv.c +++ b/testsuite/src/test_addv.c @@ -135,8 +135,8 @@ void libblis_test_addv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -234,12 +234,12 @@ void libblis_test_addv_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aplusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt, &aplusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormv( y, &norm_r ); diff --git a/testsuite/src/test_axpy2v.c b/testsuite/src/test_axpy2v.c index d046e3113..d17d2fad3 100644 --- a/testsuite/src/test_axpy2v.c +++ b/testsuite/src/test_axpy2v.c @@ -149,8 +149,8 @@ void libblis_test_axpy2v_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha1 ); - bli_obj_init_scalar( datatype, &alpha2 ); + bli_obj_scalar_init_detached( datatype, &alpha1 ); + bli_obj_scalar_init_detached( datatype, &alpha2 ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -269,7 +269,7 @@ void libblis_test_axpy2v_check( obj_t* alpha1, // is negligible, where v contains z as computed by two calls to axpyv. 
// - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); @@ -335,9 +335,9 @@ void bli_axpy2v_ker( obj_t* alpha1, inc_t inc_z = bli_obj_vector_inc( *z ); void* buf_z = bli_obj_buffer_at_off( *z ); - void* buf_alpha1 = bli_obj_scalar_buffer( dt, *alpha1 ); + void* buf_alpha1 = bli_obj_buffer_for_1x1( dt, *alpha1 ); - void* buf_alpha2 = bli_obj_scalar_buffer( dt, *alpha2 ); + void* buf_alpha2 = bli_obj_buffer_for_1x1( dt, *alpha2 ); FUNCPTR_T f; diff --git a/testsuite/src/test_axpyf.c b/testsuite/src/test_axpyf.c index a821db723..0d92f7ab2 100644 --- a/testsuite/src/test_axpyf.c +++ b/testsuite/src/test_axpyf.c @@ -154,7 +154,7 @@ void libblis_test_axpyf_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -275,8 +275,8 @@ void libblis_test_axpyf_check( obj_t* alpha, // axpyv. // - bli_obj_init_scalar( dt_real, &norm ); - bli_obj_init_scalar( dt, &alpha_chi1 ); + bli_obj_scalar_init_detached( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, m, 1, 0, 0, &v ); @@ -344,7 +344,7 @@ void bli_axpyf_ker( obj_t* alpha, inc_t inc_y = bli_obj_vector_inc( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); FUNCPTR_T f; diff --git a/testsuite/src/test_axpym.c b/testsuite/src/test_axpym.c index 78eaca522..99378b860 100644 --- a/testsuite/src/test_axpym.c +++ b/testsuite/src/test_axpym.c @@ -145,7 +145,7 @@ void libblis_test_axpym_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -253,7 +253,7 @@ void libblis_test_axpym_check( obj_t* alpha, // is negligible. // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); bli_obj_create( dt, m, n, 0, 0, &y_temp ); diff --git a/testsuite/src/test_axpyv.c b/testsuite/src/test_axpyv.c index bb4114584..435a0aaf8 100644 --- a/testsuite/src/test_axpyv.c +++ b/testsuite/src/test_axpyv.c @@ -144,7 +144,7 @@ void libblis_test_axpyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -250,7 +250,7 @@ void libblis_test_axpyv_check( obj_t* alpha, // is negligible. // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); diff --git a/testsuite/src/test_copym.c b/testsuite/src/test_copym.c index 79bf2f7fe..060f95199 100644 --- a/testsuite/src/test_copym.c +++ b/testsuite/src/test_copym.c @@ -216,7 +216,7 @@ void libblis_test_copym_check( obj_t* x, // is negligible. // - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_subm( x, y ); diff --git a/testsuite/src/test_copyv.c b/testsuite/src/test_copyv.c index 69e986051..fcb934ab7 100644 --- a/testsuite/src/test_copyv.c +++ b/testsuite/src/test_copyv.c @@ -213,7 +213,7 @@ void libblis_test_copyv_check( obj_t* x, // is negligible. 
// - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_subv( x, y ); diff --git a/testsuite/src/test_dotaxpyv.c b/testsuite/src/test_dotaxpyv.c index 377af7b29..96953d9e6 100644 --- a/testsuite/src/test_dotaxpyv.c +++ b/testsuite/src/test_dotaxpyv.c @@ -153,8 +153,8 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &rho ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &rho ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -301,8 +301,8 @@ void libblis_test_dotaxpyv_check( obj_t* alpha, // computed by dotv and axpyv, respectively. // - bli_obj_init_scalar( dt, &rho_temp ); - bli_obj_init_scalar( dt_real, &norm_z ); + bli_obj_scalar_init_detached( dt, &rho_temp ); + bli_obj_scalar_init_detached( dt_real, &norm_z ); bli_obj_create( dt, m, 1, 0, 0, &z_temp ); bli_copyv( z_orig, &z_temp ); @@ -371,7 +371,7 @@ void bli_dotaxpyv_ker( obj_t* alpha, void* buf_rho = bli_obj_buffer_at_off( *rho ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); FUNCPTR_T f; diff --git a/testsuite/src/test_dotv.c b/testsuite/src/test_dotv.c index 761a78e25..3a8e46d5a 100644 --- a/testsuite/src/test_dotv.c +++ b/testsuite/src/test_dotv.c @@ -140,7 +140,7 @@ void libblis_test_dotv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &rho ); + bli_obj_scalar_init_detached( datatype, &rho ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -246,10 +246,10 @@ void libblis_test_dotv_check( obj_t* x, // are negligible. 
// - bli_obj_init_scalar( dt_real, &rho_r ); - bli_obj_init_scalar( dt_real, &rho_i ); - bli_obj_init_scalar( dt_real, &norm_x ); - bli_obj_init_scalar( dt_real, &norm_xy ); + bli_obj_scalar_init_detached( dt_real, &rho_r ); + bli_obj_scalar_init_detached( dt_real, &rho_i ); + bli_obj_scalar_init_detached( dt_real, &norm_x ); + bli_obj_scalar_init_detached( dt_real, &norm_xy ); bli_fnormv( x, &norm_x ); diff --git a/testsuite/src/test_dotxaxpyf.c b/testsuite/src/test_dotxaxpyf.c index 6a0678267..ee066c9c3 100644 --- a/testsuite/src/test_dotxaxpyf.c +++ b/testsuite/src/test_dotxaxpyf.c @@ -166,8 +166,8 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[3], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -326,8 +326,8 @@ void libblis_test_dotxaxpyf_check( obj_t* alpha, // calls to dotxv and axpyv, respectively. 
// - bli_obj_init_scalar( dt_real, &norm ); - bli_obj_init_scalar( dt, &alpha_chi1 ); + bli_obj_scalar_init_detached( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, b_n, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &q ); @@ -433,9 +433,9 @@ void bli_dotxaxpyf_ker( obj_t* alpha, inc_t inc_z = bli_obj_vector_inc( *z ); void* buf_z = bli_obj_buffer_at_off( *z ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha );; + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha );; - void* buf_beta = bli_obj_scalar_buffer( dt, *beta );; + void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta );; FUNCPTR_T f; diff --git a/testsuite/src/test_dotxf.c b/testsuite/src/test_dotxf.c index 0eb00ae57..3c8091f71 100644 --- a/testsuite/src/test_dotxf.c +++ b/testsuite/src/test_dotxf.c @@ -156,8 +156,8 @@ void libblis_test_dotxf_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -280,7 +280,7 @@ void libblis_test_dotxf_check( obj_t* alpha, // dotxv. 
// - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, b_n, 1, 0, 0, &v ); @@ -347,9 +347,9 @@ void bli_dotxf_ker( obj_t* alpha, inc_t inc_y = bli_obj_vector_inc( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); - void* buf_beta = bli_obj_scalar_buffer( dt, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); FUNCPTR_T f; diff --git a/testsuite/src/test_dotxv.c b/testsuite/src/test_dotxv.c index f5e6d6d66..9d78e4a48 100644 --- a/testsuite/src/test_dotxv.c +++ b/testsuite/src/test_dotxv.c @@ -146,10 +146,10 @@ void libblis_test_dotxv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( datatype, &rho ); - bli_obj_init_scalar( datatype, &rho_save ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &rho ); + bli_obj_scalar_init_detached( datatype, &rho_save ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -272,11 +272,11 @@ void libblis_test_dotxv_check( obj_t* alpha, // are negligible. 
// - bli_obj_init_scalar( dt_real, &rho_r ); - bli_obj_init_scalar( dt_real, &rho_i ); - bli_obj_init_scalar( dt_real, &norm_x_r ); - bli_obj_init_scalar( dt_real, &norm_xy_r ); - bli_obj_init_scalar( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &rho_r ); + bli_obj_scalar_init_detached( dt_real, &rho_i ); + bli_obj_scalar_init_detached( dt_real, &norm_x_r ); + bli_obj_scalar_init_detached( dt_real, &norm_xy_r ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_copysc( alpha, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); diff --git a/testsuite/src/test_fnormm.c b/testsuite/src/test_fnormm.c index 07e5248cc..761504141 100644 --- a/testsuite/src/test_fnormm.c +++ b/testsuite/src/test_fnormm.c @@ -138,8 +138,8 @@ void libblis_test_fnormm_experiment( test_params_t* params, // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( dt_real, &norm ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -225,9 +225,9 @@ void libblis_test_fnormm_check( obj_t* beta, // where m and n are the dimensions of x. // - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &m_r ); - bli_obj_init_scalar( dt_real, &n_r ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &n_r ); bli_setsc( ( double )m, 0.0, &m_r ); bli_setsc( ( double )n, 0.0, &n_r ); diff --git a/testsuite/src/test_fnormv.c b/testsuite/src/test_fnormv.c index 2458b1b1f..55ff8630e 100644 --- a/testsuite/src/test_fnormv.c +++ b/testsuite/src/test_fnormv.c @@ -137,8 +137,8 @@ void libblis_test_fnormv_experiment( test_params_t* params, // Create test scalars. 
- bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( dt_real, &norm ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -222,8 +222,8 @@ void libblis_test_fnormv_check( obj_t* beta, // where m is the length of x. // - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); bli_setsc( ( double )m, 0.0, &m_r ); diff --git a/testsuite/src/test_gemm.c b/testsuite/src/test_gemm.c index 2373e0988..40f0550f9 100644 --- a/testsuite/src/test_gemm.c +++ b/testsuite/src/test_gemm.c @@ -156,9 +156,9 @@ void libblis_test_gemm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[1], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -175,7 +175,6 @@ void libblis_test_gemm_experiment( test_params_t* params, { bli_setsc( 1.2, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); - //bli_setsc( 0.0, 0.0, &beta ); } else { @@ -295,8 +294,8 @@ void libblis_test_gemm_check( obj_t* alpha, // = beta * C_orig * t + z // - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_gemm_ukr.c b/testsuite/src/test_gemm_ukr.c index 6e27a8f30..21ffc2564 100644 --- a/testsuite/src/test_gemm_ukr.c +++ b/testsuite/src/test_gemm_ukr.c @@ -164,9 +164,9 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params, op->dim_aux[1] = n; // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands. libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -221,8 +221,8 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a and b to ap and bp, respectively. - bli_packm_blk_var2( &BLIS_ONE, &a, &ap ); - bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &a, &ap ); + bli_packm_blk_var2( &b, &bp ); // Repeat the experiment n_repeats times and record results. 
@@ -326,8 +326,8 @@ void libblis_test_gemm_ukr_check( obj_t* alpha, // = beta * C_orig * t + z // - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); @@ -392,9 +392,9 @@ void bli_gemm_ukr( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); - void* buf_beta = bli_obj_scalar_buffer( dt, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); FUNCPTR_T f; diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index be9ce1723..a719955bc 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -185,8 +185,8 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -251,10 +251,10 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var3( &BLIS_ONE, &a, &ap ); + bli_packm_blk_var3( &a, &ap ); // Pack the contents of b to bp. - bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &b, &bp ); // Create subpartitions from the a and b panels. @@ -268,7 +268,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, bli_copym( &c11_save, &c11 ); // Re-pack the contents of b to bp. 
- bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &b, &bp ); time = bli_clock(); @@ -369,8 +369,8 @@ void libblis_test_gemmtrsm_ukr_check( side_t side, // = inv(A11) * ( alpha * B11_orig * t - A1x * w ) // - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { @@ -523,7 +523,7 @@ void bli_gemmtrsm_ukr( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c11 ); inc_t cs_c = bli_obj_col_stride( *c11 ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); FUNCPTR_T f; diff --git a/testsuite/src/test_gemv.c b/testsuite/src/test_gemv.c index 0bfd3017b..ce77ffb50 100644 --- a/testsuite/src/test_gemv.c +++ b/testsuite/src/test_gemv.c @@ -152,9 +152,9 @@ void libblis_test_gemv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -290,8 +290,8 @@ void libblis_test_gemv_check( obj_t* kappa, // z = beta * y_orig + alpha * conja(kappa) * x // - bli_obj_init_scalar_copy_of( dt, conja, kappa, &kappac ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached_copy_of( dt, conja, kappa, &kappac ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n_x, 1, 0, 0, &x_temp ); bli_obj_create( dt, m_y, 1, 0, 0, &y_temp ); diff --git a/testsuite/src/test_ger.c b/testsuite/src/test_ger.c index 13b3b5326..20e8a452f 100644 --- a/testsuite/src/test_ger.c +++ b/testsuite/src/test_ger.c @@ -148,7 +148,7 @@ void libblis_test_ger_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -273,9 +273,9 @@ void libblis_test_ger_check( obj_t* alpha, // = A_orig * t + w // - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_hemm.c b/testsuite/src/test_hemm.c index c9a125fec..3f6d44bc2 100644 --- a/testsuite/src/test_hemm.c +++ b/testsuite/src/test_hemm.c @@ -163,9 +163,9 @@ void libblis_test_hemm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[3], &transb ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -317,8 +317,8 @@ void libblis_test_hemm_check( side_t side, // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_hemv.c b/testsuite/src/test_hemv.c index 6c2f59e10..99f775107 100644 --- a/testsuite/src/test_hemv.c +++ b/testsuite/src/test_hemv.c @@ -154,9 +154,9 @@ void libblis_test_hemv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -296,7 +296,7 @@ void libblis_test_hemv_check( obj_t* alpha, // v = beta * y_orig + alpha * conja(A_dense) * x // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_her.c b/testsuite/src/test_her.c index b3baf3c88..1ae34b5af 100644 --- a/testsuite/src/test_her.c +++ b/testsuite/src/test_her.c @@ -148,7 +148,7 @@ void libblis_test_her_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -278,9 +278,9 @@ void libblis_test_her_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_her2.c b/testsuite/src/test_her2.c index 55846980b..cf947b8eb 100644 --- a/testsuite/src/test_her2.c +++ b/testsuite/src/test_her2.c @@ -151,7 +151,7 @@ void libblis_test_her2_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, @@ -290,10 +290,10 @@ void libblis_test_her2_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt, &alphac ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt, &alphac ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_her2k.c b/testsuite/src/test_her2k.c index b12817402..b6023b19a 100644 --- a/testsuite/src/test_her2k.c +++ b/testsuite/src/test_her2k.c @@ -157,9 +157,9 @@ void libblis_test_her2k_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[2], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -316,9 +316,9 @@ void libblis_test_her2k_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah ); bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *b, bh ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); - bli_obj_init_scalar_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); + bli_obj_scalar_init_detached_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_herk.c b/testsuite/src/test_herk.c index 2b70b6dd0..e535ee9b8 100644 --- a/testsuite/src/test_herk.c +++ b/testsuite/src/test_herk.c @@ -154,9 +154,9 @@ void libblis_test_herk_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[1], &transa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -299,8 +299,8 @@ void libblis_test_herk_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_scal2m.c b/testsuite/src/test_scal2m.c index 6b5765e0b..e2f594945 100644 --- a/testsuite/src/test_scal2m.c +++ b/testsuite/src/test_scal2m.c @@ -144,7 +144,7 @@ void libblis_test_scal2m_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -252,7 +252,7 @@ void libblis_test_scal2m_check( obj_t* alpha, // is negligible. // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); diff --git a/testsuite/src/test_scal2v.c b/testsuite/src/test_scal2v.c index 3951a189b..fc2471071 100644 --- a/testsuite/src/test_scal2v.c +++ b/testsuite/src/test_scal2v.c @@ -143,7 +143,7 @@ void libblis_test_scal2v_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -249,7 +249,7 @@ void libblis_test_scal2v_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); diff --git a/testsuite/src/test_scalm.c b/testsuite/src/test_scalm.c index 19a58c078..e580b9cd3 100644 --- a/testsuite/src/test_scalm.c +++ b/testsuite/src/test_scalm.c @@ -140,7 +140,7 @@ void libblis_test_scalm_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -247,8 +247,8 @@ void libblis_test_scalm_check( obj_t* beta, bli_obj_create( dt, m, n, 0, 0, &y2 ); bli_copym( y_orig, &y2 ); - bli_obj_init_scalar( dt, &nbeta ); - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt, &nbeta ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_copysc( beta, &nbeta ); bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); diff --git a/testsuite/src/test_scalv.c b/testsuite/src/test_scalv.c index 25db4f0fa..fa60b1389 100644 --- a/testsuite/src/test_scalv.c +++ b/testsuite/src/test_scalv.c @@ -140,7 +140,7 @@ void libblis_test_scalv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, sc_str[0], m, &y ); @@ -243,8 +243,8 @@ void libblis_test_scalv_check( obj_t* beta, bli_obj_create( dt, m, 1, 0, 0, &y2 ); bli_copyv( y_orig, &y2 ); - bli_obj_init_scalar( dt, &nbeta ); - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt, &nbeta ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_copysc( beta, &nbeta ); bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); diff --git a/testsuite/src/test_setm.c b/testsuite/src/test_setm.c index f4c9e5d85..2e1ac390a 100644 --- a/testsuite/src/test_setm.c +++ b/testsuite/src/test_setm.c @@ -135,7 +135,7 @@ void libblis_test_setm_experiment( test_params_t* params, // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -200,7 +200,7 @@ void libblis_test_setm_check( obj_t* beta, inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); - void* buf_beta = bli_obj_scalar_buffer( dt_x, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_x, *beta ); dim_t i, j; *resid = 0.0; diff --git a/testsuite/src/test_setv.c b/testsuite/src/test_setv.c index 9bada131c..d147fbd39 100644 --- a/testsuite/src/test_setv.c +++ b/testsuite/src/test_setv.c @@ -134,7 +134,7 @@ void libblis_test_setv_experiment( test_params_t* params, // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -196,7 +196,7 @@ void libblis_test_setv_check( obj_t* beta, dim_t m_x = bli_obj_vector_dim( *x ); inc_t inc_x = bli_obj_vector_inc( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); - void* buf_beta = bli_obj_scalar_buffer( dt_x, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_x, *beta ); dim_t i; *resid = 0.0; diff --git a/testsuite/src/test_subm.c b/testsuite/src/test_subm.c index 9322fbc27..c9c177972 100644 --- a/testsuite/src/test_subm.c +++ b/testsuite/src/test_subm.c @@ -137,8 +137,8 @@ void libblis_test_subm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -239,13 +239,13 @@ void libblis_test_subm_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aminusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); - bli_obj_init_scalar( dt_real, &n_r ); + bli_obj_scalar_init_detached( dt, &aminusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &n_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormm( y, &norm_r ); diff --git a/testsuite/src/test_subv.c b/testsuite/src/test_subv.c index f6486a5cb..dbd09d5d5 100644 --- a/testsuite/src/test_subv.c +++ b/testsuite/src/test_subv.c @@ -136,8 +136,8 @@ void libblis_test_subv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -235,12 +235,12 @@ void libblis_test_subv_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aminusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt, &aminusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormv( y, &norm_r ); diff --git a/testsuite/src/test_symm.c b/testsuite/src/test_symm.c index 401a874ff..114fe2f4a 100644 --- a/testsuite/src/test_symm.c +++ b/testsuite/src/test_symm.c @@ -163,9 +163,9 @@ void libblis_test_symm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[3], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -317,8 +317,8 @@ void libblis_test_symm_check( side_t side, // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_symv.c b/testsuite/src/test_symv.c index c12f216a7..80207ee90 100644 --- a/testsuite/src/test_symv.c +++ b/testsuite/src/test_symv.c @@ -154,9 +154,9 @@ void libblis_test_symv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjx ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -296,7 +296,7 @@ void libblis_test_symv_check( obj_t* alpha, // v = beta * y_orig + alpha * conja(A_dense) * x // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syr.c b/testsuite/src/test_syr.c index 68362a4d5..607a23800 100644 --- a/testsuite/src/test_syr.c +++ b/testsuite/src/test_syr.c @@ -148,7 +148,7 @@ void libblis_test_syr_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -278,9 +278,9 @@ void libblis_test_syr_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syr2.c b/testsuite/src/test_syr2.c index 947fedcc4..69a015351 100644 --- a/testsuite/src/test_syr2.c +++ b/testsuite/src/test_syr2.c @@ -151,7 +151,7 @@ void libblis_test_syr2_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -292,9 +292,9 @@ void libblis_test_syr2_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syr2k.c b/testsuite/src/test_syr2k.c index c565fd78a..1638a82d1 100644 --- a/testsuite/src/test_syr2k.c +++ b/testsuite/src/test_syr2k.c @@ -157,9 +157,9 @@ void libblis_test_syr2k_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[2], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -315,8 +315,8 @@ void libblis_test_syr2k_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_TRANSPOSE, *a, at ); bli_obj_alias_with_trans( BLIS_TRANSPOSE, *b, bt ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syrk.c b/testsuite/src/test_syrk.c index 24c43b46a..212c8a457 100644 --- a/testsuite/src/test_syrk.c +++ b/testsuite/src/test_syrk.c @@ -154,9 +154,9 @@ void libblis_test_syrk_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[1], &transa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transa, @@ -300,8 +300,8 @@ void libblis_test_syrk_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_TRANSPOSE, *a, at ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_trmm.c b/testsuite/src/test_trmm.c index 281d44ddd..514fdf9b7 100644 --- a/testsuite/src/test_trmm.c +++ b/testsuite/src/test_trmm.c @@ -159,8 +159,8 @@ void libblis_test_trmm_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[3], &diaga ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -299,8 +299,8 @@ void libblis_test_trmm_check( side_t side, // = alpha * B * transa(A) * t // = alpha * B * w - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trmm3.c b/testsuite/src/test_trmm3.c index cf1f36f79..6a8ad3feb 100644 --- a/testsuite/src/test_trmm3.c +++ b/testsuite/src/test_trmm3.c @@ -165,9 +165,9 @@ void libblis_test_trmm3_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[4], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -318,8 +318,8 @@ void libblis_test_trmm3_check( side_t side, // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trmv.c b/testsuite/src/test_trmv.c index fbcd31621..0a7d788b6 100644 --- a/testsuite/src/test_trmv.c +++ b/testsuite/src/test_trmv.c @@ -150,8 +150,8 @@ void libblis_test_trmv_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[2], &diaga ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -276,7 +276,7 @@ void libblis_test_trmv_check( obj_t* alpha, // y = alpha * conja(A_dense) * x_orig // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &y ); bli_obj_create( dt, m, m, 0, 0, &a_local ); diff --git a/testsuite/src/test_trsm.c b/testsuite/src/test_trsm.c index 2de3f27ad..b78e135d6 100644 --- a/testsuite/src/test_trsm.c +++ b/testsuite/src/test_trsm.c @@ -159,8 +159,8 @@ void libblis_test_trsm_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[3], &diaga ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -300,8 +300,8 @@ void libblis_test_trsm_check( side_t side, // = alpha * B * tinv(ransa(A)) * t // = alpha * B * w - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index bce3ace69..cb9139f7c 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -165,7 +165,7 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -217,14 +217,14 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var3( &BLIS_ONE, &a, &ap ); + bli_packm_blk_var3( &a, &ap ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { // Re-pack the contents of b to bp. - bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &b, &bp ); bli_copym( &c_save, &c ); @@ -320,8 +320,8 @@ void libblis_test_trsm_ukr_check( side_t side, // = B * tinv(ransa(A)) * t // = B * w - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trsv.c b/testsuite/src/test_trsv.c index 147a1dbfb..1a2aa3431 100644 --- a/testsuite/src/test_trsv.c +++ b/testsuite/src/test_trsv.c @@ -150,8 +150,8 @@ void libblis_test_trsv_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[2], &diaga ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -277,8 +277,8 @@ void libblis_test_trsv_check( obj_t* alpha, // y = inv(alpha) * transa(A_dense) * x // - bli_obj_init_scalar( dt, &alpha_inv ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &alpha_inv ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_copysc( &BLIS_ONE, &alpha_inv ); bli_divsc( alpha, &alpha_inv );