diff --git a/frame/0/getsc/bli_getsc.c b/frame/0/getsc/bli_getsc.c index ad8c1cd28..ffd93dd05 100644 --- a/frame/0/getsc/bli_getsc.c +++ b/frame/0/getsc/bli_getsc.c @@ -56,7 +56,7 @@ void bli_getsc( obj_t* chi, // If chi is a constant object, default to using the dcomplex // value within since we don't know if the caller needs just the // real or the real and imaginary parts. - void* buf_chi = bli_obj_scalar_buffer( dt_def, *chi ); + void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi ); FUNCPTR_T f; diff --git a/frame/1/axpyv/bli_axpyv.c b/frame/1/axpyv/bli_axpyv.c index 45388df15..23f6a8b08 100644 --- a/frame/1/axpyv/bli_axpyv.c +++ b/frame/1/axpyv/bli_axpyv.c @@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of alpha. */ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ alpha, \ &alpha_local ); \ diff --git a/frame/1/dotxv/bli_dotxv_unb_var1.c b/frame/1/dotxv/bli_dotxv_unb_var1.c index e529e08e8..da9c92c4b 100644 --- a/frame/1/dotxv/bli_dotxv_unb_var1.c +++ b/frame/1/dotxv/bli_dotxv_unb_var1.c @@ -93,11 +93,11 @@ void bli_dotxv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of rho. dt_beta = dt_rho; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/1/scal2v/bli_scal2v.c b/frame/1/scal2v/bli_scal2v.c index 1984c500b..91cc6c163 100644 --- a/frame/1/scal2v/bli_scal2v.c +++ b/frame/1/scal2v/bli_scal2v.c @@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of beta. */ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ beta, \ &beta_local ); \ diff --git a/frame/1/scalv/bli_scalv.c b/frame/1/scalv/bli_scalv.c index f2c9e794d..915714027 100644 --- a/frame/1/scalv/bli_scalv.c +++ b/frame/1/scalv/bli_scalv.c @@ -57,7 +57,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of beta. */ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ beta, \ &beta_local ); \ diff --git a/frame/1/scalv/bli_scalv_int.c b/frame/1/scalv/bli_scalv_int.c index 8adea7589..10ab4fb51 100644 --- a/frame/1/scalv/bli_scalv_int.c +++ b/frame/1/scalv/bli_scalv_int.c @@ -64,7 +64,7 @@ void bli_scalv_int( obj_t* beta, if ( bli_obj_has_zero_dim( *x ) ) return; // Return early if the beta scalar equals one. - if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return; + if ( bli_obj_equals( beta, &BLIS_ONE ) ) return; // Extract the variant number and implementation type. n = cntl_var_num( cntl ); diff --git a/frame/1/setv/bli_setv.c b/frame/1/setv/bli_setv.c index 23c59a51d..f6676c471 100644 --- a/frame/1/setv/bli_setv.c +++ b/frame/1/setv/bli_setv.c @@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \ dt_x = bli_obj_datatype( *x ); \ \ /* Create an object to hold a copy-cast of beta. 
*/ \ - bli_obj_init_scalar_copy_of( dt_x, \ + bli_obj_scalar_init_detached_copy_of( dt_x, \ BLIS_NO_CONJUGATE, \ beta, \ &beta_local ); \ diff --git a/frame/1d/axpyd/bli_axpyd.c b/frame/1d/axpyd/bli_axpyd.c index 956987690..541b9a543 100644 --- a/frame/1d/axpyd/bli_axpyd.c +++ b/frame/1d/axpyd/bli_axpyd.c @@ -53,7 +53,7 @@ void bli_axpyd( obj_t* alpha, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, alpha, &alpha_local ); diff --git a/frame/1d/scal2d/bli_scal2d.c b/frame/1d/scal2d/bli_scal2d.c index fb61b6ebd..9f9bb533d 100644 --- a/frame/1d/scal2d/bli_scal2d.c +++ b/frame/1d/scal2d/bli_scal2d.c @@ -53,7 +53,7 @@ void bli_scal2d( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1d/scald/bli_scald.c b/frame/1d/scald/bli_scald.c index a491e2859..80aa13d84 100644 --- a/frame/1d/scald/bli_scald.c +++ b/frame/1d/scald/bli_scald.c @@ -52,7 +52,7 @@ void bli_scald( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1d/setd/bli_setd.c b/frame/1d/setd/bli_setd.c index 7bf2c623a..0fe088ec8 100644 --- a/frame/1d/setd/bli_setd.c +++ b/frame/1d/setd/bli_setd.c @@ -52,7 +52,7 @@ void bli_setd( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. 
- bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1f/axpyf/bli_axpyf_unb_var1.c b/frame/1f/axpyf/bli_axpyf_unb_var1.c index 9b15d8ee4..7442a4696 100644 --- a/frame/1f/axpyf/bli_axpyf_unb_var1.c +++ b/frame/1f/axpyf/bli_axpyf_unb_var1.c @@ -93,7 +93,7 @@ void bli_axpyf_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c index e8a4fbeef..dd15bce19 100644 --- a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c +++ b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var1.c @@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c index 8b2a3ecda..cb1526623 100644 --- a/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c +++ b/frame/1f/dotxaxpyf/bli_dotxaxpyf_unb_var2.c @@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/1f/dotxf/bli_dotxf_unb_var1.c b/frame/1f/dotxf/bli_dotxf_unb_var1.c index ba7ac47cd..a706b7f85 100644 --- a/frame/1f/dotxf/bli_dotxf_unb_var1.c +++ b/frame/1f/dotxf/bli_dotxf_unb_var1.c @@ -98,11 +98,11 @@ void bli_dotxf_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/1m/axpym/bli_axpym.c b/frame/1m/axpym/bli_axpym.c index 192624fa5..7adc5442a 100644 --- a/frame/1m/axpym/bli_axpym.c +++ b/frame/1m/axpym/bli_axpym.c @@ -53,7 +53,7 @@ void bli_axpym( obj_t* alpha, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, alpha, &alpha_local ); diff --git a/frame/1m/packm/bli_packm_blk_var2.c b/frame/1m/packm/bli_packm_blk_var2.c index 50db6c2c8..d12f65b2e 100644 --- a/frame/1m/packm/bli_packm_blk_var2.c +++ b/frame/1m/packm/bli_packm_blk_var2.c @@ -46,7 +46,7 @@ typedef void (*FUNCPTR_T)( dim_t n, dim_t m_max, dim_t n_max, - void* beta, + void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p, dim_t pd_p, inc_t ps_p @@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2); -void bli_packm_blk_var2( obj_t* beta, - obj_t* c, +void bli_packm_blk_var2( obj_t* c, obj_t* p ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -82,10 +81,16 @@ void bli_packm_blk_var2( obj_t* beta, dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_kappa; FUNCPTR_T f; + // This variant assumes that the micro-kernel will always apply the + // alpha scalar of the higher-level operation. Thus, we use BLIS_ONE + // for kappa so that the underlying packm implementation does not + // scale during packing. + buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); + // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_cp]; @@ -100,7 +105,7 @@ void bli_packm_blk_var2( obj_t* beta, n_p, m_max_p, n_max_p, - buf_beta, + buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, pd_p, ps_p ); @@ -120,16 +125,16 @@ void PASTEMAC(ch,varname )( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ ) \ { \ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict kappa_cast = kappa; \ + ctype* restrict c_cast = c; \ + ctype* restrict p_cast = p; \ + ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict c_begin; \ ctype* restrict p_begin; \ \ @@ -338,7 +343,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc10, \ p10_dim, \ p10_len, \ - beta_cast, \ + kappa_cast, \ c10, incc10, ldc10, \ p10, ldp ); \ \ @@ -347,7 +352,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc12, \ p12_dim, \ p12_len, \ - beta_cast, \ + kappa_cast, \ c12, incc12, ldc12, \ p12, ldp ); \ \ @@ -358,7 +363,7 @@ void PASTEMAC(ch,varname )( \ conjc, \ p11_m, \ p11_n, \ - beta_cast, \ + kappa_cast, \ c11, rs_c, cs_c, \ p11, rs_p11, cs_p11 ); \ \ @@ -412,7 +417,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc10, \ panel_dim_i, \ panel_len, \ - beta_cast, \ + kappa_cast, \ c10, incc10, ldc10, \ p_begin, ldp ); \ \ diff --git a/frame/1m/packm/bli_packm_blk_var2.h b/frame/1m/packm/bli_packm_blk_var2.h index 8022f7ff6..578150c89 100644 --- a/frame/1m/packm/bli_packm_blk_var2.h +++ b/frame/1m/packm/bli_packm_blk_var2.h @@ -32,8 +32,7 @@ */ -void bli_packm_blk_var2( obj_t* beta, - obj_t* c, +void bli_packm_blk_var2( obj_t* c, obj_t* p ); @@ -50,7 +49,7 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ diff --git 
a/frame/1m/packm/bli_packm_blk_var3.c b/frame/1m/packm/bli_packm_blk_var3.c index 52be1a5a4..f6a9ac727 100644 --- a/frame/1m/packm/bli_packm_blk_var3.c +++ b/frame/1m/packm/bli_packm_blk_var3.c @@ -49,7 +49,7 @@ typedef void (*FUNCPTR_T)( dim_t n, dim_t m_max, dim_t n_max, - void* beta, + void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p, dim_t pd_p, inc_t ps_p @@ -58,8 +58,7 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,packm_blk_var3); -void bli_packm_blk_var3( obj_t* beta, - obj_t* c, +void bli_packm_blk_var3( obj_t* c, obj_t* p ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -88,10 +87,16 @@ void bli_packm_blk_var3( obj_t* beta, dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_kappa; FUNCPTR_T f; + // This variant assumes that the micro-kernel will always apply the + // alpha scalar of the higher-level operation. Thus, we use BLIS_ONE + // for kappa so that the underlying packm implementation does not + // scale during packing. + buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); + // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_cp]; @@ -109,7 +114,7 @@ void bli_packm_blk_var3( obj_t* beta, n_p, m_max_p, n_max_p, - buf_beta, + buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p, pd_p, ps_p ); @@ -132,16 +137,16 @@ void PASTEMAC(ch,varname )( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ ) \ { \ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict kappa_cast = kappa; \ + ctype* restrict c_cast = c; \ + ctype* restrict p_cast = p; \ + ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict c_begin; \ ctype* restrict p_begin; \ \ @@ -317,7 +322,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc, \ panel_dim_i, \ panel_len_i, \ - beta_cast, \ + kappa_cast, \ c_use, incc, ldc, \ p_use, ldp ); \ \ @@ -328,7 +333,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC2(ch,ch,setd_unb_var1)( diagoffp, \ *m_panel_use, \ *n_panel_use, \ - beta_cast, \ + kappa_cast, \ p_use, rs_p, cs_p ); \ } \ \ @@ -378,7 +383,7 @@ void PASTEMAC(ch,varname )( \ PASTEMAC(ch,packm_cxk)( conjc, \ panel_dim_i, \ panel_len_i, \ - beta_cast, \ + kappa_cast, \ c_use, incc, ldc, \ p_use, ldp ); \ \ diff --git a/frame/1m/packm/bli_packm_blk_var3.h b/frame/1m/packm/bli_packm_blk_var3.h index dd0ca1a09..ff7ef2793 100644 --- a/frame/1m/packm/bli_packm_blk_var3.h +++ b/frame/1m/packm/bli_packm_blk_var3.h @@ -32,8 +32,7 @@ */ -void bli_packm_blk_var3( obj_t* beta, - obj_t* c, +void bli_packm_blk_var3( obj_t* c, obj_t* p ); @@ -53,7 +52,7 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p, \ dim_t pd_p, inc_t ps_p \ diff --git a/frame/1m/packm/bli_packm_check.c b/frame/1m/packm/bli_packm_check.c index da3b686cf..41b5302f4 100644 --- a/frame/1m/packm/bli_packm_check.c 
+++ b/frame/1m/packm/bli_packm_check.c @@ -34,33 +34,49 @@ #include "blis.h" -void bli_packm_check( obj_t* beta, - obj_t* c, - obj_t* p, - packm_t* cntl ) + +void bli_packm_init_check( obj_t* a, + obj_t* p, + packm_t* cntl ) { err_t e_val; // Check object datatypes. - e_val = bli_check_noninteger_object( beta ); + e_val = bli_check_floating_object( a ); bli_check_error_code( e_val ); - e_val = bli_check_floating_object( c ); - bli_check_error_code( e_val ); + // Check control tree pointer. - // Check object dimensions. - - e_val = bli_check_scalar_object( beta ); - bli_check_error_code( e_val ); - - // We don't check for conformal dimensions between c and p because - // p has not yet been initialized. - - // Check control tree pointer - - // NOTE: We can't check the control tree until we stop interpreting a - // NULL value (in bli_packm_int()) as a request to skip the operation. + // NOTE: We can't check the control tree because we interpret a NULL + // value (in bli_packm_int()) as a request to skip the operation. + //e_val = bli_check_valid_cntl( ( void* )cntl ); + //bli_check_error_code( e_val ); +} + +void bli_packm_int_check( obj_t* a, + obj_t* p, + packm_t* cntl ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( p ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_conformal_dims( a, p ); + bli_check_error_code( e_val ); + + // Check control tree pointer. + + // NOTE: We can't check the control tree because we interpret a NULL + // value (in bli_packm_int()) as a request to skip the operation. 
//e_val = bli_check_valid_cntl( ( void* )cntl ); //bli_check_error_code( e_val ); } diff --git a/frame/1m/packm/bli_packm_check.h b/frame/1m/packm/bli_packm_check.h index e4ce7292c..cc950b302 100644 --- a/frame/1m/packm/bli_packm_check.h +++ b/frame/1m/packm/bli_packm_check.h @@ -32,7 +32,10 @@ */ -void bli_packm_check( obj_t* beta, - obj_t* c, - obj_t* p, - packm_t* cntl ); +void bli_packm_init_check( obj_t* a, + obj_t* p, + packm_t* cntl ); + +void bli_packm_int_check( obj_t* a, + obj_t* p, + packm_t* cntl ); diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 9f10bd7aa..cc4ede33a 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -56,7 +56,7 @@ void bli_packm_init( obj_t* a, // Check parameters. if ( bli_error_checking_is_enabled() ) - bli_packm_check( &BLIS_ONE, a, p, cntl ); + bli_packm_init_check( a, p, cntl ); // First check if we are to skip this operation because the control tree // is NULL, and if so, simply alias the object to its packed counterpart. diff --git a/frame/1m/packm/bli_packm_int.c b/frame/1m/packm/bli_packm_int.c index 6d06fb98c..6aca671c5 100644 --- a/frame/1m/packm/bli_packm_int.c +++ b/frame/1m/packm/bli_packm_int.c @@ -36,8 +36,7 @@ #define FUNCPTR_T packm_fp -typedef void (*FUNCPTR_T)( obj_t* beta, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* p ); static FUNCPTR_T vars[6][3] = @@ -51,20 +50,17 @@ static FUNCPTR_T vars[6][3] = { NULL, NULL, NULL, }, }; -void bli_packm_int( obj_t* beta, - obj_t* a, +void bli_packm_int( obj_t* a, obj_t* p, packm_t* cntl ) { - obj_t* beta_use; - varnum_t n; impl_t i; FUNCPTR_T f; // Check parameters. if ( bli_error_checking_is_enabled() ) - bli_packm_check( beta, a, p, cntl ); + bli_packm_int_check( a, p, cntl ); // Sanity check; A should never have a zero dimension. If we must support // it, then we should fold it into the next alias-and-early-exit block. 
@@ -106,13 +102,35 @@ void bli_packm_int( obj_t* beta, return; } - // Notice that a beta parameter is always passed in. This value is allowed - // to be non-unit even when no scaling is prescribed. If the control tree - // indicates no scaling, then make sure that BLIS_ONE is passed into the - // packm implementation. - //if ( cntl_does_scale( cntl ) ) beta_use = beta; - //else beta_use = &BLIS_ONE; - beta_use = &BLIS_ONE; +/* + // The value for kappa we use will depend on whether the scalar + // attached to A has a nonzero imaginary component. If it does, + // then we will apply the scalar during packing to facilitate + // implementing complex domain micro-kernels in terms of their + // real domain counterparts. (In the aforementioned situation, + // applying a real scalar is easy, but applying a complex one is + // harder, so we avoid the need altogether with the code below.) + if ( bli_obj_scalar_has_nonzero_imag( a ) ) + { + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); + + // Detach the scalar. + bli_obj_scalar_detach( a, &kappa ); + + // Reset the attached scalar (to 1.0). + bli_obj_scalar_reset( a ); + + kappa_p = &kappa; + } + else + { + // If the internal scalar of A has only a real component, then + // we will apply it later (in the micro-kernel), and so we will + // use BLIS_ONE to indicate no scaling during packing. + kappa_p = &BLIS_ONE; + } +*/ + // Extract the variant number and implementation type. n = cntl_var_num( cntl ); @@ -121,9 +139,8 @@ void bli_packm_int( obj_t* beta, // Index into the variant array to extract the correct function pointer. f = vars[n][i]; - // Invoke the variant with beta_use. - f( beta_use, - a, + // Invoke the variant with kappa_use. 
+ f( a, p ); } diff --git a/frame/1m/packm/bli_packm_int.h b/frame/1m/packm/bli_packm_int.h index 40ec4ed7d..bd1174b24 100644 --- a/frame/1m/packm/bli_packm_int.h +++ b/frame/1m/packm/bli_packm_int.h @@ -32,8 +32,7 @@ */ -void bli_packm_int( obj_t* beta, - obj_t* c, +void bli_packm_int( obj_t* a, obj_t* p, packm_t* cntl ); diff --git a/frame/1m/packm/bli_packm_unb_var1.c b/frame/1m/packm/bli_packm_unb_var1.c index be0627f39..5d9626aa1 100644 --- a/frame/1m/packm/bli_packm_unb_var1.c +++ b/frame/1m/packm/bli_packm_unb_var1.c @@ -47,7 +47,7 @@ typedef void (*FUNCPTR_T)( dim_t n, dim_t m_max, dim_t n_max, - void* beta, + void* kappa, void* c, inc_t rs_c, inc_t cs_c, void* p, inc_t rs_p, inc_t cs_p ); @@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1); -void bli_packm_unb_var1( obj_t* beta, - obj_t* c, +void bli_packm_unb_var1( obj_t* c, obj_t* p ) { num_t dt_cp = bli_obj_datatype( *c ); @@ -81,7 +80,7 @@ void bli_packm_unb_var1( obj_t* beta, inc_t rs_p = bli_obj_row_stride( *p ); inc_t cs_p = bli_obj_col_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_kappa; FUNCPTR_T f; @@ -89,6 +88,12 @@ void bli_packm_unb_var1( obj_t* beta, if ( bli_obj_is_dense( *p ) ) densify = TRUE; else densify = FALSE; + // This variant assumes that the computational kernel will always apply + // the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE + // for kappa so that the underlying packm implementation does not scale + // during packing. + buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); + // Index into the type combination array to extract the correct // function pointer. 
f = ftypes[dt_cp]; @@ -104,7 +109,7 @@ void bli_packm_unb_var1( obj_t* beta, n_p, m_max_p, n_max_p, - buf_beta, + buf_kappa, buf_c, rs_c, cs_c, buf_p, rs_p, cs_p ); } @@ -124,20 +129,20 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p \ ) \ { \ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict zero = PASTEMAC(ch,0); \ + ctype* restrict kappa_cast = kappa; \ + ctype* restrict c_cast = c; \ + ctype* restrict p_cast = p; \ + ctype* restrict zero = PASTEMAC(ch,0); \ \ /* We begin by packing the region indicated by the parameters. If matrix c is dense (either because the structure is general or because the structure has already been "densified"), this ends - up being the only action we take. Note that if beta is unit, + up being the only action we take. Note that if kappa is unit, the data is simply copied (rather than scaled by one). 
*/ \ PASTEMAC3(ch,ch,ch,scal2m)( diagoffc, \ diagc, \ @@ -145,7 +150,7 @@ void PASTEMAC(ch,varname)( \ transc, \ m, \ n, \ - beta_cast, \ + kappa_cast, \ c_cast, rs_c, cs_c, \ p_cast, rs_p, cs_p ); \ \ @@ -184,7 +189,7 @@ void PASTEMAC(ch,varname)( \ transc, \ m, \ n, \ - beta_cast, \ + kappa_cast, \ c_cast, rs_c, cs_c, \ p_cast, rs_p, cs_p ); \ } \ diff --git a/frame/1m/packm/bli_packm_unb_var1.h b/frame/1m/packm/bli_packm_unb_var1.h index 5da6224ff..9b7cec060 100644 --- a/frame/1m/packm/bli_packm_unb_var1.h +++ b/frame/1m/packm/bli_packm_unb_var1.h @@ -32,8 +32,7 @@ */ -void bli_packm_unb_var1( obj_t* beta, - obj_t* c, +void bli_packm_unb_var1( obj_t* c, obj_t* p ); @@ -51,7 +50,7 @@ void PASTEMAC(ch,varname)( \ dim_t n, \ dim_t m_max, \ dim_t n_max, \ - void* beta, \ + void* kappa, \ void* c, inc_t rs_c, inc_t cs_c, \ void* p, inc_t rs_p, inc_t cs_p \ ); diff --git a/frame/1m/packm/old/bli_packm_blk_var1.c b/frame/1m/packm/old/bli_packm_blk_var1.c index 7a0d3bfae..4d1207f9a 100644 --- a/frame/1m/packm/old/bli_packm_blk_var1.c +++ b/frame/1m/packm/old/bli_packm_blk_var1.c @@ -83,7 +83,7 @@ void bli_packm_blk_var1( obj_t* beta, inc_t cs_p = bli_obj_col_stride( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta ); FUNCPTR_T f; diff --git a/frame/1m/packm/other/bli_packm_blk_var2.c b/frame/1m/packm/other/bli_packm_blk_var2.c index cdbaddc77..194b50c23 100644 --- a/frame/1m/packm/other/bli_packm_blk_var2.c +++ b/frame/1m/packm/other/bli_packm_blk_var2.c @@ -83,7 +83,7 @@ void bli_packm_blk_var2( obj_t* beta, dim_t pd_p = bli_obj_panel_dim( *p ); inc_t ps_p = bli_obj_panel_stride( *p ); - void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta ); FUNCPTR_T f; diff --git a/frame/1m/scal2m/bli_scal2m.c b/frame/1m/scal2m/bli_scal2m.c index 62b1f368c..c697c9b86 100644 --- a/frame/1m/scal2m/bli_scal2m.c +++ 
b/frame/1m/scal2m/bli_scal2m.c @@ -53,7 +53,7 @@ void bli_scal2m( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/1m/scalm/bli_scalm.c b/frame/1m/scalm/bli_scalm.c index a2a4a59c9..e53252629 100644 --- a/frame/1m/scalm/bli_scalm.c +++ b/frame/1m/scalm/bli_scalm.c @@ -43,28 +43,12 @@ extern scalm_t* scalm_cntl; void bli_scalm( obj_t* beta, obj_t* x ) { - num_t dt_x; - obj_t beta_local; - if ( bli_error_checking_is_enabled() ) bli_scalm_check( beta, x ); - // Use the datatype of x as the target type for beta (since we do - // not assume mixed domain/type support is enabled). - dt_x = bli_obj_datatype( *x ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_x, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - bli_scalm_unb_var1( &beta_local, x ); -/* - bli_scalm_int( &beta_local, + bli_scalm_int( beta, x, scalm_cntl ); -*/ } diff --git a/frame/1m/scalm/bli_scalm_int.c b/frame/1m/scalm/bli_scalm_int.c index caf520892..9b168e89a 100644 --- a/frame/1m/scalm/bli_scalm_int.c +++ b/frame/1m/scalm/bli_scalm_int.c @@ -36,8 +36,7 @@ #define FUNCPTR_T scalm_fp -typedef void (*FUNCPTR_T)( obj_t* beta, - obj_t* x ); +typedef void (*FUNCPTR_T)( obj_t* x ); static FUNCPTR_T vars[1][3] = { @@ -49,6 +48,7 @@ void bli_scalm_int( obj_t* beta, obj_t* x, scalm_t* cntl ) { + obj_t x_local; varnum_t n; impl_t i; FUNCPTR_T f; @@ -63,8 +63,18 @@ void bli_scalm_int( obj_t* beta, // Return early if one of the matrix operands has a zero dimension. if ( bli_obj_has_zero_dim( *x ) ) return; - // Return early if the beta scalar equals one. - if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return; + // Return early if both beta and the scalar attached to x are unit. 
+ if ( bli_obj_equals( beta, &BLIS_ONE ) && + bli_obj_scalar_equals( x, &BLIS_ONE ) ) return; + + // Alias x to x_local so we can apply beta if it is non-unit. + bli_obj_alias_to( *x, x_local ); + + // If beta is non-unit, apply it to the scalar attached to x. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &x_local ); + } // Extract the variant number and implementation type. n = cntl_var_num( cntl ); @@ -74,7 +84,6 @@ void bli_scalm_int( obj_t* beta, f = vars[n][i]; // Invoke the variant. - f( beta, - x ); + f( &x_local ); } diff --git a/frame/1m/scalm/bli_scalm_unb_var1.c b/frame/1m/scalm/bli_scalm_unb_var1.c index 0a97b0d60..a4fa042ed 100644 --- a/frame/1m/scalm/bli_scalm_unb_var1.c +++ b/frame/1m/scalm/bli_scalm_unb_var1.c @@ -59,12 +59,10 @@ static FUNCPTR_T GENARRAY2_MIN(ftypes,scalm_unb_var1); #endif -void bli_scalm_unb_var1( obj_t* beta, - obj_t* x ) +void bli_scalm_unb_var1( obj_t* x ) { num_t dt_x = bli_obj_datatype( *x ); - conj_t conjbeta = bli_obj_conj_status( *beta ); doff_t diagoffx = bli_obj_diag_offset( *x ); uplo_t uplox = bli_obj_uplo( *x ); @@ -76,21 +74,25 @@ void bli_scalm_unb_var1( obj_t* beta, inc_t cs_x = bli_obj_col_stride( *x ); void* buf_beta; - num_t dt_beta; FUNCPTR_T f; - // If beta is a scalar constant, use dt_x to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_x, dt_beta, buf_beta ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to x. + buf_beta = bli_obj_internal_scalar_buffer( *x ); // Index into the type combination array to extract the correct // function pointer. - f = ftypes[dt_beta][dt_x]; + // NOTE: We use dt_x for both beta and x because beta was obtained + // from the attached scalar of x, which is guaranteed to be of the + // same datatype as x. 
+ f = ftypes[dt_x][dt_x]; // Invoke the function. - f( conjbeta, + // NOTE: We unconditionally pass in BLIS_NO_CONJUGATE for beta + // because it would have already been conjugated by the front-end. + f( BLIS_NO_CONJUGATE, diagoffx, uplox, m, diff --git a/frame/1m/scalm/bli_scalm_unb_var1.h b/frame/1m/scalm/bli_scalm_unb_var1.h index c723e1a52..25912fccf 100644 --- a/frame/1m/scalm/bli_scalm_unb_var1.h +++ b/frame/1m/scalm/bli_scalm_unb_var1.h @@ -32,8 +32,7 @@ */ -void bli_scalm_unb_var1( obj_t* beta, - obj_t* x ); +void bli_scalm_unb_var1( obj_t* x ); #undef GENTPROT2 diff --git a/frame/1m/setm/bli_setm.c b/frame/1m/setm/bli_setm.c index 217f7780a..b996ac751 100644 --- a/frame/1m/setm/bli_setm.c +++ b/frame/1m/setm/bli_setm.c @@ -52,7 +52,7 @@ void bli_setm( obj_t* beta, dt_x = bli_obj_datatype( *x ); // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_x, + bli_obj_scalar_init_detached_copy_of( dt_x, BLIS_NO_CONJUGATE, beta, &beta_local ); diff --git a/frame/2/gemv/bli_gemv.c b/frame/2/gemv/bli_gemv.c index d8377e3f2..6f2180f55 100644 --- a/frame/2/gemv/bli_gemv.c +++ b/frame/2/gemv/bli_gemv.c @@ -78,7 +78,7 @@ void bli_gemv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -89,7 +89,7 @@ void bli_gemv( obj_t* alpha, // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. 
dt_beta = dt_targ_y; - bli_obj_init_scalar_copy_of( dt_beta, + bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); @@ -188,8 +188,8 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m_x * incx; \ rs_y = incy; cs_y = m_y * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/gemv/bli_gemv_blk_var1.c b/frame/2/gemv/bli_gemv_blk_var1.c index d04b8683d..130e45729 100644 --- a/frame/2/gemv/bli_gemv_blk_var1.c +++ b/frame/2/gemv/bli_gemv_blk_var1.c @@ -76,8 +76,7 @@ void bli_gemv_blk_var1( obj_t* alpha, cntl_sub_packv_y( cntl ) ); // Copy/pack A1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &y1, diff --git a/frame/2/gemv/bli_gemv_blk_var2.c b/frame/2/gemv/bli_gemv_blk_var2.c index 44ca60630..e6f203d61 100644 --- a/frame/2/gemv/bli_gemv_blk_var2.c +++ b/frame/2/gemv/bli_gemv_blk_var2.c @@ -81,8 +81,7 @@ void bli_gemv_blk_var2( obj_t* alpha, cntl_sub_packv_x( cntl ) ); // Copy/pack A1, x1 (if needed). - bli_packm_int( alpha, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/gemv/bli_gemv_unb_var1.c b/frame/2/gemv/bli_gemv_unb_var1.c index 069dfa44b..bb30dcaab 100644 --- a/frame/2/gemv/bli_gemv_unb_var1.c +++ b/frame/2/gemv/bli_gemv_unb_var1.c @@ -99,11 +99,11 @@ void bli_gemv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/gemv/bli_gemv_unb_var2.c b/frame/2/gemv/bli_gemv_unb_var2.c index 6bfaf74af..51e379a5d 100644 --- a/frame/2/gemv/bli_gemv_unb_var2.c +++ b/frame/2/gemv/bli_gemv_unb_var2.c @@ -99,11 +99,11 @@ void bli_gemv_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/gemv/bli_gemv_unf_var1.c b/frame/2/gemv/bli_gemv_unf_var1.c index a1580fc39..7bcd97774 100644 --- a/frame/2/gemv/bli_gemv_unf_var1.c +++ b/frame/2/gemv/bli_gemv_unf_var1.c @@ -99,11 +99,11 @@ void bli_gemv_unf_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. 
dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/gemv/bli_gemv_unf_var2.c b/frame/2/gemv/bli_gemv_unf_var2.c index 6b95f704f..ecf18b4b4 100644 --- a/frame/2/gemv/bli_gemv_unf_var2.c +++ b/frame/2/gemv/bli_gemv_unf_var2.c @@ -99,11 +99,11 @@ void bli_gemv_unf_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/ger/bli_ger.c b/frame/2/ger/bli_ger.c index 4003837f0..10108705e 100644 --- a/frame/2/ger/bli_ger.c +++ b/frame/2/ger/bli_ger.c @@ -75,7 +75,7 @@ void bli_ger( obj_t* alpha, // the type union of the target datatypes of x and y to prevent any // unnecessary loss of information during the computation. 
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -148,7 +148,7 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m_x * incx; \ rs_y = incy; cs_y = m_y * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \ diff --git a/frame/2/ger/bli_ger_blk_var1.c b/frame/2/ger/bli_ger_blk_var1.c index 2b7c6a20c..27662a50a 100644 --- a/frame/2/ger/bli_ger_blk_var1.c +++ b/frame/2/ger/bli_ger_blk_var1.c @@ -75,8 +75,7 @@ void bli_ger_blk_var1( obj_t* alpha, cntl_sub_packv_x( cntl ) ); // Copy/pack A1, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/ger/bli_ger_blk_var2.c b/frame/2/ger/bli_ger_blk_var2.c index 1a977c453..65adc0573 100644 --- a/frame/2/ger/bli_ger_blk_var2.c +++ b/frame/2/ger/bli_ger_blk_var2.c @@ -75,8 +75,7 @@ void bli_ger_blk_var2( obj_t* alpha, cntl_sub_packv_y( cntl ) ); // Copy/pack A1, y1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a1, + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packv_int( &y1, diff --git a/frame/2/ger/bli_ger_int.c b/frame/2/ger/bli_ger_int.c index 59a0b3fbb..cb9273320 100644 --- a/frame/2/ger/bli_ger_int.c +++ b/frame/2/ger/bli_ger_int.c @@ -95,7 +95,7 @@ void bli_ger_int( conj_t conjx, bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( y_local ); - bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ), + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ), BLIS_CONJUGATE, alpha, &alpha_local ); diff --git a/frame/2/ger/bli_ger_unb_var1.c b/frame/2/ger/bli_ger_unb_var1.c index d4190eec4..123b8c39c 100644 --- a/frame/2/ger/bli_ger_unb_var1.c +++ b/frame/2/ger/bli_ger_unb_var1.c @@ -94,7 +94,7 @@ void bli_ger_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/ger/bli_ger_unb_var2.c b/frame/2/ger/bli_ger_unb_var2.c index 2efe70c94..b5ffdcd86 100644 --- a/frame/2/ger/bli_ger_unb_var2.c +++ b/frame/2/ger/bli_ger_unb_var2.c @@ -94,7 +94,7 @@ void bli_ger_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/2/hemv/bli_hemv.c b/frame/2/hemv/bli_hemv.c index cae2ffb71..20a787b1c 100644 --- a/frame/2/hemv/bli_hemv.c +++ b/frame/2/hemv/bli_hemv.c @@ -78,7 +78,7 @@ void bli_hemv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -89,7 +89,7 @@ void bli_hemv( obj_t* alpha, // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. dt_beta = dt_targ_y; - bli_obj_init_scalar_copy_of( dt_beta, + bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); @@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/hemv/bli_hemv_blk_var1.c b/frame/2/hemv/bli_hemv_blk_var1.c index b20e45edc..97dac835c 100644 --- a/frame/2/hemv/bli_hemv_blk_var1.c +++ b/frame/2/hemv/bli_hemv_blk_var1.c @@ -106,8 +106,7 @@ void bli_hemv_blk_var1( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_blk_var2.c b/frame/2/hemv/bli_hemv_blk_var2.c index d319520fe..669b142ab 100644 --- a/frame/2/hemv/bli_hemv_blk_var2.c +++ b/frame/2/hemv/bli_hemv_blk_var2.c @@ -109,8 +109,7 @@ void bli_hemv_blk_var2( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_blk_var3.c b/frame/2/hemv/bli_hemv_blk_var3.c index 30914d4b6..39a0bf4ff 100644 --- a/frame/2/hemv/bli_hemv_blk_var3.c +++ b/frame/2/hemv/bli_hemv_blk_var3.c @@ -106,8 +106,7 @@ void bli_hemv_blk_var3( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_blk_var4.c b/frame/2/hemv/bli_hemv_blk_var4.c index 789b64336..e3365be55 100644 --- a/frame/2/hemv/bli_hemv_blk_var4.c +++ b/frame/2/hemv/bli_hemv_blk_var4.c @@ -109,8 +109,7 @@ void bli_hemv_blk_var4( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack A11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/hemv/bli_hemv_unb_var1.c b/frame/2/hemv/bli_hemv_unb_var1.c index 9a3ad87a7..a4c6d471e 100644 --- a/frame/2/hemv/bli_hemv_unb_var1.c +++ b/frame/2/hemv/bli_hemv_unb_var1.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var1( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unb_var2.c b/frame/2/hemv/bli_hemv_unb_var2.c index 4e70d5d24..7c6131763 100644 --- a/frame/2/hemv/bli_hemv_unb_var2.c +++ b/frame/2/hemv/bli_hemv_unb_var2.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var2( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unb_var3.c b/frame/2/hemv/bli_hemv_unb_var3.c index 5eb85b03e..4ea68d8ba 100644 --- a/frame/2/hemv/bli_hemv_unb_var3.c +++ b/frame/2/hemv/bli_hemv_unb_var3.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var3( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. 
dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unb_var4.c b/frame/2/hemv/bli_hemv_unb_var4.c index 974e0fd16..3a7d61706 100644 --- a/frame/2/hemv/bli_hemv_unb_var4.c +++ b/frame/2/hemv/bli_hemv_unb_var4.c @@ -101,11 +101,11 @@ void bli_hemv_unb_var4( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unf_var1.c b/frame/2/hemv/bli_hemv_unf_var1.c index 0f22ede56..70e27ef86 100644 --- a/frame/2/hemv/bli_hemv_unf_var1.c +++ b/frame/2/hemv/bli_hemv_unf_var1.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var1( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/2/hemv/bli_hemv_unf_var1a.c b/frame/2/hemv/bli_hemv_unf_var1a.c index e04e44ea9..e7baf0e2f 100644 --- a/frame/2/hemv/bli_hemv_unf_var1a.c +++ b/frame/2/hemv/bli_hemv_unf_var1a.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var1a( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/hemv/bli_hemv_unf_var3.c b/frame/2/hemv/bli_hemv_unf_var3.c index 50ad162a7..218a0d048 100644 --- a/frame/2/hemv/bli_hemv_unf_var3.c +++ b/frame/2/hemv/bli_hemv_unf_var3.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var3( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); #if 0 obj_t x_copy, y_copy; diff --git a/frame/2/hemv/bli_hemv_unf_var3a.c b/frame/2/hemv/bli_hemv_unf_var3a.c index 0c1cea2de..079dd42c4 100644 --- a/frame/2/hemv/bli_hemv_unf_var3a.c +++ b/frame/2/hemv/bli_hemv_unf_var3a.c @@ -101,11 +101,11 @@ void bli_hemv_unf_var3a( conj_t conjh, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of y. dt_beta = dt_y; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); #if 0 obj_t x_copy, y_copy; diff --git a/frame/2/her/bli_her.c b/frame/2/her/bli_her.c index c376f10d6..2f2bfdb85 100644 --- a/frame/2/her/bli_her.c +++ b/frame/2/her/bli_her.c @@ -68,7 +68,7 @@ void bli_her( obj_t* alpha, // Create object to hold a copy-cast of alpha. dt_alpha = dt_targ_x; - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -151,7 +151,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt_r, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/2/her/bli_her_blk_var1.c b/frame/2/her/bli_her_blk_var1.c index 4e3c80ebd..7a5cd3aed 100644 --- a/frame/2/her/bli_her_blk_var1.c +++ b/frame/2/her/bli_her_blk_var1.c @@ -90,8 +90,7 @@ void bli_her_blk_var1( conj_t conjh, cntl_sub_packv_x1( cntl ) ); // Copy/pack C11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her/bli_her_blk_var2.c b/frame/2/her/bli_her_blk_var2.c index e440e4972..3e534e5cb 100644 --- a/frame/2/her/bli_her_blk_var2.c +++ b/frame/2/her/bli_her_blk_var2.c @@ -90,8 +90,7 @@ void bli_her_blk_var2( conj_t conjh, cntl_sub_packv_x1( cntl ) ); // Copy/pack C11, x1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2.c b/frame/2/her2/bli_her2.c index e33c3a5e9..09929ab5c 100644 --- a/frame/2/her2/bli_her2.c +++ b/frame/2/her2/bli_her2.c @@ -75,13 +75,13 @@ void bli_her2( obj_t* alpha, // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); // Also create a conjugated copy of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_CONJUGATE, alpha, &alpha_conj_local ); @@ -171,7 +171,7 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ diff --git a/frame/2/her2/bli_her2_blk_var1.c b/frame/2/her2/bli_her2_blk_var1.c index c3cc94285..9a3ec5e7f 100644 --- a/frame/2/her2/bli_her2_blk_var1.c +++ b/frame/2/her2/bli_her2_blk_var1.c @@ -101,8 +101,7 @@ void bli_her2_blk_var1( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_blk_var2.c b/frame/2/her2/bli_her2_blk_var2.c index 39bbec4ed..4daab4759 100644 --- a/frame/2/her2/bli_her2_blk_var2.c +++ b/frame/2/her2/bli_her2_blk_var2.c @@ -104,8 +104,7 @@ void bli_her2_blk_var2( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_blk_var3.c b/frame/2/her2/bli_her2_blk_var3.c index 1fed9dc6f..058a951a3 100644 --- a/frame/2/her2/bli_her2_blk_var3.c +++ b/frame/2/her2/bli_her2_blk_var3.c @@ -104,8 +104,7 @@ void bli_her2_blk_var3( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_blk_var4.c b/frame/2/her2/bli_her2_blk_var4.c index dc989abea..e432c57a1 100644 --- a/frame/2/her2/bli_her2_blk_var4.c +++ b/frame/2/her2/bli_her2_blk_var4.c @@ -101,8 +101,7 @@ void bli_her2_blk_var4( conj_t conjh, cntl_sub_packv_y1( cntl ) ); // Copy/pack C11, x1, y1 (if needed). - bli_packm_int( &BLIS_ONE, - &c11, + bli_packm_int( &c11, &c11_pack, cntl_sub_packm_c11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/her2/bli_her2_int.c b/frame/2/her2/bli_her2_int.c index e40dc3c13..5b25f4d1f 100644 --- a/frame/2/her2/bli_her2_int.c +++ b/frame/2/her2/bli_her2_int.c @@ -93,11 +93,11 @@ void bli_her2_int( conj_t conjh, bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( y_local ); - bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ), + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ), BLIS_CONJUGATE, alpha, &alpha_local ); - bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha_conj ), + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha_conj ), BLIS_CONJUGATE, alpha_conj, &alpha_conj_local ); diff --git a/frame/2/her2/bli_her2_unb_var1.c b/frame/2/her2/bli_her2_unb_var1.c index 355231a89..1276aac37 100644 --- a/frame/2/her2/bli_her2_unb_var1.c +++ b/frame/2/her2/bli_her2_unb_var1.c @@ -96,7 +96,7 @@ void bli_her2_unb_var1( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. 
dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unb_var2.c b/frame/2/her2/bli_her2_unb_var2.c index e08e15e00..4d2ef3b1e 100644 --- a/frame/2/her2/bli_her2_unb_var2.c +++ b/frame/2/her2/bli_her2_unb_var2.c @@ -96,7 +96,7 @@ void bli_her2_unb_var2( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unb_var3.c b/frame/2/her2/bli_her2_unb_var3.c index d51462757..5b5a0051b 100644 --- a/frame/2/her2/bli_her2_unb_var3.c +++ b/frame/2/her2/bli_her2_unb_var3.c @@ -96,7 +96,7 @@ void bli_her2_unb_var3( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unb_var4.c b/frame/2/her2/bli_her2_unb_var4.c index 024387e0f..f461dd5fd 100644 --- a/frame/2/her2/bli_her2_unb_var4.c +++ b/frame/2/her2/bli_her2_unb_var4.c @@ -96,7 +96,7 @@ void bli_her2_unb_var4( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/2/her2/bli_her2_unf_var1.c b/frame/2/her2/bli_her2_unf_var1.c index c8e94ac12..bb172260e 100644 --- a/frame/2/her2/bli_her2_unf_var1.c +++ b/frame/2/her2/bli_her2_unf_var1.c @@ -96,7 +96,7 @@ void bli_her2_unf_var1( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/her2/bli_her2_unf_var4.c b/frame/2/her2/bli_her2_unf_var4.c index 6437816ec..e10f9eca7 100644 --- a/frame/2/her2/bli_her2_unf_var4.c +++ b/frame/2/her2/bli_her2_unf_var4.c @@ -96,7 +96,7 @@ void bli_her2_unf_var4( conj_t conjh, // The datatype of alpha MUST be the type union of the datatypes of x and y. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/symv/bli_symv.c b/frame/2/symv/bli_symv.c index be40e4913..2e267aa29 100644 --- a/frame/2/symv/bli_symv.c +++ b/frame/2/symv/bli_symv.c @@ -78,7 +78,7 @@ void bli_symv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -89,7 +89,7 @@ void bli_symv( obj_t* alpha, // the complex part of beta*y will not be stored. If y is complex and // beta is real then beta is harmlessly promoted to complex. 
dt_beta = dt_targ_y; - bli_obj_init_scalar_copy_of( dt_beta, + bli_obj_scalar_init_detached_copy_of( dt_beta, BLIS_NO_CONJUGATE, beta, &beta_local ); @@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/syr/bli_syr.c b/frame/2/syr/bli_syr.c index 2e0a55c94..8750c4031 100644 --- a/frame/2/syr/bli_syr.c +++ b/frame/2/syr/bli_syr.c @@ -70,7 +70,7 @@ void bli_syr( obj_t* alpha, // the type union of the target datatypes of x and c to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_c ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -152,7 +152,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/2/syr2/bli_syr2.c b/frame/2/syr2/bli_syr2.c index 4debf609e..1b2e061b7 100644 --- a/frame/2/syr2/bli_syr2.c +++ b/frame/2/syr2/bli_syr2.c @@ -74,7 +74,7 @@ void bli_syr2( obj_t* alpha, // Create an object to hold a copy-cast of alpha. Notice that we use // the type union of the datatypes of x and y. 
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -164,7 +164,7 @@ void PASTEMAC(ch,opname)( \ rs_x = incx; cs_x = m * incx; \ rs_y = incy; cs_y = m * incy; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ diff --git a/frame/2/trmv/bli_trmv.c b/frame/2/trmv/bli_trmv.c index 6d85e042a..e2b968719 100644 --- a/frame/2/trmv/bli_trmv.c +++ b/frame/2/trmv/bli_trmv.c @@ -70,7 +70,7 @@ void bli_trmv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -150,7 +150,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/trmv/bli_trmv_l_blk_var1.c b/frame/2/trmv/bli_trmv_l_blk_var1.c index 1e5ee9488..fab932343 100644 --- a/frame/2/trmv/bli_trmv_l_blk_var1.c +++ b/frame/2/trmv/bli_trmv_l_blk_var1.c @@ -80,8 +80,7 @@ void bli_trmv_l_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_l_blk_var2.c b/frame/2/trmv/bli_trmv_l_blk_var2.c index 3c8eb3265..cf7fa65dd 100644 --- a/frame/2/trmv/bli_trmv_l_blk_var2.c +++ b/frame/2/trmv/bli_trmv_l_blk_var2.c @@ -80,8 +80,7 @@ void bli_trmv_l_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_u_blk_var1.c b/frame/2/trmv/bli_trmv_u_blk_var1.c index 88a3ff182..7a4c8ac44 100644 --- a/frame/2/trmv/bli_trmv_u_blk_var1.c +++ b/frame/2/trmv/bli_trmv_u_blk_var1.c @@ -80,8 +80,7 @@ void bli_trmv_u_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_u_blk_var2.c b/frame/2/trmv/bli_trmv_u_blk_var2.c index 9011d5cb5..b1ed17b12 100644 --- a/frame/2/trmv/bli_trmv_u_blk_var2.c +++ b/frame/2/trmv/bli_trmv_u_blk_var2.c @@ -80,8 +80,7 @@ void bli_trmv_u_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trmv/bli_trmv_unb_var1.c b/frame/2/trmv/bli_trmv_unb_var1.c index 36d648258..bb3dc6892 100644 --- a/frame/2/trmv/bli_trmv_unb_var1.c +++ b/frame/2/trmv/bli_trmv_unb_var1.c @@ -88,7 +88,7 @@ void bli_trmv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trmv/bli_trmv_unb_var2.c b/frame/2/trmv/bli_trmv_unb_var2.c index 4833e3fca..6f6bb17b6 100644 --- a/frame/2/trmv/bli_trmv_unb_var2.c +++ b/frame/2/trmv/bli_trmv_unb_var2.c @@ -88,7 +88,7 @@ void bli_trmv_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trmv/bli_trmv_unf_var1.c b/frame/2/trmv/bli_trmv_unf_var1.c index 1fa625774..4f5234576 100644 --- a/frame/2/trmv/bli_trmv_unf_var1.c +++ b/frame/2/trmv/bli_trmv_unf_var1.c @@ -88,7 +88,7 @@ void bli_trmv_unf_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trmv/bli_trmv_unf_var2.c b/frame/2/trmv/bli_trmv_unf_var2.c index a2ea30e45..a806ee00c 100644 --- a/frame/2/trmv/bli_trmv_unf_var2.c +++ b/frame/2/trmv/bli_trmv_unf_var2.c @@ -88,7 +88,7 @@ void bli_trmv_unf_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. 
dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv.c b/frame/2/trsv/bli_trsv.c index 2be95de33..9c2924a44 100644 --- a/frame/2/trsv/bli_trsv.c +++ b/frame/2/trsv/bli_trsv.c @@ -70,7 +70,7 @@ void bli_trsv( obj_t* alpha, // the type union of the target datatypes of a and x to prevent any // unnecessary loss of information during the computation. dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x ); - bli_obj_init_scalar_copy_of( dt_alpha, + bli_obj_scalar_init_detached_copy_of( dt_alpha, BLIS_NO_CONJUGATE, alpha, &alpha_local ); @@ -147,7 +147,7 @@ void PASTEMAC(ch,opname)( \ \ rs_x = incx; cs_x = m * incx; \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ diff --git a/frame/2/trsv/bli_trsv_l_blk_var1.c b/frame/2/trsv/bli_trsv_l_blk_var1.c index 947cce0c0..b842a774c 100644 --- a/frame/2/trsv/bli_trsv_l_blk_var1.c +++ b/frame/2/trsv/bli_trsv_l_blk_var1.c @@ -85,8 +85,7 @@ void bli_trsv_l_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_l_blk_var2.c b/frame/2/trsv/bli_trsv_l_blk_var2.c index 910969c98..5f924bef6 100644 --- a/frame/2/trsv/bli_trsv_l_blk_var2.c +++ b/frame/2/trsv/bli_trsv_l_blk_var2.c @@ -85,8 +85,7 @@ void bli_trsv_l_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). 
- bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_u_blk_var1.c b/frame/2/trsv/bli_trsv_u_blk_var1.c index b16162826..ffc533642 100644 --- a/frame/2/trsv/bli_trsv_u_blk_var1.c +++ b/frame/2/trsv/bli_trsv_u_blk_var1.c @@ -85,8 +85,7 @@ void bli_trsv_u_blk_var1( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_u_blk_var2.c b/frame/2/trsv/bli_trsv_u_blk_var2.c index 145b2717e..8b03d9625 100644 --- a/frame/2/trsv/bli_trsv_u_blk_var2.c +++ b/frame/2/trsv/bli_trsv_u_blk_var2.c @@ -85,8 +85,7 @@ void bli_trsv_u_blk_var2( obj_t* alpha, cntl_sub_packv_x1( cntl ) ); // Copy/pack A11, x1 (if needed). - bli_packm_int( &BLIS_ONE, - &a11, + bli_packm_int( &a11, &a11_pack, cntl_sub_packm_a11( cntl ) ); bli_packv_int( &x1, diff --git a/frame/2/trsv/bli_trsv_unb_var1.c b/frame/2/trsv/bli_trsv_unb_var1.c index 3166dc3be..8888df017 100644 --- a/frame/2/trsv/bli_trsv_unb_var1.c +++ b/frame/2/trsv/bli_trsv_unb_var1.c @@ -88,7 +88,7 @@ void bli_trsv_unb_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv_unb_var2.c b/frame/2/trsv/bli_trsv_unb_var2.c index 5bd60675e..b8ea6996f 100644 --- a/frame/2/trsv/bli_trsv_unb_var2.c +++ b/frame/2/trsv/bli_trsv_unb_var2.c @@ -88,7 +88,7 @@ void bli_trsv_unb_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. 
This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv_unf_var1.c b/frame/2/trsv/bli_trsv_unf_var1.c index 2e31c741c..b00c877b1 100644 --- a/frame/2/trsv/bli_trsv_unf_var1.c +++ b/frame/2/trsv/bli_trsv_unf_var1.c @@ -88,7 +88,7 @@ void bli_trsv_unf_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/2/trsv/bli_trsv_unf_var2.c b/frame/2/trsv/bli_trsv_unf_var2.c index 7619cb529..10c923686 100644 --- a/frame/2/trsv/bli_trsv_unf_var2.c +++ b/frame/2/trsv/bli_trsv_unf_var2.c @@ -88,7 +88,7 @@ void bli_trsv_unf_var2( obj_t* alpha, // The datatype of alpha MUST be the type union of a and x. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_a, dt_x ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/gemm/bli_gemm.c b/frame/3/gemm/bli_gemm.c index c9ff2fb33..2277fd1c7 100644 --- a/frame/3/gemm/bli_gemm.c +++ b/frame/3/gemm/bli_gemm.c @@ -48,21 +48,16 @@ void bli_gemm( obj_t* alpha, obj_t* c ) { gemm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_gemm_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -86,29 +81,6 @@ void bli_gemm( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_gemm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. cntl = gemm_cntl; @@ -122,10 +94,10 @@ void bli_gemm( obj_t* alpha, #endif // Invoke the internal back-end. 
- bli_gemm_int( &alpha_local, + bli_gemm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -159,8 +131,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ bli_set_dims_with_trans( transb, k, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/gemm/bli_gemm_blk_var1.c b/frame/3/gemm/bli_gemm_blk_var1.c index 3e9a6d30b..9c381c5dd 100644 --- a/frame/3/gemm/bli_gemm_blk_var1.c +++ b/frame/3/gemm/bli_gemm_blk_var1.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_gemm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_gemm_blk_var1( obj_t* alpha, m_trans = bli_obj_length_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_gemm_blk_var1( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -93,21 +90,19 @@ void bli_gemm_blk_var1( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). 
- bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_gemm( cntl ) ); diff --git a/frame/3/gemm/bli_gemm_blk_var1.h b/frame/3/gemm/bli_gemm_blk_var1.h index 65bf31e81..048468284 100644 --- a/frame/3/gemm/bli_gemm_blk_var1.h +++ b/frame/3/gemm/bli_gemm_blk_var1.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_blk_var2.c b/frame/3/gemm/bli_gemm_blk_var2.c index 1c1cda237..7a64d92c9 100644 --- a/frame/3/gemm/bli_gemm_blk_var2.c +++ b/frame/3/gemm/bli_gemm_blk_var2.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_gemm_blk_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_gemm_blk_var2( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_gemm_blk_var2( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -93,21 +90,19 @@ void bli_gemm_blk_var2( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). 
+ bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_gemm( cntl ) ); diff --git a/frame/3/gemm/bli_gemm_blk_var2.h b/frame/3/gemm/bli_gemm_blk_var2.h index 363b21409..2f07e8365 100644 --- a/frame/3/gemm/bli_gemm_blk_var2.h +++ b/frame/3/gemm/bli_gemm_blk_var2.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_blk_var3.c b/frame/3/gemm/bli_gemm_blk_var3.c index 74fcf8007..db8a009b5 100644 --- a/frame/3/gemm/bli_gemm_blk_var3.c +++ b/frame/3/gemm/bli_gemm_blk_var3.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_gemm_blk_var3( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var3( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { obj_t a1, a1_pack; obj_t b1, b1_pack; obj_t c_pack; - obj_t* beta_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_gemm_blk_var3( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_gemm_blk_var3( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -94,28 +90,29 @@ void bli_gemm_blk_var3( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). 
+ bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // beta only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) beta_use = beta; - else beta_use = &BLIS_ONE; - // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta_use, + &BLIS_ONE, &c_pack, cntl_sub_gemm( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal beta scalar on matrix C is non-zero, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + // And since c_pack is a local obj_t, we can simply overwrite the + // internal beta scalar with BLIS_ONE once it has been used in the + // first iteration. + if ( i == 0 ) bli_obj_scalar_reset( &c_pack ); } // Unpack C (if C was packed). diff --git a/frame/3/gemm/bli_gemm_blk_var3.h b/frame/3/gemm/bli_gemm_blk_var3.h index 6f555e0cc..63a3b2d75 100644 --- a/frame/3/gemm/bli_gemm_blk_var3.h +++ b/frame/3/gemm/bli_gemm_blk_var3.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var3( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var3( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_blk_var4.c b/frame/3/gemm/bli_gemm_blk_var4.c index 485732e6e..c08403cf3 100644 --- a/frame/3/gemm/bli_gemm_blk_var4.c +++ b/frame/3/gemm/bli_gemm_blk_var4.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_gemm_blk_var4( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var4( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_gemm_blk_var4( obj_t* alpha, m_trans = bli_obj_length_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -92,11 +90,11 @@ void bli_gemm_blk_var4( obj_t* alpha, bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Partition along the n dimension. for ( j = 0; j < n_trans; j += bn_inc ) @@ -113,14 +111,14 @@ void bli_gemm_blk_var4( obj_t* alpha, bli_acquire_mpart_l2r( BLIS_SUBPART1, j, bn_inc, &c1_pack, &c1_pack_inc ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, &b_inc, &b_pack_inc, cntl_sub_packm_b( cntl ) ); + // Pack B1 (if instructed). + bli_packm_int( &b_inc, &b_pack_inc, cntl_sub_packm_b( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b_pack_inc, - beta, + &BLIS_ONE, &c1_pack_inc, cntl_sub_gemm( cntl ) ); } @@ -152,29 +150,24 @@ void bli_gemm_blk_var4( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, - &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, - &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform gemm subproblem. - bli_gemm_int( alpha, + bli_gemm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_gemm( cntl ) ); // Unpack C1 (if C1 was packed). 
- bli_unpackm_int( &c1_pack, - &c1, + bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) ); } diff --git a/frame/3/gemm/bli_gemm_blk_var4.h b/frame/3/gemm/bli_gemm_blk_var4.h index a86b1672d..a05f3d865 100644 --- a/frame/3/gemm/bli_gemm_blk_var4.h +++ b/frame/3/gemm/bli_gemm_blk_var4.h @@ -32,10 +32,8 @@ */ -void bli_gemm_blk_var4( obj_t* alpha, - obj_t* a, +void bli_gemm_blk_var4( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_int.c b/frame/3/gemm/bli_gemm_int.c index ebcff79dc..d395d5c10 100644 --- a/frame/3/gemm/bli_gemm_int.c +++ b/frame/3/gemm/bli_gemm_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T gemm_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); @@ -61,6 +59,8 @@ void bli_gemm_int( obj_t* alpha, obj_t* c, gemm_t* cntl ) { + obj_t a_local; + obj_t b_local; obj_t c_local; varnum_t n; impl_t i; @@ -81,6 +81,10 @@ void bli_gemm_int( obj_t* alpha, return; } + // Alias A and B in case we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *b, b_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -95,6 +99,20 @@ void bli_gemm_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha is non-unit, typecast and apply it to the scalar attached + // to B. + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &b_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar attached + // to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Extract the variant number and implementation type. n = cntl_var_num( cntl ); i = cntl_impl_type( cntl ); @@ -103,10 +121,8 @@ void bli_gemm_int( obj_t* alpha, f = vars[n][i]; // Invoke the variant. 
- f( alpha, - a, - b, - beta, + f( &a_local, + &b_local, &c_local, cntl ); } diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index 924c18bf2..d5d7f773d 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2); -void bli_gemm_ker_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -77,10 +75,10 @@ void bli_gemm_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; @@ -103,15 +101,15 @@ void bli_gemm_ker_var2( obj_t* alpha, } */ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/3/gemm/bli_gemm_ker_var2.h b/frame/3/gemm/bli_gemm_ker_var2.h index 0d8cb8c3c..db195332b 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.h +++ b/frame/3/gemm/bli_gemm_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_gemm_ker_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_ker_var5.c b/frame/3/gemm/bli_gemm_ker_var5.c index 96e1b9232..785e3d97c 100644 --- a/frame/3/gemm/bli_gemm_ker_var5.c +++ b/frame/3/gemm/bli_gemm_ker_var5.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var5); -void bli_gemm_ker_var5( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var5( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -77,10 +75,10 @@ void bli_gemm_ker_var5( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; @@ -103,15 +101,16 @@ void bli_gemm_ker_var5( obj_t* alpha, } */ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. 
+ bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/gemm/bli_gemm_ker_var5.h b/frame/3/gemm/bli_gemm_ker_var5.h index 48b0d58ce..eaa506d97 100644 --- a/frame/3/gemm/bli_gemm_ker_var5.h +++ b/frame/3/gemm/bli_gemm_ker_var5.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_gemm_ker_var5( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var5( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ); diff --git a/frame/3/gemm/bli_gemm_target.c b/frame/3/gemm/bli_gemm_target.c index e70ee7262..fc9519c04 100644 --- a/frame/3/gemm/bli_gemm_target.c +++ b/frame/3/gemm/bli_gemm_target.c @@ -37,8 +37,6 @@ void bli_gemm_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ) { num_t dt_targ_a; @@ -110,18 +108,6 @@ void bli_gemm_set_targ_exec_datatypes( obj_t* a, bli_obj_toggle_trans( *b ); } - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. - *dt_alpha = bli_obj_target_datatype( *a ); - - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. - *dt_beta = bli_obj_datatype( *c ); - // For now disable packing of C. 
*pack_c = FALSE; } diff --git a/frame/3/gemm/bli_gemm_target.h b/frame/3/gemm/bli_gemm_target.h index 93c27cd31..4416ff955 100644 --- a/frame/3/gemm/bli_gemm_target.h +++ b/frame/3/gemm/bli_gemm_target.h @@ -35,8 +35,6 @@ void bli_gemm_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ); void bli_gemm_get_target_datatypes( obj_t* a, diff --git a/frame/3/gemm/other/bli_gemm_ker_var2.c b/frame/3/gemm/other/bli_gemm_ker_var2.c index f97913df9..5fd8c7db2 100644 --- a/frame/3/gemm/other/bli_gemm_ker_var2.c +++ b/frame/3/gemm/other/bli_gemm_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,gemm_ker_var2); -void bli_gemm_ker_var2( obj_t* alpha, - obj_t* a, +void bli_gemm_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, gemm_t* cntl ) { @@ -78,10 +76,10 @@ void bli_gemm_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; @@ -104,15 +102,13 @@ void bli_gemm_ker_var2( obj_t* alpha, } */ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. 
- bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/hemm/bli_hemm.c b/frame/3/hemm/bli_hemm.c index 1973b24ab..38eecaa5a 100644 --- a/frame/3/hemm/bli_hemm.c +++ b/frame/3/hemm/bli_hemm.c @@ -47,21 +47,16 @@ void bli_hemm( side_t side, obj_t* c ) { gemm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_hemm_check( side, alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -90,37 +85,14 @@ void bli_hemm( side_t side, bli_obj_swap( a_local, b_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_gemm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. cntl = hemm_cntl; // Invoke the internal back-end. 
- bli_gemm_int( &alpha_local, + bli_gemm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -155,8 +127,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dim_with_side( side, m, n, mn_a ); \ bli_set_dims_with_trans( transb, m, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/her2k/bli_her2k.c b/frame/3/her2k/bli_her2k.c index cf70f396b..51c801526 100644 --- a/frame/3/her2k/bli_her2k.c +++ b/frame/3/her2k/bli_her2k.c @@ -47,24 +47,19 @@ void bli_her2k( obj_t* alpha, obj_t* c ) { //her2k_t* cntl; - obj_t alpha_local; - obj_t alpha_conj_local; - obj_t beta_local; + obj_t alpha_conj; obj_t c_local; obj_t a_local; obj_t bh_local; obj_t b_local; obj_t ah_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_her2k_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -85,6 +80,12 @@ void bli_her2k( obj_t* alpha, bli_obj_induce_trans( ah_local ); bli_obj_toggle_conj( ah_local ); + // Initialize a conjugated copy of alpha. + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), + BLIS_CONJUGATE, + alpha, + &alpha_conj ); + // An optimization: If C is row-stored, transpose the entire operation // so as to allow the macro-kernel more favorable access patterns // through C. 
(The effect of the transposition of A and A' is negligible @@ -102,66 +103,36 @@ void bli_her2k( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_her2k_set_targ_exec_datatypes( &a_local, - &bh_local, - &b_local, - &ah_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of conj(alpha). - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_CONJUGATE, - alpha, - &alpha_conj_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - -/* +#if 0 // Choose the control tree. cntl = her2k_cntl; // Invoke the internal back-end. - bli_her2k_int( &alpha_local, + bli_her2k_int( alpha, &a_local, &bh_local, - &alpha_conj_local, + &alpha_conj, &b_local, &ah_local, - &beta_local, + beta, &c_local, cntl ); -*/ - - bli_herk_int( &alpha_local, +#else + // Invoke herk twice, using beta only the first time. 
+ bli_herk_int( alpha, &a_local, &bh_local, - &beta_local, + beta, &c_local, herk_cntl ); - bli_herk_int( &alpha_conj_local, + + bli_herk_int( &alpha_conj, &b_local, &ah_local, &BLIS_ONE, &c_local, herk_cntl ); - +#endif } // @@ -194,8 +165,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ bli_set_dims_with_trans( transb, m, k, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt_r, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/her2k/bli_her2k_blk_var1f.c b/frame/3/her2k/bli_her2k_blk_var1f.c index 6463193c2..18ab8d1f4 100644 --- a/frame/3/her2k/bli_her2k_blk_var1f.c +++ b/frame/3/her2k/bli_her2k_blk_var1f.c @@ -34,13 +34,10 @@ #include "blis.h" -void bli_her2k_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var1f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { @@ -65,7 +62,7 @@ void bli_her2k_blk_var1f( obj_t* alpha, m_trans = bli_obj_length_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -77,9 +74,8 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( bh, &bh_pack, cntl_sub_packm_b( cntl ) ); - // Pack B' and scale by alpha (if instructed). - bli_packm_int( alpha, - bh, &bh_pack, + // Pack B' (if instructed). + bli_packm_int( bh, &bh_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -101,21 +97,19 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). 
- bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &a1_pack, &bh_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_herk( cntl ) ); @@ -129,6 +123,11 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_obj_release_pack( &a1_pack ); bli_obj_release_pack( &bh_pack ); + // This variant executes two rank-k updates. Therefore, if the + // internal beta scalar on matrix C is non-zero, we must use it only + // for the first rank-k update (and then BLIS_ONE for the other). + bli_obj_scalar_reset( c ); + // // Perform second rank-k update: C = C + conj(alpha) * B * A'. // @@ -137,9 +136,8 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); - // Pack A' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - ah, &ah_pack, + // Pack A' (if instructed). + bli_packm_int( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -161,18 +159,16 @@ void bli_her2k_blk_var1f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem. 
- bli_herk_int( alpha_conj, + bli_herk_int( &BLIS_ONE, &b1_pack, &ah_pack, &BLIS_ONE, diff --git a/frame/3/her2k/bli_her2k_blk_var1f.h b/frame/3/her2k/bli_her2k_blk_var1f.h index b96caf99b..f766ac7dd 100644 --- a/frame/3/her2k/bli_her2k_blk_var1f.h +++ b/frame/3/her2k/bli_her2k_blk_var1f.h @@ -32,13 +32,10 @@ */ -void bli_her2k_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var1f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_blk_var2f.c b/frame/3/her2k/bli_her2k_blk_var2f.c index 2c53aad77..64285dc52 100644 --- a/frame/3/her2k/bli_her2k_blk_var2f.c +++ b/frame/3/her2k/bli_her2k_blk_var2f.c @@ -34,13 +34,10 @@ #include "blis.h" -void bli_her2k_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var2f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { @@ -72,7 +69,7 @@ void bli_her2k_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -82,14 +79,12 @@ void bli_her2k_blk_var2f( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack B and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - b, &b_pack, + // Pack B (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -124,29 +119,26 @@ void bli_her2k_blk_var2f( obj_t* alpha, bli_packm_init( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1' and scale by alpha (if instructed). - bli_packm_int( alpha, - &bh1, &bh1_pack, + // Pack B1' (if instructed). 
+ bli_packm_int( &bh1, &bh1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &ah1, &ah1_pack, + // Pack A1' (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1S, &c1S_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); // Perform her2k subproblem. - bli_her2k_int( alpha, + bli_her2k_int( &BLIS_ONE, &aS_pack, &bh1_pack, - alpha_conj, + &BLIS_ONE, &bS_pack, &ah1_pack, - beta, + &BLIS_ONE, &c1S_pack, cntl_sub_her2k( cntl ) ); diff --git a/frame/3/her2k/bli_her2k_blk_var2f.h b/frame/3/her2k/bli_her2k_blk_var2f.h index e1cbcdb6b..eeb939d0a 100644 --- a/frame/3/her2k/bli_her2k_blk_var2f.h +++ b/frame/3/her2k/bli_her2k_blk_var2f.h @@ -32,13 +32,10 @@ */ -void bli_her2k_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var2f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_blk_var3f.c b/frame/3/her2k/bli_her2k_blk_var3f.c index 58a9699c7..f143ae2a7 100644 --- a/frame/3/her2k/bli_her2k_blk_var3f.c +++ b/frame/3/her2k/bli_her2k_blk_var3f.c @@ -34,13 +34,10 @@ #include "blis.h" -void bli_her2k_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var3f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { @@ -49,7 +46,6 @@ void bli_her2k_blk_var3f( obj_t* alpha, obj_t b1, b1_pack; obj_t ah1, ah1_pack; obj_t c_pack; - obj_t* beta_use; dim_t i; dim_t b_alg; @@ -66,7 +62,7 @@ void bli_her2k_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -74,9 +70,8 @@ void bli_her2k_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -98,14 +93,12 @@ void bli_her2k_blk_var3f( obj_t* alpha, bli_packm_init( &bh1, &bh1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1' and scale by alpha (if instructed). - bli_packm_int( alpha, - &bh1, &bh1_pack, + // Pack B1' (if instructed). + bli_packm_int( &bh1, &bh1_pack, cntl_sub_packm_b( cntl ) ); // Acquire partitions for B1 and A1'. @@ -120,31 +113,32 @@ void bli_her2k_blk_var3f( obj_t* alpha, bli_packm_init( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - // Pack A1' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &ah1, &ah1_pack, + // Pack A1' (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-2k updates, we must use - // beta only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) beta_use = beta; - else beta_use = &BLIS_ONE; - // Perform herk subproblem. - bli_her2k_int( alpha, + bli_her2k_int( &BLIS_ONE, &a1_pack, &bh1_pack, - alpha_conj, + &BLIS_ONE, &b1_pack, &ah1_pack, - beta_use, + &BLIS_ONE, &c_pack, cntl_sub_her2k( cntl ) ); + + // This variant executes multiple rank-2k updates. 
Therefore, if the + // internal beta scalar on matrix C is non-zero, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + // And since c_pack is a local obj_t, we can simply overwrite the + // internal beta scalar with BLIS_ONE once it has been used in the + // first iteration. + if ( i == 0 ) bli_obj_scalar_reset( &c_pack ); } // Unpack C (if C was packed). diff --git a/frame/3/her2k/bli_her2k_blk_var3f.h b/frame/3/her2k/bli_her2k_blk_var3f.h index 0dbac4fc5..5fbdf68bf 100644 --- a/frame/3/her2k/bli_her2k_blk_var3f.h +++ b/frame/3/her2k/bli_her2k_blk_var3f.h @@ -32,13 +32,10 @@ */ -void bli_her2k_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_her2k_blk_var3f( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_int.c b/frame/3/her2k/bli_her2k_int.c index db6dcd9b2..794040efa 100644 --- a/frame/3/her2k/bli_her2k_int.c +++ b/frame/3/her2k/bli_her2k_int.c @@ -36,13 +36,10 @@ #define FUNCPTR_T her2k_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); @@ -66,16 +63,20 @@ static FUNCPTR_T vars[2][4][3] = } }; -void bli_her2k_int( obj_t* alpha, +void bli_her2k_int( obj_t* alpha_abh, obj_t* a, obj_t* bh, - obj_t* alpha_conj, + obj_t* alpha_bah, obj_t* b, obj_t* ah, obj_t* beta, obj_t* c, her2k_t* cntl ) { + obj_t a_local; + obj_t bh_local; + obj_t b_local; + obj_t ah_local; obj_t c_local; varnum_t n; impl_t i; @@ -84,7 +85,7 @@ void bli_her2k_int( obj_t* alpha, // Check parameters. if ( bli_error_checking_is_enabled() ) - bli_her2k_int_check( alpha, a, bh, alpha_conj, b, ah, beta, c, cntl ); + bli_her2k_int_check( alpha_abh, a, bh, alpha_bah, b, ah, beta, c, cntl ); // If C has a zero dimension, return early. 
if ( bli_obj_has_zero_dim( *c ) ) return; @@ -99,6 +100,12 @@ void bli_her2k_int( obj_t* alpha, return; } + // Alias A, B', B, and A' in case we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *bh, bh_local ); + bli_obj_alias_to( *b, b_local ); + bli_obj_alias_to( *ah, ah_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -113,6 +120,27 @@ void bli_her2k_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha_abh is non-unit, typecast and apply it to the scalar + // attached to B'. + if ( !bli_obj_equals( alpha_abh, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha_abh, &bh_local ); + } + + // If alpha_bah is non-unit, typecast and apply it to the scalar + // attached to A'. + if ( !bli_obj_equals( alpha_bah, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha_bah, &ah_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar + // attached to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set a bool based on the uplo field of c. if ( bli_obj_root_is_lower( c_local ) ) uplo = 0; else uplo = 1; @@ -125,13 +153,10 @@ void bli_her2k_int( obj_t* alpha, f = vars[uplo][n][i]; // Invoke the variant. - f( alpha, - a, - bh, - alpha_conj, - b, - ah, - beta, + f( &a_local, + &bh_local, + &b_local, + &ah_local, &c_local, cntl ); } diff --git a/frame/3/her2k/bli_her2k_l_ker_var2.c b/frame/3/her2k/bli_her2k_l_ker_var2.c index fd11555c9..35a7b3eb1 100644 --- a/frame/3/her2k/bli_her2k_l_ker_var2.c +++ b/frame/3/her2k/bli_her2k_l_ker_var2.c @@ -34,16 +34,15 @@ #include "blis.h" -void bli_her2k_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_l_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { + obj_t c_local; + // Implement her2k kernel in terms of two calls to the corresponding // herk kernel. 
@@ -51,18 +50,18 @@ void bli_her2k_l_ker_var2( obj_t* alpha, // only want to apply beta once. (And beta might be unit anyway if this // is not the first iteration of variant 3.) - bli_herk_l_ker_var2( alpha, - a, + bli_obj_alias_to( *c, c_local ); + + bli_herk_l_ker_var2( a, bh, - beta, - c, + &c_local, NULL ); - bli_herk_l_ker_var2( alpha_conj, - b, + bli_obj_scalar_reset( &c_local ); + + bli_herk_l_ker_var2( b, ah, - &BLIS_ONE, - c, + &c_local, NULL ); } diff --git a/frame/3/her2k/bli_her2k_l_ker_var2.h b/frame/3/her2k/bli_her2k_l_ker_var2.h index 493cc98f7..e28711abb 100644 --- a/frame/3/her2k/bli_her2k_l_ker_var2.h +++ b/frame/3/her2k/bli_her2k_l_ker_var2.h @@ -35,13 +35,10 @@ // // Prototype object-based interface. // -void bli_her2k_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_l_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/her2k/bli_her2k_target.c b/frame/3/her2k/bli_her2k_target.c index affde1309..b92d1c9e8 100644 --- a/frame/3/her2k/bli_her2k_target.c +++ b/frame/3/her2k/bli_her2k_target.c @@ -39,8 +39,6 @@ void bli_her2k_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ) { num_t dt_targ_a; @@ -81,18 +79,6 @@ void bli_her2k_set_targ_exec_datatypes( obj_t* a, bli_obj_set_execution_datatype( dt_exec, *ah ); bli_obj_set_execution_datatype( dt_exec, *c ); - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. - *dt_alpha = bli_obj_target_datatype( *a ); - - // Notice that we use the target datatype of matrix a. By inspecting - // the table above, this clearly works for cases (0) through (4), (6), - // and (7). It also works for case (5) since it is transformed into - // case (6) by the above code. 
- *dt_beta = bli_obj_datatype( *c ); - // For now disable packing of C. *pack_c = FALSE; } diff --git a/frame/3/her2k/bli_her2k_target.h b/frame/3/her2k/bli_her2k_target.h index 4d06be8c7..2568b2f11 100644 --- a/frame/3/her2k/bli_her2k_target.h +++ b/frame/3/her2k/bli_her2k_target.h @@ -37,7 +37,5 @@ void bli_her2k_set_targ_exec_datatypes( obj_t* a, obj_t* b, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ); diff --git a/frame/3/her2k/bli_her2k_u_ker_var2.c b/frame/3/her2k/bli_her2k_u_ker_var2.c index 3228e0072..b9c98ad80 100644 --- a/frame/3/her2k/bli_her2k_u_ker_var2.c +++ b/frame/3/her2k/bli_her2k_u_ker_var2.c @@ -34,16 +34,15 @@ #include "blis.h" -void bli_her2k_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_u_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ) { + obj_t c_local; + // Implement her2k kernel in terms of two calls to the corresponding // herk kernel. @@ -51,18 +50,18 @@ void bli_her2k_u_ker_var2( obj_t* alpha, // only want to apply beta once. (And beta might be unit anyway if this // is not the first iteration of variant 3.) - bli_herk_u_ker_var2( alpha, - a, + bli_obj_alias_to( *c, c_local ); + + bli_herk_u_ker_var2( a, bh, - beta, - c, + &c_local, NULL ); - bli_herk_u_ker_var2( alpha_conj, - b, + bli_obj_scalar_reset( &c_local ); + + bli_herk_u_ker_var2( b, ah, - &BLIS_ONE, - c, + &c_local, NULL ); } diff --git a/frame/3/her2k/bli_her2k_u_ker_var2.h b/frame/3/her2k/bli_her2k_u_ker_var2.h index b8b66252f..d6934a67f 100644 --- a/frame/3/her2k/bli_her2k_u_ker_var2.h +++ b/frame/3/her2k/bli_her2k_u_ker_var2.h @@ -35,13 +35,10 @@ // // Prototype object-based interface. 
// -void bli_her2k_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_her2k_u_ker_var2( obj_t* a, obj_t* bh, - obj_t* alpha_conj, obj_t* b, obj_t* ah, - obj_t* beta, obj_t* c, her2k_t* cntl ); diff --git a/frame/3/herk/bli_herk.c b/frame/3/herk/bli_herk.c index 4969e796c..a8b465ce5 100644 --- a/frame/3/herk/bli_herk.c +++ b/frame/3/herk/bli_herk.c @@ -45,21 +45,16 @@ void bli_herk( obj_t* alpha, obj_t* c ) { herk_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t ah_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_herk_check( alpha, a, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -87,37 +82,14 @@ void bli_herk( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_herk_set_targ_exec_datatypes( &a_local, - &ah_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. cntl = herk_cntl; // Invoke the internal back-end. 
- bli_herk_int( &alpha_local, + bli_herk_int( alpha, &a_local, &ah_local, - &beta_local, + beta, &c_local, cntl ); } @@ -148,8 +120,8 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt_r, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt_r, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt_r, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/3/herk/bli_herk_blk_var1f.c b/frame/3/herk/bli_herk_blk_var1f.c index dc137e1ce..ffa1fc8f1 100644 --- a/frame/3/herk/bli_herk_blk_var1f.c +++ b/frame/3/herk/bli_herk_blk_var1f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_herk_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var1f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -58,7 +56,7 @@ void bli_herk_blk_var1f( obj_t* alpha, m_trans = bli_obj_length_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_herk_blk_var1f( obj_t* alpha, bli_packm_init( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); - // Pack A' and scale by alpha (if instructed). - bli_packm_int( alpha, - ah, &ah_pack, + // Pack A' (if instructed). + bli_packm_int( ah, &ah_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -90,21 +87,19 @@ void bli_herk_blk_var1f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). 
+ bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &a1_pack, &ah_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_herk( cntl ) ); diff --git a/frame/3/herk/bli_herk_blk_var1f.h b/frame/3/herk/bli_herk_blk_var1f.h index 94900b772..a33247a52 100644 --- a/frame/3/herk/bli_herk_blk_var1f.h +++ b/frame/3/herk/bli_herk_blk_var1f.h @@ -32,10 +32,8 @@ */ -void bli_herk_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var1f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_blk_var2f.c b/frame/3/herk/bli_herk_blk_var2f.c index df39ce55d..55edea9af 100644 --- a/frame/3/herk/bli_herk_blk_var2f.c +++ b/frame/3/herk/bli_herk_blk_var2f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_herk_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var2f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -65,7 +63,7 @@ void bli_herk_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *c ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -73,9 +71,8 @@ void bli_herk_blk_var2f( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -104,21 +101,19 @@ void bli_herk_blk_var2f( obj_t* alpha, bli_packm_init( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1' and scale by alpha (if instructed). - bli_packm_int( alpha, - &ah1, &ah1_pack, + // Pack A1' (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1S, &c1S_pack, + // Pack C1 (if instructed). 
+ bli_packm_int( &c1S, &c1S_pack, cntl_sub_packm_c( cntl ) ); // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &aS_pack, &ah1_pack, - beta, + &BLIS_ONE, &c1S_pack, cntl_sub_herk( cntl ) ); diff --git a/frame/3/herk/bli_herk_blk_var2f.h b/frame/3/herk/bli_herk_blk_var2f.h index 1a5034242..97eeed211 100644 --- a/frame/3/herk/bli_herk_blk_var2f.h +++ b/frame/3/herk/bli_herk_blk_var2f.h @@ -32,10 +32,8 @@ */ -void bli_herk_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var2f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_blk_var3f.c b/frame/3/herk/bli_herk_blk_var3f.c index a54e341b9..2c0cafa92 100644 --- a/frame/3/herk/bli_herk_blk_var3f.c +++ b/frame/3/herk/bli_herk_blk_var3f.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_herk_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var3f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ) { obj_t a1, a1_pack; obj_t ah1, ah1_pack; obj_t c_pack; - obj_t* beta_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_herk_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_herk_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -91,28 +87,29 @@ void bli_herk_blk_var3f( obj_t* alpha, bli_packm_init( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). 
- bli_packm_int( alpha, - &ah1, &ah1_pack, + // Pack A1' (if instructed). + bli_packm_int( &ah1, &ah1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // beta only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) beta_use = beta; - else beta_use = &BLIS_ONE; - // Perform herk subproblem. - bli_herk_int( alpha, + bli_herk_int( &BLIS_ONE, &a1_pack, &ah1_pack, - beta_use, + &BLIS_ONE, &c_pack, cntl_sub_herk( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal beta scalar on matrix C is non-zero, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + // And since c_pack is a local obj_t, we can simply overwrite the + // internal beta scalar with BLIS_ONE once it has been used in the + // first iteration. + if ( i == 0 ) bli_obj_scalar_reset( &c_pack ); } // Unpack C (if C was packed). diff --git a/frame/3/herk/bli_herk_blk_var3f.h b/frame/3/herk/bli_herk_blk_var3f.h index 5e96900f7..726bf2115 100644 --- a/frame/3/herk/bli_herk_blk_var3f.h +++ b/frame/3/herk/bli_herk_blk_var3f.h @@ -32,10 +32,8 @@ */ -void bli_herk_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_herk_blk_var3f( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_int.c b/frame/3/herk/bli_herk_int.c index afeef8d32..9a643aa20 100644 --- a/frame/3/herk/bli_herk_int.c +++ b/frame/3/herk/bli_herk_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T herk_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* ah, - obj_t* beta, obj_t* c, herk_t* cntl ); @@ -70,6 +68,8 @@ void bli_herk_int( obj_t* alpha, obj_t* c, herk_t* cntl ) { + obj_t a_local; + obj_t ah_local; obj_t c_local; varnum_t n; impl_t i; @@ -91,6 +91,10 @@ void bli_herk_int( obj_t* alpha, return; } + // Alias A and A' in case we need to update attached scalars.
+ bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *ah, ah_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -105,6 +109,20 @@ void bli_herk_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha is non-unit, typecast and apply it to the scalar + // attached to A'. + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &ah_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar + // attached to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set a bool based on the uplo field of C's root object. if ( bli_obj_root_is_lower( c_local ) ) uplo = 0; else uplo = 1; @@ -117,10 +135,8 @@ void bli_herk_int( obj_t* alpha, f = vars[uplo][n][i]; // Invoke the variant. - f( alpha, - a, - ah, - beta, + f( &a_local, + &ah_local, &c_local, cntl ); } diff --git a/frame/3/herk/bli_herk_l_ker_var2.c b/frame/3/herk/bli_herk_l_ker_var2.c index be40383a6..ff104f372 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.c +++ b/frame/3/herk/bli_herk_l_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,herk_l_ker_var2); -void bli_herk_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_l_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -80,15 +78,15 @@ void bli_herk_l_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - +/* // Handle the special case where c and a are complex and b is real. // Note that this is the ONLY case allowed by the inner kernel whereby // the datatypes of a and b differ. 
In this situation, the execution @@ -104,16 +102,17 @@ void bli_herk_l_ker_var2( obj_t* alpha, cs_a *= 2; ps_a *= 2; } +*/ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/herk/bli_herk_l_ker_var2.h b/frame/3/herk/bli_herk_l_ker_var2.h index cf09d804d..2a36e5b37 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.h +++ b/frame/3/herk/bli_herk_l_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_herk_l_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_l_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/herk/bli_herk_target.c b/frame/3/herk/bli_herk_target.c index a04ee34b2..21e63bc60 100644 --- a/frame/3/herk/bli_herk_target.c +++ b/frame/3/herk/bli_herk_target.c @@ -37,8 +37,6 @@ void bli_herk_set_targ_exec_datatypes( obj_t* a, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ) { num_t dt_targ_a; @@ -69,10 +67,6 @@ void bli_herk_set_targ_exec_datatypes( obj_t* a, bli_obj_set_execution_datatype( dt_exec, *ah ); bli_obj_set_execution_datatype( dt_exec, *c ); - *dt_alpha = bli_obj_target_datatype( *a ); - - *dt_beta = bli_obj_datatype( *c ); - // For now disable packing of C. *pack_c = FALSE; } diff --git a/frame/3/herk/bli_herk_target.h b/frame/3/herk/bli_herk_target.h index 0610c690a..689fc521c 100644 --- a/frame/3/herk/bli_herk_target.h +++ b/frame/3/herk/bli_herk_target.h @@ -35,8 +35,6 @@ void bli_herk_set_targ_exec_datatypes( obj_t* a, obj_t* ah, obj_t* c, - num_t* dt_alpha, - num_t* dt_beta, bool_t* pack_c ); /* diff --git a/frame/3/herk/bli_herk_u_ker_var2.c b/frame/3/herk/bli_herk_u_ker_var2.c index fc332bd11..b3c65145d 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.c +++ b/frame/3/herk/bli_herk_u_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,herk_u_ker_var2); -void bli_herk_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_u_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ) { @@ -80,15 +78,15 @@ void bli_herk_u_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - +/* // Handle the special case where c and a are complex and b is real. 
// Note that this is the ONLY case allowed by the inner kernel whereby // the datatypes of a and b differ. In this situation, the execution @@ -104,16 +102,17 @@ void bli_herk_u_ker_var2( obj_t* alpha, cs_a *= 2; ps_a *= 2; } +*/ - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/herk/bli_herk_u_ker_var2.h b/frame/3/herk/bli_herk_u_ker_var2.h index 1d8da175b..c72764146 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.h +++ b/frame/3/herk/bli_herk_u_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_herk_u_ker_var2( obj_t* alpha, - obj_t* a, +void bli_herk_u_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, herk_t* cntl ); diff --git a/frame/3/symm/bli_symm.c b/frame/3/symm/bli_symm.c index e95f71b8f..fe56aeb59 100644 --- a/frame/3/symm/bli_symm.c +++ b/frame/3/symm/bli_symm.c @@ -47,21 +47,16 @@ void bli_symm( side_t side, obj_t* c ) { gemm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_symm_check( side, alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -89,38 +84,15 @@ void bli_symm( side_t side, bli_obj_swap( a_local, b_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_gemm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. We can just use hemm since the algorithm // is nearly identical to that of symm. cntl = hemm_cntl; // Invoke the internal back-end. 
- bli_gemm_int( &alpha_local, + bli_gemm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -155,8 +127,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dim_with_side( side, m, n, mn_a ); \ bli_set_dims_with_trans( transb, m, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/syr2k/bli_syr2k.c b/frame/3/syr2k/bli_syr2k.c index 15fd5f761..e2be579cd 100644 --- a/frame/3/syr2k/bli_syr2k.c +++ b/frame/3/syr2k/bli_syr2k.c @@ -47,23 +47,18 @@ void bli_syr2k( obj_t* alpha, obj_t* c ) { her2k_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t c_local; obj_t a_local; obj_t bt_local; obj_t b_local; obj_t at_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syr2k_check( alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -91,59 +86,37 @@ void bli_syr2k( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_her2k_set_targ_exec_datatypes( &a_local, - &bt_local, - &b_local, - &at_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. 
- bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. We can just use her2k since the algorithm // is nearly identical to that of syr2k. cntl = her2k_cntl; +#if 1 // Invoke the internal back-end. - bli_her2k_int( &alpha_local, + bli_her2k_int( alpha, &a_local, &bt_local, - &alpha_local, + alpha, &b_local, &at_local, - &beta_local, + beta, &c_local, cntl ); -/* - bli_herk_int( &alpha_local, - a, - &bt, - &beta_local, +#else + // Invoke herk twice, using beta only the first time. + bli_herk_int( alpha, + &a_local, + &bt_local, + beta, &c_local, herk_cntl ); - bli_herk_int( &alpha_local, - b, - &at, + + bli_herk_int( alpha, + &b_local, + &at_local, &BLIS_ONE, &c_local, herk_cntl ); -*/ +#endif } // @@ -175,8 +148,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ bli_set_dims_with_trans( transb, m, k, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/syrk/bli_syrk.c b/frame/3/syrk/bli_syrk.c index af3956ccb..b879378c1 100644 --- a/frame/3/syrk/bli_syrk.c +++ b/frame/3/syrk/bli_syrk.c @@ -45,21 +45,16 @@ void bli_syrk( obj_t* alpha, obj_t* c ) { herk_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t at_local; obj_t c_local; - num_t dt_alpha; - num_t dt_beta; - bool_t pack_c; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_syrk_check( alpha, a, beta, c ); // If alpha is zero, scale by beta and return. 
- if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -83,38 +78,15 @@ void bli_syrk( obj_t* alpha, bli_obj_induce_trans( c_local ); } - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_herk_set_targ_exec_datatypes( &a_local, - &at_local, - &c_local, - &dt_alpha, - &dt_beta, - &pack_c ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - - if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); - // Choose the control tree. We can just use herk since the algorithm // is nearly identical to that of syrk. cntl = herk_cntl; // Invoke the internal back-end. - bli_herk_int( &alpha_local, + bli_herk_int( alpha, &a_local, &at_local, - &beta_local, + beta, &c_local, cntl ); } @@ -144,8 +116,8 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ diff --git a/frame/3/trmm/bli_trmm.c b/frame/3/trmm/bli_trmm.c index 9c5331eee..23b18bf04 100644 --- a/frame/3/trmm/bli_trmm.c +++ b/frame/3/trmm/bli_trmm.c @@ -46,18 +46,16 @@ void bli_trmm( side_t side, obj_t* b ) { trmm_t* cntl; - obj_t alpha_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; // Check parameters. 
if ( bli_error_checking_is_enabled() ) bli_trmm_check( side, alpha, a, b ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( alpha, b ); return; @@ -127,25 +125,12 @@ void bli_trmm( side_t side, bli_obj_set_as_root( b_local ); bli_obj_set_as_root( c_local ); - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_trmm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - // Choose the control tree. if ( bli_is_left( side ) ) cntl = trmm_l_cntl; else cntl = trmm_r_cntl; // Invoke the internal back-end. - bli_trmm_int( &alpha_local, + bli_trmm_int( alpha, &a_local, &b_local, &BLIS_ZERO, @@ -179,7 +164,7 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dim_with_side( side, m, n, mn_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, n, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/trmm/bli_trmm_blk_var1.c b/frame/3/trmm/bli_trmm_blk_var1.c index aa09f105f..63a925b64 100644 --- a/frame/3/trmm/bli_trmm_blk_var1.c +++ b/frame/3/trmm/bli_trmm_blk_var1.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -69,7 +67,7 @@ void bli_trmm_blk_var1( obj_t* alpha, bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -77,9 +75,8 @@ void bli_trmm_blk_var1( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the m dimension. @@ -101,21 +98,19 @@ void bli_trmm_blk_var1( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trmm( cntl ) ); diff --git a/frame/3/trmm/bli_trmm_blk_var1.h b/frame/3/trmm/bli_trmm_blk_var1.h index 1a0fc6cae..c60acb355 100644 --- a/frame/3/trmm/bli_trmm_blk_var1.h +++ b/frame/3/trmm/bli_trmm_blk_var1.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var1( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var1( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var2b.c b/frame/3/trmm/bli_trmm_blk_var2b.c index afe3bba90..c2dff76c9 100644 --- a/frame/3/trmm/bli_trmm_blk_var2b.c +++ b/frame/3/trmm/bli_trmm_blk_var2b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var2b( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var2b( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var2b( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trmm( cntl ) ); diff --git a/frame/3/trmm/bli_trmm_blk_var2b.h b/frame/3/trmm/bli_trmm_blk_var2b.h index 1e04ed383..3679aa0c3 100644 --- a/frame/3/trmm/bli_trmm_blk_var2b.h +++ b/frame/3/trmm/bli_trmm_blk_var2b.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var2f.c b/frame/3/trmm/bli_trmm_blk_var2f.c index 3c39b6405..74be4554e 100644 --- a/frame/3/trmm/bli_trmm_blk_var2f.c +++ b/frame/3/trmm/bli_trmm_blk_var2f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var2f( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var2f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trmm( cntl ) ); diff --git a/frame/3/trmm/bli_trmm_blk_var2f.h b/frame/3/trmm/bli_trmm_blk_var2f.h index 1fae4527a..b45d54256 100644 --- a/frame/3/trmm/bli_trmm_blk_var2f.h +++ b/frame/3/trmm/bli_trmm_blk_var2f.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var3b.c b/frame/3/trmm/bli_trmm_blk_var3b.c index e84256a30..4a8ea4549 100644 --- a/frame/3/trmm/bli_trmm_blk_var3b.c +++ b/frame/3/trmm/bli_trmm_blk_var3b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var3b( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var3b( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var3b( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trmm( cntl ) ); } diff --git a/frame/3/trmm/bli_trmm_blk_var3b.h b/frame/3/trmm/bli_trmm_blk_var3b.h index 8da841847..124daf2c7 100644 --- a/frame/3/trmm/bli_trmm_blk_var3b.h +++ b/frame/3/trmm/bli_trmm_blk_var3b.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_blk_var3f.c b/frame/3/trmm/bli_trmm_blk_var3f.c index ce44ead93..8b4df246e 100644 --- a/frame/3/trmm/bli_trmm_blk_var3f.c +++ b/frame/3/trmm/bli_trmm_blk_var3f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trmm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trmm_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). 
- bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trmm_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -90,21 +87,19 @@ void bli_trmm_blk_var3f( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); // Perform trmm subproblem. - bli_trmm_int( alpha, + bli_trmm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trmm( cntl ) ); } diff --git a/frame/3/trmm/bli_trmm_blk_var3f.h b/frame/3/trmm/bli_trmm_blk_var3f.h index 72c8e11f9..0f15f9a48 100644 --- a/frame/3/trmm/bli_trmm_blk_var3f.h +++ b/frame/3/trmm/bli_trmm_blk_var3f.h @@ -32,10 +32,8 @@ */ -void bli_trmm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trmm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_int.c b/frame/3/trmm/bli_trmm_int.c index 17552e5f5..7f11d8513 100644 --- a/frame/3/trmm/bli_trmm_int.c +++ b/frame/3/trmm/bli_trmm_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T trmm_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); @@ -92,6 +90,8 @@ void bli_trmm_int( obj_t* alpha, obj_t* c, trmm_t* cntl ) { + obj_t a_local; + obj_t b_local; obj_t c_local; bool_t side, uplo; varnum_t n; @@ -113,6 +113,10 @@ void bli_trmm_int( obj_t* alpha, return; } + // Alias A and B in case 
we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *b, b_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -127,6 +131,20 @@ void bli_trmm_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If alpha is non-unit, typecast and apply it to the scalar attached + // to B. + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &b_local ); + } + + // If beta is non-unit, typecast and apply it to the scalar attached + // to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). @@ -152,10 +170,8 @@ void bli_trmm_int( obj_t* alpha, f = vars[side][uplo][n][i]; // Invoke the variant. - f( alpha, - a, - b, - beta, + f( &a_local, + &b_local, &c_local, cntl ); } diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index e85ba7a73..7ba1e10be 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_ll_ker_var2); -void bli_trmm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_ll_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at 
the beta offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.h b/frame/3/trmm/bli_trmm_ll_ker_var2.h index 93fb695e0..08ebc465a 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.h +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trmm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.c b/frame/3/trmm/bli_trmm_lu_ker_var2.c index d86673980..66bdea8b6 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.c +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_lu_ker_var2); -void bli_trmm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_lu_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.h b/frame/3/trmm/bli_trmm_lu_ker_var2.h index 434376097..6bfe27810 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.h +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_trmm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index f697811f6..c727b5748 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_rl_ker_var2); -void bli_trmm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_rl_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. 
+ bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.h b/frame/3/trmm/bli_trmm_rl_ker_var2.h index 3c237dd70..95e4d3ec3 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.h +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_trmm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 1723f7916..e8cc82abd 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -51,10 +51,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trmm_ru_ker_var2); -void bli_trmm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ) { @@ -80,23 +78,24 @@ void bli_trmm_ru_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; - void* buf_alpha; + obj_t scalar_a; + obj_t scalar_b; - num_t dt_beta; + void* buf_alpha; void* buf_beta; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the beta offset. 
- bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); - // If beta is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the beta object and extract the buffer at the beta offset. - bli_set_scalar_dt_buffer( beta, dt_exec, dt_beta, buf_beta ); + // Detach and multiply the scalars attached to A and B. + bli_obj_scalar_detach( a, &scalar_a ); + bli_obj_scalar_detach( b, &scalar_b ); + bli_mulsc( &scalar_a, &scalar_b ); + + // Grab the addresses of the internal scalar buffers for the scalar + // merged above and the scalar attached to C. + buf_alpha = bli_obj_internal_scalar_buffer( scalar_b ); + buf_beta = bli_obj_internal_scalar_buffer( *c ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.h b/frame/3/trmm/bli_trmm_ru_ker_var2.h index dff6d906c..4537ca12d 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.h +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trmm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trmm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trmm_t* cntl ); diff --git a/frame/3/trmm/bli_trmm_target.c b/frame/3/trmm/bli_trmm_target.c index 18734df4b..c3ec24e65 100644 --- a/frame/3/trmm/bli_trmm_target.c +++ b/frame/3/trmm/bli_trmm_target.c @@ -36,8 +36,7 @@ void bli_trmm_set_targ_exec_datatypes( obj_t* a, obj_t* b, - obj_t* c, - num_t* dt_alpha ) + obj_t* c ) { num_t dt_targ_a; num_t dt_targ_b; @@ -58,8 +57,6 @@ void bli_trmm_set_targ_exec_datatypes( obj_t* a, bli_obj_set_execution_datatype( dt_exec, *a ); bli_obj_set_execution_datatype( dt_exec, *b ); bli_obj_set_execution_datatype( dt_exec, *c ); - - *dt_alpha = bli_obj_target_datatype( *a ); } /* diff --git a/frame/3/trmm/bli_trmm_target.h b/frame/3/trmm/bli_trmm_target.h index 182d7a823..7fb2bc463 100644 --- a/frame/3/trmm/bli_trmm_target.h +++ b/frame/3/trmm/bli_trmm_target.h @@ -34,8 +34,7 @@ void bli_trmm_set_targ_exec_datatypes( obj_t* a, obj_t* b, - obj_t* c, - num_t* dt_alpha ); + obj_t* c ); /* void bli_trmm_get_target_datatypes( obj_t* a, diff --git a/frame/3/trmm3/bli_trmm3.c b/frame/3/trmm3/bli_trmm3.c index 1baa4fb43..6b8090b41 100644 --- a/frame/3/trmm3/bli_trmm3.c +++ b/frame/3/trmm3/bli_trmm3.c @@ -48,23 +48,16 @@ void bli_trmm3( side_t side, obj_t* c ) { trmm_t* cntl; - obj_t alpha_local; - obj_t beta_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_targ_a; - //num_t dt_targ_b; - //num_t dt_targ_c; - num_t dt_alpha; - num_t dt_beta; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trmm3_check( side, alpha, a, b, beta, c ); // If alpha is zero, scale by beta and return. 
- if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( beta, c ); return; @@ -83,12 +76,6 @@ void bli_trmm3( side_t side, bli_obj_set_as_root( b_local ); bli_obj_set_as_root( c_local ); - // For now, assume the storage datatypes are the desired target - // datatypes. - dt_targ_a = bli_obj_datatype( *a ); - //dt_targ_b = bli_obj_datatype( *b ); - //dt_targ_c = bli_obj_datatype( *c ); - // We assume trmm is implemented with a block-panel kernel, thus, we will // only directly support the BLIS_LEFT case. We handle the BLIS_RIGHT case // by transposing the operation. @@ -117,31 +104,15 @@ void bli_trmm3( side_t side, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local ); } - // Create an object to hold a copy-cast of alpha. Notice that we use - // the target datatype of matrix A. - dt_alpha = dt_targ_a; - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - - // Create an object to hold a copy-cast of beta. Notice that we use - // the datatype of C. - dt_beta = bli_obj_datatype( *c ); - bli_obj_init_scalar_copy_of( dt_beta, - BLIS_NO_CONJUGATE, - beta, - &beta_local ); - // Choose the control tree. if ( bli_is_left( side ) ) cntl = trmm_l_cntl; else cntl = trmm_r_cntl; // Invoke the internal back-end. 
- bli_trmm_int( &alpha_local, + bli_trmm_int( alpha, &a_local, &b_local, - &beta_local, + beta, &c_local, cntl ); } @@ -177,8 +148,8 @@ void PASTEMAC(ch,opname)( \ bli_set_dim_with_side( side, m, n, mn_a ); \ bli_set_dims_with_trans( transb, m, n, m_b, n_b ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ - bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/trsm/bli_trsm.c b/frame/3/trsm/bli_trsm.c index db37d652b..69d544341 100644 --- a/frame/3/trsm/bli_trsm.c +++ b/frame/3/trsm/bli_trsm.c @@ -46,18 +46,16 @@ void bli_trsm( side_t side, obj_t* b ) { trsm_t* cntl; - obj_t alpha_local; obj_t a_local; obj_t b_local; obj_t c_local; - num_t dt_alpha; // Check parameters. if ( bli_error_checking_is_enabled() ) bli_trsm_check( side, alpha, a, b ); // If alpha is zero, scale by beta and return. - if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) ) + if ( bli_obj_equals( alpha, &BLIS_ZERO ) ) { bli_scalm( alpha, b ); return; @@ -127,25 +125,12 @@ void bli_trsm( side_t side, bli_obj_set_as_root( b_local ); bli_obj_set_as_root( c_local ); - // Set the target and execution datatypes of the objects, and apply - // any transformations necessary to handle mixed domain computation. - bli_trmm_set_targ_exec_datatypes( &a_local, - &b_local, - &c_local, - &dt_alpha ); - - // Create an object to hold a copy-cast of alpha. - bli_obj_init_scalar_copy_of( dt_alpha, - BLIS_NO_CONJUGATE, - alpha, - &alpha_local ); - // Choose the control tree. if ( bli_is_left( side ) ) cntl = trsm_l_cntl; else cntl = trsm_r_cntl; // Invoke the internal back-end. 
- bli_trsm_int( &alpha_local, + bli_trsm_int( alpha, &a_local, &b_local, &BLIS_ZERO, @@ -179,7 +164,7 @@ void PASTEMAC(ch,opname)( \ \ bli_set_dim_with_side( side, m, n, mn_a ); \ \ - bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \ + bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \ \ bli_obj_create_with_attached_buffer( dt, mn_a, mn_a, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, n, b, rs_b, cs_b, &bo ); \ diff --git a/frame/3/trsm/bli_trsm_blk_var1b.c b/frame/3/trsm/bli_trsm_blk_var1b.c index 10bfabffd..16c1973b5 100644 --- a/frame/3/trsm/bli_trsm_blk_var1b.c +++ b/frame/3/trsm/bli_trsm_blk_var1b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var1b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -68,9 +66,8 @@ void bli_trsm_blk_var1b( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B1 (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the remaining portion of the m dimension. @@ -92,16 +89,15 @@ void bli_trsm_blk_var1b( obj_t* alpha, bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1, cntl_sub_trsm( cntl ) ); } diff --git a/frame/3/trsm/bli_trsm_blk_var1b.h b/frame/3/trsm/bli_trsm_blk_var1b.h index 5c62a375e..614ee0e20 100644 --- a/frame/3/trsm/bli_trsm_blk_var1b.h +++ b/frame/3/trsm/bli_trsm_blk_var1b.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var1b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var1f.c b/frame/3/trsm/bli_trsm_blk_var1f.c index 188f33421..540de42c0 100644 --- a/frame/3/trsm/bli_trsm_blk_var1f.c +++ b/frame/3/trsm/bli_trsm_blk_var1f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -67,9 +65,8 @@ void bli_trsm_blk_var1f( obj_t* alpha, bli_packm_init( b, &b_pack, cntl_sub_packm_b( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - b, &b_pack, + // Pack B1 (if instructed). + bli_packm_int( b, &b_pack, cntl_sub_packm_b( cntl ) ); // Partition along the remaining portion of the m dimension. @@ -89,16 +86,15 @@ void bli_trsm_blk_var1f( obj_t* alpha, bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b_pack, - beta, + &BLIS_ONE, &c1, cntl_sub_trsm( cntl ) ); } diff --git a/frame/3/trsm/bli_trsm_blk_var1f.h b/frame/3/trsm/bli_trsm_blk_var1f.h index 9f53fc234..ccc799f0f 100644 --- a/frame/3/trsm/bli_trsm_blk_var1f.h +++ b/frame/3/trsm/bli_trsm_blk_var1f.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var1f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var1f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var2b.c b/frame/3/trsm/bli_trsm_blk_var2b.c index a51a6eed8..9ecadc744 100644 --- a/frame/3/trsm/bli_trsm_blk_var2b.c +++ b/frame/3/trsm/bli_trsm_blk_var2b.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trsm_blk_var2b( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trsm_blk_var2b( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trsm_blk_var2b( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trsm( cntl ) ); diff --git a/frame/3/trsm/bli_trsm_blk_var2b.h b/frame/3/trsm/bli_trsm_blk_var2b.h index 5a5f00d66..26f52d759 100644 --- a/frame/3/trsm/bli_trsm_blk_var2b.h +++ b/frame/3/trsm/bli_trsm_blk_var2b.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var2b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var2f.c b/frame/3/trsm/bli_trsm_blk_var2f.c index bfd59ff6d..05da54b0e 100644 --- a/frame/3/trsm/bli_trsm_blk_var2f.c +++ b/frame/3/trsm/bli_trsm_blk_var2f.c @@ -34,10 +34,8 @@ #include "blis.h" -void bli_trsm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -58,7 +56,7 @@ void bli_trsm_blk_var2f( obj_t* alpha, n_trans = bli_obj_width_after_trans( *b ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -66,9 +64,8 @@ void bli_trsm_blk_var2f( obj_t* alpha, bli_packm_init( a, &a_pack, cntl_sub_packm_a( cntl ) ); - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, + // Pack A (if instructed). + bli_packm_int( a, &a_pack, cntl_sub_packm_a( cntl ) ); // Partition along the n dimension. @@ -90,21 +87,19 @@ void bli_trsm_blk_var2f( obj_t* alpha, bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1, &c1_pack, + // Pack C1 (if instructed). + bli_packm_int( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); // Perform trsm subproblem. 
- bli_trsm_int( alpha, + bli_trsm_int( &BLIS_ONE, &a_pack, &b1_pack, - beta, + &BLIS_ONE, &c1_pack, cntl_sub_trsm( cntl ) ); diff --git a/frame/3/trsm/bli_trsm_blk_var2f.h b/frame/3/trsm/bli_trsm_blk_var2f.h index eed4040b0..823233b15 100644 --- a/frame/3/trsm/bli_trsm_blk_var2f.h +++ b/frame/3/trsm/bli_trsm_blk_var2f.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var2f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var2f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var3b.c b/frame/3/trsm/bli_trsm_blk_var3b.c index 6176fdda6..2ba3c3532 100644 --- a/frame/3/trsm/bli_trsm_blk_var3b.c +++ b/frame/3/trsm/bli_trsm_blk_var3b.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_trsm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { obj_t a1, a1_pack; obj_t b1, b1_pack; obj_t c_pack; - obj_t* alpha_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_trsm_blk_var3b( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_trsm_blk_var3b( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). - bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -91,28 +87,27 @@ void bli_trsm_blk_var3b( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). 
+ bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // alpha only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) alpha_use = alpha; - else alpha_use = &BLIS_ONE; - // Perform trsm subproblem. - bli_trsm_int( alpha_use, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trsm( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal alpha scalar on matrix A/B is non-unit, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + if ( i == 0 ) { bli_obj_scalar_reset( a ); + bli_obj_scalar_reset( b ); } } // Unpack C (if C was packed). diff --git a/frame/3/trsm/bli_trsm_blk_var3b.h b/frame/3/trsm/bli_trsm_blk_var3b.h index c37fbd498..83f6b74f4 100644 --- a/frame/3/trsm/bli_trsm_blk_var3b.h +++ b/frame/3/trsm/bli_trsm_blk_var3b.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var3b( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3b( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_blk_var3f.c b/frame/3/trsm/bli_trsm_blk_var3f.c index 596e63426..e93a67e3e 100644 --- a/frame/3/trsm/bli_trsm_blk_var3f.c +++ b/frame/3/trsm/bli_trsm_blk_var3f.c @@ -34,17 +34,14 @@ #include "blis.h" -void bli_trsm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { obj_t a1, a1_pack; obj_t b1, b1_pack; obj_t c_pack; - obj_t* alpha_use; dim_t i; dim_t b_alg; @@ -59,7 +56,7 @@ void bli_trsm_blk_var3f( obj_t* alpha, k_trans = bli_obj_width_after_trans( *a ); // Scale C by beta (if instructed). - bli_scalm_int( beta, + bli_scalm_int( &BLIS_ONE, c, cntl_sub_scalm( cntl ) ); @@ -67,9 +64,8 @@ void bli_trsm_blk_var3f( obj_t* alpha, bli_packm_init( c, &c_pack, cntl_sub_packm_c( cntl ) ); - // Pack C and scale by beta (if instructed). 
- bli_packm_int( beta, - c, &c_pack, + // Pack C (if instructed). + bli_packm_int( c, &c_pack, cntl_sub_packm_c( cntl ) ); // Partition along the k dimension. @@ -91,28 +87,27 @@ void bli_trsm_blk_var3f( obj_t* alpha, bli_packm_init( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, + // Pack A1 (if instructed). + bli_packm_int( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - // Pack B1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &b1, &b1_pack, + // Pack B1 (if instructed). + bli_packm_int( &b1, &b1_pack, cntl_sub_packm_b( cntl ) ); - // Since this variant executes multiple rank-k updates, we must use - // alpha only for the first iteration and BLIS_ONE for all others. - if ( i == 0 ) alpha_use = alpha; - else alpha_use = &BLIS_ONE; - // Perform trsm subproblem. - bli_trsm_int( alpha_use, + bli_trsm_int( &BLIS_ONE, &a1_pack, &b1_pack, - beta, + &BLIS_ONE, &c_pack, cntl_sub_trsm( cntl ) ); + + // This variant executes multiple rank-k updates. Therefore, if the + // internal alpha scalar on matrix A/B is non-unit, we must use it + // only for the first iteration (and then BLIS_ONE for all others). + if ( i == 0 ) { bli_obj_scalar_reset( a ); + bli_obj_scalar_reset( b ); } } // Unpack C (if C was packed). 
diff --git a/frame/3/trsm/bli_trsm_blk_var3f.h b/frame/3/trsm/bli_trsm_blk_var3f.h index 203f13b15..dbfafab47 100644 --- a/frame/3/trsm/bli_trsm_blk_var3f.h +++ b/frame/3/trsm/bli_trsm_blk_var3f.h @@ -32,10 +32,8 @@ */ -void bli_trsm_blk_var3f( obj_t* alpha, - obj_t* a, +void bli_trsm_blk_var3f( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_int.c b/frame/3/trsm/bli_trsm_int.c index e3970c5bb..3e8d7dd3f 100644 --- a/frame/3/trsm/bli_trsm_int.c +++ b/frame/3/trsm/bli_trsm_int.c @@ -36,10 +36,8 @@ #define FUNCPTR_T trsm_fp -typedef void (*FUNCPTR_T)( obj_t* alpha, - obj_t* a, +typedef void (*FUNCPTR_T)( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); @@ -92,6 +90,8 @@ void bli_trsm_int( obj_t* alpha, obj_t* c, trsm_t* cntl ) { + obj_t a_local; + obj_t b_local; obj_t c_local; bool_t side, uplo; varnum_t n; @@ -113,6 +113,10 @@ void bli_trsm_int( obj_t* alpha, return; } + // Alias A and B in case we need to update attached scalars. + bli_obj_alias_to( *a, a_local ); + bli_obj_alias_to( *b, b_local ); + // Alias C in case we need to induce a transposition. bli_obj_alias_to( *c, c_local ); @@ -127,6 +131,12 @@ void bli_trsm_int( obj_t* alpha, bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local ); } + // If beta is non-unit, apply it to the scalar attached to C. + if ( !bli_obj_equals( beta, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( beta, &c_local ); + } + // Set two bools: one based on the implied side parameter (the structure // of the root object) and one based on the uplo field of the triangular // matrix's root object (whether that is matrix A or matrix B). @@ -135,6 +145,13 @@ void bli_trsm_int( obj_t* alpha, side = 0; if ( bli_obj_root_is_lower( *a ) ) uplo = 0; else uplo = 1; + + // If alpha is non-unit, typecast and apply it to the scalar + // attached to B (the non-triangular matrix). 
+ if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &b_local ); + } } else // if ( bli_obj_root_is_triangular( *b ) ) { @@ -142,6 +159,13 @@ void bli_trsm_int( obj_t* alpha, // Set a bool based on the uplo field of A's root object. if ( bli_obj_root_is_lower( *b ) ) uplo = 0; else uplo = 1; + + // If alpha is non-unit, typecast and apply it to the scalar + // attached to A (the non-triangular matrix). + if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) + { + bli_obj_scalar_apply_scalar( alpha, &a_local ); + } } // Extract the variant number and implementation type. @@ -152,10 +176,8 @@ void bli_trsm_int( obj_t* alpha, f = vars[side][uplo][n][i]; // Invoke the variant. - f( alpha, - a, - b, - beta, + f( &a_local, + &b_local, &c_local, cntl ); } diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 520d8d714..74788ebeb 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_ll_ker_var2); -void bli_trsm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_ll_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to B. + buf_alpha = bli_obj_internal_scalar_buffer( *b ); // Index into the type combination array to extract the correct // function pointer. 
diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.h b/frame/3/trsm/bli_trsm_ll_ker_var2.h index f87001583..35f4bf0cf 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.h +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. // -void bli_trsm_ll_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ll_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.c b/frame/3/trsm/bli_trsm_lu_ker_var2.c index 4cd0a20f6..35b77b365 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.c +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_lu_ker_var2); -void bli_trsm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_lu_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to B. + buf_alpha = bli_obj_internal_scalar_buffer( *b ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.h b/frame/3/trsm/bli_trsm_lu_ker_var2.h index 6317cc528..d864328be 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.h +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trsm_lu_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_lu_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index 20aa0700c..8396be9f4 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_rl_ker_var2); -void bli_trsm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_rl_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to A. + buf_alpha = bli_obj_internal_scalar_buffer( *a ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.h b/frame/3/trsm/bli_trsm_rl_ker_var2.h index 7929b8fed..fc676dfb0 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.h +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trsm_rl_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_rl_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index 580f1e18d..ef3a37606 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -50,10 +50,8 @@ typedef void (*FUNCPTR_T)( static FUNCPTR_T GENARRAY(ftypes,trsm_ru_ker_var2); -void bli_trsm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ) { @@ -79,15 +77,14 @@ void bli_trsm_ru_ker_var2( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - num_t dt_alpha; void* buf_alpha; FUNCPTR_T f; - // If alpha is a scalar constant, use dt_exec to extract the address of the - // corresponding constant value; otherwise, use the datatype encoded - // within the alpha object and extract the buffer at the alpha offset. - bli_set_scalar_dt_buffer( alpha, dt_exec, dt_alpha, buf_alpha ); + + // Grab the address of the internal scalar buffer for the scalar + // attached to A. + buf_alpha = bli_obj_internal_scalar_buffer( *a ); // Index into the type combination array to extract the correct // function pointer. diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.h b/frame/3/trsm/bli_trsm_ru_ker_var2.h index 1cfeddf9e..9d4295ebb 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.h +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.h @@ -36,10 +36,8 @@ // // Prototype object-based interface. 
// -void bli_trsm_ru_ker_var2( obj_t* alpha, - obj_t* a, +void bli_trsm_ru_ker_var2( obj_t* a, obj_t* b, - obj_t* beta, obj_t* c, trsm_t* cntl ); diff --git a/frame/base/bli_obj.c b/frame/base/bli_obj.c index 15d303c56..9d76e108d 100644 --- a/frame/base/bli_obj.c +++ b/frame/base/bli_obj.c @@ -66,7 +66,7 @@ void bli_obj_create_without_buffer( num_t dt, { siz_t elem_size; mem_t* pack_mem; - //mem_t* cast_mem; + void* s; if ( bli_error_checking_is_enabled() ) bli_obj_create_without_buffer_check( dt, m, n, obj ); @@ -99,9 +99,15 @@ void bli_obj_create_without_buffer( num_t dt, bli_obj_set_diag_offset( 0, *obj ); pack_mem = bli_obj_pack_mem( *obj ); - //cast_mem = bli_obj_cast_mem( *obj ); bli_mem_set_buffer( NULL, pack_mem ); - //bli_mem_set_buffer( NULL, cast_mem ); + + // Set the internal scalar to 1.0. + s = bli_obj_internal_scalar_buffer( *obj ); + + if ( bli_is_float( dt ) ) bli_sset1s( *(( float* )s) ) + else if ( bli_is_double( dt ) ) bli_dset1s( *(( double* )s) ) + else if ( bli_is_scomplex( dt ) ) bli_cset1s( *(( scomplex* )s) ) + else if ( bli_is_dcomplex( dt ) ) bli_zset1s( *(( dcomplex* )s) ) } void bli_obj_alloc_buffer( inc_t rs, @@ -210,56 +216,17 @@ void bli_obj_attach_buffer( void* p, bli_obj_set_incs( rs, cs, *obj ); } -void bli_obj_attach_internal_buffer( obj_t* obj ) -{ - void* p; - - // Query the address of the object's internal scalar buffer. - p = bli_obj_internal_scalar_buffer( *obj ); - - // Update the object. - bli_obj_set_buffer( p, *obj ); - bli_obj_set_incs( 1, 1, *obj ); -} - -void bli_obj_init_scalar( num_t dt, - obj_t* b ) -{ - // Initialize b without a buffer and then attach its internal buffer. - bli_obj_create_without_buffer( dt, 1, 1, b ); - bli_obj_attach_internal_buffer( b ); -} - -void bli_obj_init_scalar_copy_of( num_t dt, - conj_t conj, - obj_t* a, - obj_t* b ) -{ - obj_t a_local; - - // Make a local copy of scalar a so we can apply the conj parameter. 
- bli_obj_alias_to( *a, a_local ); - bli_obj_apply_conj( conj, a_local ); - - // Initialize b without a buffer and then attach its internal buffer. - bli_obj_create_without_buffer( dt, 1, 1, b ); - bli_obj_attach_internal_buffer( b ); - - // Copy the scalar value in a to object b, conjugating if needed. - bli_copysc( &a_local, b ); -} - -void bli_obj_create_scalar( num_t dt, - obj_t* obj ) +void bli_obj_create_1x1( num_t dt, + obj_t* obj ) { bli_obj_create_without_buffer( dt, 1, 1, obj ); bli_obj_alloc_buffer( 1, 1, obj ); } -void bli_obj_create_scalar_with_attached_buffer( num_t dt, - void* p, - obj_t* obj ) +void bli_obj_create_1x1_with_attached_buffer( num_t dt, + void* p, + obj_t* obj ) { bli_obj_create_without_buffer( dt, 1, 1, obj ); @@ -274,8 +241,9 @@ void bli_obj_free( obj_t* obj ) // Don't dereference obj if it is NULL. if ( obj != NULL ) { - // Idiot safety: Don't try to free the buffer field if it currently - // refers to the internal scalar buffer. + // Idiot safety: Don't try to free the buffer field if the object + // is a detached scalar (ie: if the buffer pointer refers to the + // address of the internal scalar buffer). if ( bli_obj_buffer( *obj ) != bli_obj_internal_scalar_buffer( *obj ) ) bli_free( bli_obj_buffer( *obj ) ); } @@ -387,7 +355,7 @@ void bli_adjust_strides( dim_t m, // Interpret rs = cs = 0 as request for column storage. if ( *rs == 0 && *cs == 0 ) { - // First we handle the scalar case explicitly. + // First we handle the 1x1 scalar case explicitly. if ( m == 1 && n == 1 ) { *rs = 1; @@ -412,7 +380,7 @@ void bli_adjust_strides( dim_t m, // single vector (but could also be a request for a 1xn matrix in // column-major order or an mx1 matrix in row-major order). In BLIS, // we have decided to "reserve" the case where rs = cs = 1 for - // scalars only. + // 1x1 scalars only. 
if ( m > 1 && n == 1 ) { // Set the column stride to indicate that this is a column vector @@ -431,7 +399,7 @@ void bli_adjust_strides( dim_t m, *rs = n; } - // Nothing needs to be done for the scalar case where m == n == 1. + // Nothing needs to be done for the 1x1 scalar case where m == n == 1. } } diff --git a/frame/base/bli_obj.h b/frame/base/bli_obj.h index b44ab9d48..77c62b531 100644 --- a/frame/base/bli_obj.h +++ b/frame/base/bli_obj.h @@ -63,22 +63,12 @@ void bli_obj_attach_buffer( void* p, inc_t cs, obj_t* obj ); -void bli_obj_attach_internal_buffer( obj_t* obj ); +void bli_obj_create_1x1( num_t dt, + obj_t* obj ); -void bli_obj_init_scalar( num_t dt, - obj_t* b ); - -void bli_obj_init_scalar_copy_of( num_t dt, - conj_t conj, - obj_t* a, - obj_t* b ); - -void bli_obj_create_scalar( num_t dt, - obj_t* obj ); - -void bli_obj_create_scalar_with_attached_buffer( num_t dt, - void* p, - obj_t* obj ); +void bli_obj_create_1x1_with_attached_buffer( num_t dt, + void* p, + obj_t* obj ); void bli_obj_free( obj_t* obj ); diff --git a/frame/base/bli_obj_scalar.c b/frame/base/bli_obj_scalar.c new file mode 100644 index 000000000..1052f5154 --- /dev/null +++ b/frame/base/bli_obj_scalar.c @@ -0,0 +1,174 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2013, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + + +void bli_obj_scalar_init_detached( num_t dt, + obj_t* beta ) +{ + void* p; + + // Initialize beta without a buffer and then attach its internal buffer. + bli_obj_create_without_buffer( dt, 1, 1, beta ); + + // Query the address of the object's internal scalar buffer. + p = bli_obj_internal_scalar_buffer( *beta ); + + // Update the object. + bli_obj_set_buffer( p, *beta ); + bli_obj_set_incs( 1, 1, *beta ); +} + +void bli_obj_scalar_init_detached_copy_of( num_t dt, + conj_t conj, + obj_t* alpha, + obj_t* beta ) +{ + obj_t alpha_local; + + // Make a local copy of alpha so we can apply the conj parameter. + bli_obj_alias_to( *alpha, alpha_local ); + bli_obj_apply_conj( conj, alpha_local ); + + // Initialize beta without a buffer and then attach its internal buffer. + bli_obj_scalar_init_detached( dt, beta ); + + // Copy the scalar value in alpha to object beta, conjugating and/or + // typecasting if needed. 
+ bli_copysc( &alpha_local, beta ); +} + +void bli_obj_scalar_detach( obj_t* a, + obj_t* alpha ) +{ + num_t dt_a = bli_obj_datatype( *a ); + + // Initialize alpha to be a bufferless internal scalar of the same + // datatype as A. + bli_obj_scalar_init_detached( dt_a, alpha ); + + // Copy the internal scalar in A to alpha. + bli_obj_copy_internal_scalar( *a, *alpha ); +} + +void bli_obj_scalar_attach( conj_t conj, + obj_t* alpha, + obj_t* a ) +{ + obj_t alpha_cast; + + // Make a copy-cast of alpha of the same datatype as A. This step + // gives us the opportunity to conjugate and/or typecast alpha. + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), + conj, + alpha, + &alpha_cast ); + + // Copy the internal scalar in alpha_cast to A. + bli_obj_copy_internal_scalar( alpha_cast, *a ); +} + +void bli_obj_scalar_apply_scalar( obj_t* alpha, + obj_t* a ) +{ + obj_t alpha_cast; + obj_t scalar_a; + + // Make a copy-cast of alpha of the same datatype as A. This step + // gives us the opportunity to typecast alpha. + bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ), + BLIS_NO_CONJUGATE, + alpha, + &alpha_cast ); + // Detach the scalar from A. + bli_obj_scalar_detach( a, &scalar_a ); + + // Scale the detached scalar by alpha. + bli_mulsc( &alpha_cast, &scalar_a ); + + // Copy the internal scalar in scalar_a to A. 
+ bli_obj_copy_internal_scalar( scalar_a, *a ); +} + +void bli_obj_scalar_reset( obj_t* a ) +{ + num_t dt = bli_obj_datatype( *a ); + void* scalar_a = bli_obj_internal_scalar_buffer( *a ); + void* one = bli_obj_buffer_for_const( dt, BLIS_ONE ); + + if ( bli_is_float( dt ) ) *(( float* )scalar_a) = *(( float* )one); + else if ( bli_is_double( dt ) ) *(( double* )scalar_a) = *(( double* )one); + else if ( bli_is_scomplex( dt ) ) *(( scomplex* )scalar_a) = *(( scomplex* )one); + else if ( bli_is_dcomplex( dt ) ) *(( dcomplex* )scalar_a) = *(( dcomplex* )one); + + // Alternate implementation: + //bli_obj_scalar_attach( &BLIS_ONE, a ); +} + +bool_t bli_obj_scalar_has_nonzero_imag( obj_t* a ) +{ + bool_t r_val = FALSE; + num_t dt = bli_obj_datatype( *a ); + void* scalar_a = bli_obj_internal_scalar_buffer( *a ); + + if ( bli_is_real( dt ) ) + { + r_val = FALSE; + } + else if ( bli_is_scomplex( dt ) ) + { + r_val = ( bli_cimag( *(( scomplex* )scalar_a) ) != 0.0F ); + } + else if ( bli_is_dcomplex( dt ) ) + { + r_val = ( bli_zimag( *(( dcomplex* )scalar_a) ) != 0.0 ); + } + + return r_val; +} + +bool_t bli_obj_scalar_equals( obj_t* a, + obj_t* beta ) +{ + obj_t scalar_a; + bool_t r_val; + + bli_obj_scalar_detach( a, &scalar_a ); + + r_val = bli_obj_equals( &scalar_a, beta ); + + return r_val; +} + diff --git a/frame/base/bli_obj_scalar.h b/frame/base/bli_obj_scalar.h new file mode 100644 index 000000000..f8f1c682f --- /dev/null +++ b/frame/base/bli_obj_scalar.h @@ -0,0 +1,59 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2013, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_obj_scalar_init_detached( num_t dt, + obj_t* beta ); + +void bli_obj_scalar_init_detached_copy_of( num_t dt, + conj_t conj, + obj_t* alpha, + obj_t* beta ); + +void bli_obj_scalar_detach( obj_t* a, + obj_t* alpha ); + +void bli_obj_scalar_attach( conj_t conj, + obj_t* alpha, + obj_t* a ); + +void bli_obj_scalar_apply_scalar( obj_t* alpha, + obj_t* a ); + +void bli_obj_scalar_reset( obj_t* a ); + +bool_t bli_obj_scalar_has_nonzero_imag( obj_t* a ); + +bool_t bli_obj_scalar_equals( obj_t* a, + obj_t* beta ); + diff --git a/frame/base/bli_query.c b/frame/base/bli_query.c index af52a6c98..06fa89909 100644 --- a/frame/base/bli_query.c +++ b/frame/base/bli_query.c @@ -34,8 +34,8 @@ #include "blis.h" -bool_t bli_obj_scalar_equals( obj_t* a, - obj_t* b ) +bool_t bli_obj_equals( obj_t* a, + obj_t* b ) { bool_t r_val = FALSE; num_t dt_a; @@ -43,6 +43,11 @@ bool_t bli_obj_scalar_equals( obj_t* a, num_t dt; void* buf_a; void* buf_b; + + // The function is not yet implemented for vectors and matrices. 
+ if ( !bli_obj_is_1x1( *a ) || + !bli_obj_is_1x1( *b ) ) + bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); /* bli_printm( "a:", a, "%9.2e", "" ); bli_printm( "b:", b, "%9.2e", "" ); @@ -58,8 +63,8 @@ bli_printm( "b:", b, "%9.2e", "" ); if ( dt_b == BLIS_CONSTANT ) dt = dt_a; else dt = dt_b; - buf_a = bli_obj_scalar_buffer( dt, *a ); - buf_b = bli_obj_scalar_buffer( dt, *b ); + buf_a = bli_obj_buffer_for_1x1( dt, *a ); + buf_b = bli_obj_buffer_for_1x1( dt, *b ); /* printf( "dt: %u\n", dt ); printf( "dt_a: %u\n", dt_a ); @@ -79,10 +84,10 @@ printf( "bufb: %p\n", buf_b ); scomplex* bp_c = bli_obj_buffer_for_const( BLIS_SCOMPLEX, *b ); dcomplex* bp_z = bli_obj_buffer_for_const( BLIS_DCOMPLEX, *b ); - r_val = r_val || ( *ap_s == *bp_s ); - r_val = r_val || ( *ap_d == *bp_d ); - r_val = r_val || ( ap_c->real == bp_c->real && ap_c->imag == bp_c->imag ); - r_val = r_val || ( ap_z->real == bp_z->real && ap_z->imag == bp_z->imag ); + r_val = r_val || bli_seqa( ap_s, bp_s ); + r_val = r_val || bli_deqa( ap_d, bp_d ); + r_val = r_val || bli_ceqa( ap_c, bp_c ); + r_val = r_val || bli_zeqa( ap_z, bp_z ); } else if ( dt == BLIS_FLOAT ) r_val = bli_seqa( buf_a, buf_b ); else if ( dt == BLIS_DOUBLE ) r_val = bli_deqa( buf_a, buf_b ); diff --git a/frame/base/bli_query.h b/frame/base/bli_query.h index cd22a4d86..777c69046 100644 --- a/frame/base/bli_query.h +++ b/frame/base/bli_query.h @@ -32,6 +32,6 @@ */ -bool_t bli_obj_scalar_equals( obj_t* a, - obj_t* b ); +bool_t bli_obj_equals( obj_t* a, + obj_t* b ); diff --git a/frame/include/bli_obj_macro_defs.h b/frame/include/bli_obj_macro_defs.h index fdfbc7ad7..0de62a47f 100644 --- a/frame/include/bli_obj_macro_defs.h +++ b/frame/include/bli_obj_macro_defs.h @@ -485,7 +485,7 @@ bli_obj_width_stored( obj ) #define bli_obj_vector_inc( x ) \ \ - ( bli_obj_is_scalar( x ) ? 1 : \ + ( bli_obj_is_1x1( x ) ? 1 : \ ( bli_obj_length( x ) == 1 ? 
bli_obj_col_stride( x ) \ : bli_obj_row_stride( x ) ) \ ) @@ -508,7 +508,7 @@ bli_obj_width_stored( obj ) ( bli_obj_length( obj ) == 0 || \ bli_obj_width( obj ) == 0 ) -#define bli_obj_is_scalar( x ) \ +#define bli_obj_is_1x1( x ) \ \ ( bli_obj_length( x ) == 1 && \ bli_obj_width( x ) == 1 ) @@ -695,6 +695,17 @@ bli_obj_width_stored( obj ) \ &((obj).scalar) +// Bufferless scalar field modification + +#define bli_obj_set_internal_scalar( val, obj ) \ +{ \ + (obj).scalar = val; \ +} + +#define bli_obj_copy_internal_scalar( a, b ) \ +{ \ + (b).scalar = (a).scalar; \ +} // Element size query @@ -897,7 +908,7 @@ bli_obj_width_stored( obj ) (obj).offm * (obj).rs ) \ ) -#define bli_obj_scalar_buffer( dt, obj ) \ +#define bli_obj_buffer_for_1x1( dt, obj ) \ \ ( void* )( bli_obj_is_const( obj ) ? ( bli_obj_buffer_for_const( dt, obj ) ) \ : ( bli_obj_buffer_at_off( obj ) ) \ diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 2ea6e47b6..4ef2fd6f6 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -543,7 +543,7 @@ if ( bli_obj_is_const( *(obj_scalar) ) ) \ { \ dt_scalar = dt_aux; \ - buf_scalar = bli_obj_scalar_buffer( dt_scalar, *(obj_scalar) ); \ + buf_scalar = bli_obj_buffer_for_1x1( dt_scalar, *(obj_scalar) ); \ } \ else \ { \ @@ -558,7 +558,7 @@ { \ { \ dt_scalar = dt_aux; \ - buf_scalar = bli_obj_scalar_buffer( dt_scalar, *(obj_scalar) ); \ + buf_scalar = bli_obj_buffer_for_1x1( dt_scalar, *(obj_scalar) ); \ } \ } diff --git a/frame/include/blis.h b/frame/include/blis.h index a2ea09ab1..849e1f0ec 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -84,6 +84,7 @@ extern "C" { #include "bli_init.h" #include "bli_malloc.h" #include "bli_obj.h" +#include "bli_obj_scalar.h" #include "bli_mem.h" #include "bli_part.h" #include "bli_query.h" diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt index 
e5c0f517b..b4812a8e4 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt @@ -94,11 +94,11 @@ void bli_dotxf_opt_var1( obj_t* alpha, // The datatype of alpha MUST be the type union of x and y. This is to // prevent any unnecessary loss of information during computation. dt_alpha = bli_datatype_union( dt_x, dt_y ); - buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha ); + buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha ); // The datatype of beta MUST be the same as the datatype of rho. dt_beta = dt_rho; - buf_beta = bli_obj_scalar_buffer( dt_beta, *beta ); + buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta ); // Index into the type combination array to extract the correct // function pointer. diff --git a/testsuite/src/test_addm.c b/testsuite/src/test_addm.c index 60beab086..194e80bc6 100644 --- a/testsuite/src/test_addm.c +++ b/testsuite/src/test_addm.c @@ -137,8 +137,8 @@ void libblis_test_addm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -239,13 +239,13 @@ void libblis_test_addm_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aplusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); - bli_obj_init_scalar( dt_real, &n_r ); + bli_obj_scalar_init_detached( dt, &aplusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &n_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormm( y, &norm_r ); diff --git a/testsuite/src/test_addv.c b/testsuite/src/test_addv.c index 49a5752b3..248d15868 100644 --- a/testsuite/src/test_addv.c +++ b/testsuite/src/test_addv.c @@ -135,8 +135,8 @@ void libblis_test_addv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -234,12 +234,12 @@ void libblis_test_addv_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aplusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt, &aplusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormv( y, &norm_r ); diff --git a/testsuite/src/test_axpy2v.c b/testsuite/src/test_axpy2v.c index d046e3113..d17d2fad3 100644 --- a/testsuite/src/test_axpy2v.c +++ b/testsuite/src/test_axpy2v.c @@ -149,8 +149,8 @@ void libblis_test_axpy2v_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha1 ); - bli_obj_init_scalar( datatype, &alpha2 ); + bli_obj_scalar_init_detached( datatype, &alpha1 ); + bli_obj_scalar_init_detached( datatype, &alpha2 ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -269,7 +269,7 @@ void libblis_test_axpy2v_check( obj_t* alpha1, // is negligible, where v contains z as computed by two calls to axpyv. 
// - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); @@ -335,9 +335,9 @@ void bli_axpy2v_ker( obj_t* alpha1, inc_t inc_z = bli_obj_vector_inc( *z ); void* buf_z = bli_obj_buffer_at_off( *z ); - void* buf_alpha1 = bli_obj_scalar_buffer( dt, *alpha1 ); + void* buf_alpha1 = bli_obj_buffer_for_1x1( dt, *alpha1 ); - void* buf_alpha2 = bli_obj_scalar_buffer( dt, *alpha2 ); + void* buf_alpha2 = bli_obj_buffer_for_1x1( dt, *alpha2 ); FUNCPTR_T f; diff --git a/testsuite/src/test_axpyf.c b/testsuite/src/test_axpyf.c index a821db723..0d92f7ab2 100644 --- a/testsuite/src/test_axpyf.c +++ b/testsuite/src/test_axpyf.c @@ -154,7 +154,7 @@ void libblis_test_axpyf_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -275,8 +275,8 @@ void libblis_test_axpyf_check( obj_t* alpha, // axpyv. // - bli_obj_init_scalar( dt_real, &norm ); - bli_obj_init_scalar( dt, &alpha_chi1 ); + bli_obj_scalar_init_detached( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, m, 1, 0, 0, &v ); @@ -344,7 +344,7 @@ void bli_axpyf_ker( obj_t* alpha, inc_t inc_y = bli_obj_vector_inc( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); FUNCPTR_T f; diff --git a/testsuite/src/test_axpym.c b/testsuite/src/test_axpym.c index 78eaca522..99378b860 100644 --- a/testsuite/src/test_axpym.c +++ b/testsuite/src/test_axpym.c @@ -145,7 +145,7 @@ void libblis_test_axpym_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -253,7 +253,7 @@ void libblis_test_axpym_check( obj_t* alpha, // is negligible. // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); bli_obj_create( dt, m, n, 0, 0, &y_temp ); diff --git a/testsuite/src/test_axpyv.c b/testsuite/src/test_axpyv.c index bb4114584..435a0aaf8 100644 --- a/testsuite/src/test_axpyv.c +++ b/testsuite/src/test_axpyv.c @@ -144,7 +144,7 @@ void libblis_test_axpyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -250,7 +250,7 @@ void libblis_test_axpyv_check( obj_t* alpha, // is negligible. // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); bli_obj_create( dt, m, 1, 0, 0, &y_temp ); diff --git a/testsuite/src/test_copym.c b/testsuite/src/test_copym.c index 79bf2f7fe..060f95199 100644 --- a/testsuite/src/test_copym.c +++ b/testsuite/src/test_copym.c @@ -216,7 +216,7 @@ void libblis_test_copym_check( obj_t* x, // is negligible. // - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_subm( x, y ); diff --git a/testsuite/src/test_copyv.c b/testsuite/src/test_copyv.c index 69e986051..fcb934ab7 100644 --- a/testsuite/src/test_copyv.c +++ b/testsuite/src/test_copyv.c @@ -213,7 +213,7 @@ void libblis_test_copyv_check( obj_t* x, // is negligible. 
// - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_subv( x, y ); diff --git a/testsuite/src/test_dotaxpyv.c b/testsuite/src/test_dotaxpyv.c index 377af7b29..96953d9e6 100644 --- a/testsuite/src/test_dotaxpyv.c +++ b/testsuite/src/test_dotaxpyv.c @@ -153,8 +153,8 @@ void libblis_test_dotaxpyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &rho ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &rho ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -301,8 +301,8 @@ void libblis_test_dotaxpyv_check( obj_t* alpha, // computed by dotv and axpyv, respectively. // - bli_obj_init_scalar( dt, &rho_temp ); - bli_obj_init_scalar( dt_real, &norm_z ); + bli_obj_scalar_init_detached( dt, &rho_temp ); + bli_obj_scalar_init_detached( dt_real, &norm_z ); bli_obj_create( dt, m, 1, 0, 0, &z_temp ); bli_copyv( z_orig, &z_temp ); @@ -371,7 +371,7 @@ void bli_dotaxpyv_ker( obj_t* alpha, void* buf_rho = bli_obj_buffer_at_off( *rho ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); FUNCPTR_T f; diff --git a/testsuite/src/test_dotv.c b/testsuite/src/test_dotv.c index 761a78e25..3a8e46d5a 100644 --- a/testsuite/src/test_dotv.c +++ b/testsuite/src/test_dotv.c @@ -140,7 +140,7 @@ void libblis_test_dotv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &rho ); + bli_obj_scalar_init_detached( datatype, &rho ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -246,10 +246,10 @@ void libblis_test_dotv_check( obj_t* x, // are negligible. 
// - bli_obj_init_scalar( dt_real, &rho_r ); - bli_obj_init_scalar( dt_real, &rho_i ); - bli_obj_init_scalar( dt_real, &norm_x ); - bli_obj_init_scalar( dt_real, &norm_xy ); + bli_obj_scalar_init_detached( dt_real, &rho_r ); + bli_obj_scalar_init_detached( dt_real, &rho_i ); + bli_obj_scalar_init_detached( dt_real, &norm_x ); + bli_obj_scalar_init_detached( dt_real, &norm_xy ); bli_fnormv( x, &norm_x ); diff --git a/testsuite/src/test_dotxaxpyf.c b/testsuite/src/test_dotxaxpyf.c index 6a0678267..ee066c9c3 100644 --- a/testsuite/src/test_dotxaxpyf.c +++ b/testsuite/src/test_dotxaxpyf.c @@ -166,8 +166,8 @@ void libblis_test_dotxaxpyf_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[3], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -326,8 +326,8 @@ void libblis_test_dotxaxpyf_check( obj_t* alpha, // calls to dotxv and axpyv, respectively. 
// - bli_obj_init_scalar( dt_real, &norm ); - bli_obj_init_scalar( dt, &alpha_chi1 ); + bli_obj_scalar_init_detached( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &alpha_chi1 ); bli_obj_create( dt, b_n, 1, 0, 0, &v ); bli_obj_create( dt, m, 1, 0, 0, &q ); @@ -433,9 +433,9 @@ void bli_dotxaxpyf_ker( obj_t* alpha, inc_t inc_z = bli_obj_vector_inc( *z ); void* buf_z = bli_obj_buffer_at_off( *z ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha );; + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha );; - void* buf_beta = bli_obj_scalar_buffer( dt, *beta );; + void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta );; FUNCPTR_T f; diff --git a/testsuite/src/test_dotxf.c b/testsuite/src/test_dotxf.c index 0eb00ae57..3c8091f71 100644 --- a/testsuite/src/test_dotxf.c +++ b/testsuite/src/test_dotxf.c @@ -156,8 +156,8 @@ void libblis_test_dotxf_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -280,7 +280,7 @@ void libblis_test_dotxf_check( obj_t* alpha, // dotxv. 
// - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, b_n, 1, 0, 0, &v ); @@ -347,9 +347,9 @@ void bli_dotxf_ker( obj_t* alpha, inc_t inc_y = bli_obj_vector_inc( *y ); void* buf_y = bli_obj_buffer_at_off( *y ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); - void* buf_beta = bli_obj_scalar_buffer( dt, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); FUNCPTR_T f; diff --git a/testsuite/src/test_dotxv.c b/testsuite/src/test_dotxv.c index f5e6d6d66..9d78e4a48 100644 --- a/testsuite/src/test_dotxv.c +++ b/testsuite/src/test_dotxv.c @@ -146,10 +146,10 @@ void libblis_test_dotxv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( datatype, &rho ); - bli_obj_init_scalar( datatype, &rho_save ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &rho ); + bli_obj_scalar_init_detached( datatype, &rho_save ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -272,11 +272,11 @@ void libblis_test_dotxv_check( obj_t* alpha, // are negligible. 
// - bli_obj_init_scalar( dt_real, &rho_r ); - bli_obj_init_scalar( dt_real, &rho_i ); - bli_obj_init_scalar( dt_real, &norm_x_r ); - bli_obj_init_scalar( dt_real, &norm_xy_r ); - bli_obj_init_scalar( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &rho_r ); + bli_obj_scalar_init_detached( dt_real, &rho_i ); + bli_obj_scalar_init_detached( dt_real, &norm_x_r ); + bli_obj_scalar_init_detached( dt_real, &norm_xy_r ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); bli_copysc( alpha, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); diff --git a/testsuite/src/test_fnormm.c b/testsuite/src/test_fnormm.c index 07e5248cc..761504141 100644 --- a/testsuite/src/test_fnormm.c +++ b/testsuite/src/test_fnormm.c @@ -138,8 +138,8 @@ void libblis_test_fnormm_experiment( test_params_t* params, // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( dt_real, &norm ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -225,9 +225,9 @@ void libblis_test_fnormm_check( obj_t* beta, // where m and n are the dimensions of x. // - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &m_r ); - bli_obj_init_scalar( dt_real, &n_r ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &n_r ); bli_setsc( ( double )m, 0.0, &m_r ); bli_setsc( ( double )n, 0.0, &n_r ); diff --git a/testsuite/src/test_fnormv.c b/testsuite/src/test_fnormv.c index 2458b1b1f..55ff8630e 100644 --- a/testsuite/src/test_fnormv.c +++ b/testsuite/src/test_fnormv.c @@ -137,8 +137,8 @@ void libblis_test_fnormv_experiment( test_params_t* params, // Create test scalars. 
- bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( dt_real, &norm ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -222,8 +222,8 @@ void libblis_test_fnormv_check( obj_t* beta, // where m is the length of x. // - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); bli_setsc( ( double )m, 0.0, &m_r ); diff --git a/testsuite/src/test_gemm.c b/testsuite/src/test_gemm.c index 2373e0988..40f0550f9 100644 --- a/testsuite/src/test_gemm.c +++ b/testsuite/src/test_gemm.c @@ -156,9 +156,9 @@ void libblis_test_gemm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[1], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -175,7 +175,6 @@ void libblis_test_gemm_experiment( test_params_t* params, { bli_setsc( 1.2, 0.0, &alpha ); bli_setsc( -1.0, 0.0, &beta ); - //bli_setsc( 0.0, 0.0, &beta ); } else { @@ -295,8 +294,8 @@ void libblis_test_gemm_check( obj_t* alpha, // = beta * C_orig * t + z // - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_gemm_ukr.c b/testsuite/src/test_gemm_ukr.c index 6e27a8f30..21ffc2564 100644 --- a/testsuite/src/test_gemm_ukr.c +++ b/testsuite/src/test_gemm_ukr.c @@ -164,9 +164,9 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params, op->dim_aux[1] = n; // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands. libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -221,8 +221,8 @@ void libblis_test_gemm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a and b to ap and bp, respectively. - bli_packm_blk_var2( &BLIS_ONE, &a, &ap ); - bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &a, &ap ); + bli_packm_blk_var2( &b, &bp ); // Repeat the experiment n_repeats times and record results. 
@@ -326,8 +326,8 @@ void libblis_test_gemm_ukr_check( obj_t* alpha, // = beta * C_orig * t + z // - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); @@ -392,9 +392,9 @@ void bli_gemm_ukr( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c ); inc_t cs_c = bli_obj_col_stride( *c ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); - void* buf_beta = bli_obj_scalar_buffer( dt, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); FUNCPTR_T f; diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index be9ce1723..a719955bc 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -185,8 +185,8 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -251,10 +251,10 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var3( &BLIS_ONE, &a, &ap ); + bli_packm_blk_var3( &a, &ap ); // Pack the contents of b to bp. - bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &b, &bp ); // Create subpartitions from the a and b panels. @@ -268,7 +268,7 @@ void libblis_test_gemmtrsm_ukr_experiment( test_params_t* params, bli_copym( &c11_save, &c11 ); // Re-pack the contents of b to bp. 
- bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &b, &bp ); time = bli_clock(); @@ -369,8 +369,8 @@ void libblis_test_gemmtrsm_ukr_check( side_t side, // = inv(A11) * ( alpha * B11_orig * t - A1x * w ) // - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { @@ -523,7 +523,7 @@ void bli_gemmtrsm_ukr( obj_t* alpha, inc_t rs_c = bli_obj_row_stride( *c11 ); inc_t cs_c = bli_obj_col_stride( *c11 ); - void* buf_alpha = bli_obj_scalar_buffer( dt, *alpha ); + void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); FUNCPTR_T f; diff --git a/testsuite/src/test_gemv.c b/testsuite/src/test_gemv.c index 0bfd3017b..ce77ffb50 100644 --- a/testsuite/src/test_gemv.c +++ b/testsuite/src/test_gemv.c @@ -152,9 +152,9 @@ void libblis_test_gemv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -290,8 +290,8 @@ void libblis_test_gemv_check( obj_t* kappa, // z = beta * y_orig + alpha * conja(kappa) * x // - bli_obj_init_scalar_copy_of( dt, conja, kappa, &kappac ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached_copy_of( dt, conja, kappa, &kappac ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n_x, 1, 0, 0, &x_temp ); bli_obj_create( dt, m_y, 1, 0, 0, &y_temp ); diff --git a/testsuite/src/test_ger.c b/testsuite/src/test_ger.c index 13b3b5326..20e8a452f 100644 --- a/testsuite/src/test_ger.c +++ b/testsuite/src/test_ger.c @@ -148,7 +148,7 @@ void libblis_test_ger_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -273,9 +273,9 @@ void libblis_test_ger_check( obj_t* alpha, // = A_orig * t + w // - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, n_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_hemm.c b/testsuite/src/test_hemm.c index c9a125fec..3f6d44bc2 100644 --- a/testsuite/src/test_hemm.c +++ b/testsuite/src/test_hemm.c @@ -163,9 +163,9 @@ void libblis_test_hemm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[3], &transb ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -317,8 +317,8 @@ void libblis_test_hemm_check( side_t side, // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_hemv.c b/testsuite/src/test_hemv.c index 6c2f59e10..99f775107 100644 --- a/testsuite/src/test_hemv.c +++ b/testsuite/src/test_hemv.c @@ -154,9 +154,9 @@ void libblis_test_hemv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -296,7 +296,7 @@ void libblis_test_hemv_check( obj_t* alpha, // v = beta * y_orig + alpha * conja(A_dense) * x // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_her.c b/testsuite/src/test_her.c index b3baf3c88..1ae34b5af 100644 --- a/testsuite/src/test_her.c +++ b/testsuite/src/test_her.c @@ -148,7 +148,7 @@ void libblis_test_her_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -278,9 +278,9 @@ void libblis_test_her_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_her2.c b/testsuite/src/test_her2.c index 55846980b..cf947b8eb 100644 --- a/testsuite/src/test_her2.c +++ b/testsuite/src/test_her2.c @@ -151,7 +151,7 @@ void libblis_test_her2_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, @@ -290,10 +290,10 @@ void libblis_test_her2_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt, &alphac ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt, &alphac ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_her2k.c b/testsuite/src/test_her2k.c index b12817402..b6023b19a 100644 --- a/testsuite/src/test_her2k.c +++ b/testsuite/src/test_her2k.c @@ -157,9 +157,9 @@ void libblis_test_her2k_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[2], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -316,9 +316,9 @@ void libblis_test_her2k_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah ); bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *b, bh ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); - bli_obj_init_scalar_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); + bli_obj_scalar_init_detached_copy_of( dt, BLIS_CONJUGATE, alpha, &alphac ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_herk.c b/testsuite/src/test_herk.c index 2b70b6dd0..e535ee9b8 100644 --- a/testsuite/src/test_herk.c +++ b/testsuite/src/test_herk.c @@ -154,9 +154,9 @@ void libblis_test_herk_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[1], &transa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -299,8 +299,8 @@ void libblis_test_herk_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_CONJ_TRANSPOSE, *a, ah ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_scal2m.c b/testsuite/src/test_scal2m.c index 6b5765e0b..e2f594945 100644 --- a/testsuite/src/test_scal2m.c +++ b/testsuite/src/test_scal2m.c @@ -144,7 +144,7 @@ void libblis_test_scal2m_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -252,7 +252,7 @@ void libblis_test_scal2m_check( obj_t* alpha, // is negligible. // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, n, 0, 0, &x_temp ); diff --git a/testsuite/src/test_scal2v.c b/testsuite/src/test_scal2v.c index 3951a189b..fc2471071 100644 --- a/testsuite/src/test_scal2v.c +++ b/testsuite/src/test_scal2v.c @@ -143,7 +143,7 @@ void libblis_test_scal2v_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -249,7 +249,7 @@ void libblis_test_scal2v_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &x_temp ); diff --git a/testsuite/src/test_scalm.c b/testsuite/src/test_scalm.c index 19a58c078..e580b9cd3 100644 --- a/testsuite/src/test_scalm.c +++ b/testsuite/src/test_scalm.c @@ -140,7 +140,7 @@ void libblis_test_scalm_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -247,8 +247,8 @@ void libblis_test_scalm_check( obj_t* beta, bli_obj_create( dt, m, n, 0, 0, &y2 ); bli_copym( y_orig, &y2 ); - bli_obj_init_scalar( dt, &nbeta ); - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt, &nbeta ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_copysc( beta, &nbeta ); bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); diff --git a/testsuite/src/test_scalv.c b/testsuite/src/test_scalv.c index 25db4f0fa..fa60b1389 100644 --- a/testsuite/src/test_scalv.c +++ b/testsuite/src/test_scalv.c @@ -140,7 +140,7 @@ void libblis_test_scalv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, sc_str[0], m, &y ); @@ -243,8 +243,8 @@ void libblis_test_scalv_check( obj_t* beta, bli_obj_create( dt, m, 1, 0, 0, &y2 ); bli_copyv( y_orig, &y2 ); - bli_obj_init_scalar( dt, &nbeta ); - bli_obj_init_scalar( dt_real, &norm_y_r ); + bli_obj_scalar_init_detached( dt, &nbeta ); + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); bli_copysc( beta, &nbeta ); bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); diff --git a/testsuite/src/test_setm.c b/testsuite/src/test_setm.c index f4c9e5d85..2e1ac390a 100644 --- a/testsuite/src/test_setm.c +++ b/testsuite/src/test_setm.c @@ -135,7 +135,7 @@ void libblis_test_setm_experiment( test_params_t* params, // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -200,7 +200,7 @@ void libblis_test_setm_check( obj_t* beta, inc_t rs_x = bli_obj_row_stride( *x ); inc_t cs_x = bli_obj_col_stride( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); - void* buf_beta = bli_obj_scalar_buffer( dt_x, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_x, *beta ); dim_t i, j; *resid = 0.0; diff --git a/testsuite/src/test_setv.c b/testsuite/src/test_setv.c index 9bada131c..d147fbd39 100644 --- a/testsuite/src/test_setv.c +++ b/testsuite/src/test_setv.c @@ -134,7 +134,7 @@ void libblis_test_setv_experiment( test_params_t* params, // Create test scalars. - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -196,7 +196,7 @@ void libblis_test_setv_check( obj_t* beta, dim_t m_x = bli_obj_vector_dim( *x ); inc_t inc_x = bli_obj_vector_inc( *x ); void* buf_x = bli_obj_buffer_at_off( *x ); - void* buf_beta = bli_obj_scalar_buffer( dt_x, *beta ); + void* buf_beta = bli_obj_buffer_for_1x1( dt_x, *beta ); dim_t i; *resid = 0.0; diff --git a/testsuite/src/test_subm.c b/testsuite/src/test_subm.c index 9322fbc27..c9c177972 100644 --- a/testsuite/src/test_subm.c +++ b/testsuite/src/test_subm.c @@ -137,8 +137,8 @@ void libblis_test_subm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[0], &transx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transx, @@ -239,13 +239,13 @@ void libblis_test_subm_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aminusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); - bli_obj_init_scalar( dt_real, &n_r ); + bli_obj_scalar_init_detached( dt, &aminusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt_real, &n_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormm( y, &norm_r ); diff --git a/testsuite/src/test_subv.c b/testsuite/src/test_subv.c index f6486a5cb..dbd09d5d5 100644 --- a/testsuite/src/test_subv.c +++ b/testsuite/src/test_subv.c @@ -136,8 +136,8 @@ void libblis_test_subv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -235,12 +235,12 @@ void libblis_test_subv_check( obj_t* alpha, // is negligible. 
// - bli_obj_init_scalar( dt, &aminusb ); - bli_obj_init_scalar( dt_real, &temp_r ); - bli_obj_init_scalar( dt_real, &norm_r ); - bli_obj_init_scalar( dt_real, &m_r ); + bli_obj_scalar_init_detached( dt, &aminusb ); + bli_obj_scalar_init_detached( dt_real, &temp_r ); + bli_obj_scalar_init_detached( dt_real, &norm_r ); + bli_obj_scalar_init_detached( dt_real, &m_r ); - bli_obj_init_scalar_copy_of( dt, conjx, alpha, &alpha_conj ); + bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); bli_fnormv( y, &norm_r ); diff --git a/testsuite/src/test_symm.c b/testsuite/src/test_symm.c index 401a874ff..114fe2f4a 100644 --- a/testsuite/src/test_symm.c +++ b/testsuite/src/test_symm.c @@ -163,9 +163,9 @@ void libblis_test_symm_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[3], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -317,8 +317,8 @@ void libblis_test_symm_check( side_t side, // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_symv.c b/testsuite/src/test_symv.c index c12f216a7..80207ee90 100644 --- a/testsuite/src/test_symv.c +++ b/testsuite/src/test_symv.c @@ -154,9 +154,9 @@ void libblis_test_symv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjx ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -296,7 +296,7 @@ void libblis_test_symv_check( obj_t* alpha, // v = beta * y_orig + alpha * conja(A_dense) * x // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syr.c b/testsuite/src/test_syr.c index 68362a4d5..607a23800 100644 --- a/testsuite/src/test_syr.c +++ b/testsuite/src/test_syr.c @@ -148,7 +148,7 @@ void libblis_test_syr_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[1], &conjx ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -278,9 +278,9 @@ void libblis_test_syr_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syr2.c b/testsuite/src/test_syr2.c index 947fedcc4..69a015351 100644 --- a/testsuite/src/test_syr2.c +++ b/testsuite/src/test_syr2.c @@ -151,7 +151,7 @@ void libblis_test_syr2_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[2], &conjy ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, @@ -292,9 +292,9 @@ void libblis_test_syr2_check( obj_t* alpha, bli_obj_set_uplo( BLIS_DENSE, *a ); bli_obj_set_uplo( BLIS_DENSE, *a_orig ); - bli_obj_init_scalar( dt, &tau ); - bli_obj_init_scalar( dt, &rho ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &tau ); + bli_obj_scalar_init_detached( dt, &rho ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m_a, 1, 0, 0, &t ); bli_obj_create( dt, m_a, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syr2k.c b/testsuite/src/test_syr2k.c index c565fd78a..1638a82d1 100644 --- a/testsuite/src/test_syr2k.c +++ b/testsuite/src/test_syr2k.c @@ -157,9 +157,9 @@ void libblis_test_syr2k_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[2], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, transa, @@ -315,8 +315,8 @@ void libblis_test_syr2k_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_TRANSPOSE, *a, at ); bli_obj_alias_with_trans( BLIS_TRANSPOSE, *b, bt ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_syrk.c b/testsuite/src/test_syrk.c index 24c43b46a..212c8a457 100644 --- a/testsuite/src/test_syrk.c +++ b/testsuite/src/test_syrk.c @@ -154,9 +154,9 @@ void libblis_test_syrk_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[1], &transa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, transa, @@ -300,8 +300,8 @@ void libblis_test_syrk_check( obj_t* alpha, bli_obj_alias_with_trans( BLIS_TRANSPOSE, *a, at ); - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &t ); bli_obj_create( dt, m, 1, 0, 0, &v ); diff --git a/testsuite/src/test_trmm.c b/testsuite/src/test_trmm.c index 281d44ddd..514fdf9b7 100644 --- a/testsuite/src/test_trmm.c +++ b/testsuite/src/test_trmm.c @@ -159,8 +159,8 @@ void libblis_test_trmm_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[3], &diaga ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -299,8 +299,8 @@ void libblis_test_trmm_check( side_t side, // = alpha * B * transa(A) * t // = alpha * B * w - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trmm3.c b/testsuite/src/test_trmm3.c index cf1f36f79..6a8ad3feb 100644 --- a/testsuite/src/test_trmm3.c +++ b/testsuite/src/test_trmm3.c @@ -165,9 +165,9 @@ void libblis_test_trmm3_experiment( test_params_t* params, bli_param_map_char_to_blis_trans( pc_str[4], &transb ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -318,8 +318,8 @@ void libblis_test_trmm3_check( side_t side, // = beta * C_orig * t + alpha * transb(B) * w // = beta * C_orig * t + z - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trmv.c b/testsuite/src/test_trmv.c index fbcd31621..0a7d788b6 100644 --- a/testsuite/src/test_trmv.c +++ b/testsuite/src/test_trmv.c @@ -150,8 +150,8 @@ void libblis_test_trmv_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[2], &diaga ); // Create test scalars. 
- bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -276,7 +276,7 @@ void libblis_test_trmv_check( obj_t* alpha, // y = alpha * conja(A_dense) * x_orig // - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_obj_create( dt, m, 1, 0, 0, &y ); bli_obj_create( dt, m, m, 0, 0, &a_local ); diff --git a/testsuite/src/test_trsm.c b/testsuite/src/test_trsm.c index 2de3f27ad..b78e135d6 100644 --- a/testsuite/src/test_trsm.c +++ b/testsuite/src/test_trsm.c @@ -159,8 +159,8 @@ void libblis_test_trsm_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[3], &diaga ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); - bli_obj_init_scalar( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). bli_set_dim_with_side( side, m, n, mn_side ); @@ -300,8 +300,8 @@ void libblis_test_trsm_check( side_t side, // = alpha * B * tinv(ransa(A)) * t // = alpha * B * w - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index bce3ace69..cb9139f7c 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -165,7 +165,7 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, bli_param_map_char_to_blis_uplo( pc_str[0], &uploa ); // Create test scalars. - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). 
libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -217,14 +217,14 @@ void libblis_test_trsm_ukr_experiment( test_params_t* params, &b, &bp ); // Pack the contents of a to ap. - bli_packm_blk_var3( &BLIS_ONE, &a, &ap ); + bli_packm_blk_var3( &a, &ap ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) { // Re-pack the contents of b to bp. - bli_packm_blk_var2( &BLIS_ONE, &b, &bp ); + bli_packm_blk_var2( &b, &bp ); bli_copym( &c_save, &c ); @@ -320,8 +320,8 @@ void libblis_test_trsm_ukr_check( side_t side, // = B * tinv(ransa(A)) * t // = B * w - bli_obj_init_scalar( dt, &kappa ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &kappa ); + bli_obj_scalar_init_detached( dt_real, &norm ); if ( bli_is_left( side ) ) { diff --git a/testsuite/src/test_trsv.c b/testsuite/src/test_trsv.c index 147a1dbfb..1a2aa3431 100644 --- a/testsuite/src/test_trsv.c +++ b/testsuite/src/test_trsv.c @@ -150,8 +150,8 @@ void libblis_test_trsv_experiment( test_params_t* params, bli_param_map_char_to_blis_diag( pc_str[2], &diaga ); // Create test scalars. - bli_obj_init_scalar( datatype, &alpha ); - bli_obj_init_scalar( datatype, &kappa ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &kappa ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -277,8 +277,8 @@ void libblis_test_trsv_check( obj_t* alpha, // y = inv(alpha) * transa(A_dense) * x // - bli_obj_init_scalar( dt, &alpha_inv ); - bli_obj_init_scalar( dt_real, &norm ); + bli_obj_scalar_init_detached( dt, &alpha_inv ); + bli_obj_scalar_init_detached( dt_real, &norm ); bli_copysc( &BLIS_ONE, &alpha_inv ); bli_divsc( alpha, &alpha_inv );