mirror of
https://github.com/amd/blis.git
synced 2026-05-12 01:59:59 +00:00
Added new "attached" scalar representation.
Details:
- Added infrastructure to support a new scalar representation, whereby
every object contains an internal scalar that defaults to 1.0. This
facilitates passing scalars around without having to house them in
separate objects. These "attached" scalars are stored in the internal
atom_t field of the obj_t struct, and are always stored to be the same
datatype as the object to which they are attached. Level-3 variants no
longer take scalar arguments; however, level-3 internal back-ends still
do; this is so that the calling function can perform subproblems such
as C := C - alpha * A * B on-the-fly without needing to change either
of the scalars attached to A or B.
- Removed scalar argument from packm_int().
- Observe and apply attached scalars in scalm_int(), and removed scalar
from interface of scalm_unb_var1().
- Renamed the following functions (and corresponding invocations):
bli_obj_init_scalar_copy_of()
-> bli_obj_scalar_init_detached_copy_of()
bli_obj_init_scalar() -> bli_obj_scalar_init_detached()
bli_obj_create_scalar_with_attached_buffer()
-> bli_obj_create_1x1_with_attached_buffer()
bli_obj_scalar_equals() -> bli_obj_equals()
- Defined new functions:
bli_obj_scalar_detach()
bli_obj_scalar_attach()
bli_obj_scalar_apply_scalar()
bli_obj_scalar_reset()
bli_obj_scalar_has_nonzero_imag()
bli_obj_scalar_equals()
- Placed all bli_obj_scalar_* functions in a new file, bli_obj_scalar.c.
- Renamed the following macros:
bli_obj_scalar_buffer() -> bli_obj_buffer_for_1x1()
bli_obj_is_scalar() -> bli_obj_is_1x1()
- Defined new macros to set and copy internal scalars between objects:
bli_obj_set_internal_scalar()
bli_obj_copy_internal_scalar()
- In level-3 internal back-ends, added conditional blocks where alpha and
beta are checked for non-unit-ness. Those values for alpha and beta are
applied to the scalars attached to aliases of A/B/C, as appropriate,
before being passed into the variant specified by the control tree.
- In level-3 blocked variants, pass BLIS_ONE into subproblems instead of
alpha and/or beta.
- In level-3 macro-kernels, changed how scalars are obtained. Now, scalars
attached to A and B are multiplied together to obtain alpha, while beta
is obtained directly from C.
- In level-3 front-ends, removed old function calls meant to provide
future support for mixed domain/precision. These can be added back later
once that functionality is given proper treatment. Also, removed the
creation of copy-casts of alpha and beta since typecasting of scalars
is now implicitly handled in the internal back-ends when alpha and
beta are applied to the attached scalars.
This commit is contained in:
@@ -56,7 +56,7 @@ void bli_getsc( obj_t* chi,
|
||||
// If chi is a constant object, default to using the dcomplex
|
||||
// value within since we don't know if the caller needs just the
|
||||
// real or the real and imaginary parts.
|
||||
void* buf_chi = bli_obj_scalar_buffer( dt_def, *chi );
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
|
||||
@@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \
|
||||
dt_x = bli_obj_datatype( *x ); \
|
||||
\
|
||||
/* Create an object to hold a copy-cast of alpha. */ \
|
||||
bli_obj_init_scalar_copy_of( dt_x, \
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x, \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
alpha, \
|
||||
&alpha_local ); \
|
||||
|
||||
@@ -93,11 +93,11 @@ void bli_dotxv_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of x and y. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of rho.
|
||||
dt_beta = dt_rho;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \
|
||||
dt_x = bli_obj_datatype( *x ); \
|
||||
\
|
||||
/* Create an object to hold a copy-cast of beta. */ \
|
||||
bli_obj_init_scalar_copy_of( dt_x, \
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x, \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
beta, \
|
||||
&beta_local ); \
|
||||
|
||||
@@ -57,7 +57,7 @@ void PASTEMAC0(opname)( \
|
||||
dt_x = bli_obj_datatype( *x ); \
|
||||
\
|
||||
/* Create an object to hold a copy-cast of beta. */ \
|
||||
bli_obj_init_scalar_copy_of( dt_x, \
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x, \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
beta, \
|
||||
&beta_local ); \
|
||||
|
||||
@@ -64,7 +64,7 @@ void bli_scalv_int( obj_t* beta,
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
|
||||
// Return early if the beta scalar equals one.
|
||||
if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return;
|
||||
if ( bli_obj_equals( beta, &BLIS_ONE ) ) return;
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
|
||||
@@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \
|
||||
dt_x = bli_obj_datatype( *x ); \
|
||||
\
|
||||
/* Create an object to hold a copy-cast of beta. */ \
|
||||
bli_obj_init_scalar_copy_of( dt_x, \
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x, \
|
||||
BLIS_NO_CONJUGATE, \
|
||||
beta, \
|
||||
&beta_local ); \
|
||||
|
||||
@@ -53,7 +53,7 @@ void bli_axpyd( obj_t* alpha,
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of alpha.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
|
||||
@@ -53,7 +53,7 @@ void bli_scal2d( obj_t* beta,
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of alpha.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
|
||||
@@ -52,7 +52,7 @@ void bli_scald( obj_t* beta,
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of alpha.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
|
||||
@@ -52,7 +52,7 @@ void bli_setd( obj_t* beta,
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of alpha.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
|
||||
@@ -93,7 +93,7 @@ void bli_axpyf_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -98,11 +98,11 @@ void bli_dotxf_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -53,7 +53,7 @@ void bli_axpym( obj_t* alpha,
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of alpha.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
|
||||
@@ -46,7 +46,7 @@ typedef void (*FUNCPTR_T)(
|
||||
dim_t n,
|
||||
dim_t m_max,
|
||||
dim_t n_max,
|
||||
void* beta,
|
||||
void* kappa,
|
||||
void* c, inc_t rs_c, inc_t cs_c,
|
||||
void* p, inc_t rs_p, inc_t cs_p,
|
||||
dim_t pd_p, inc_t ps_p
|
||||
@@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)(
|
||||
static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2);
|
||||
|
||||
|
||||
void bli_packm_blk_var2( obj_t* beta,
|
||||
obj_t* c,
|
||||
void bli_packm_blk_var2( obj_t* c,
|
||||
obj_t* p )
|
||||
{
|
||||
num_t dt_cp = bli_obj_datatype( *c );
|
||||
@@ -82,10 +81,16 @@ void bli_packm_blk_var2( obj_t* beta,
|
||||
dim_t pd_p = bli_obj_panel_dim( *p );
|
||||
inc_t ps_p = bli_obj_panel_stride( *p );
|
||||
|
||||
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
|
||||
void* buf_kappa;
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
// This variant assumes that the micro-kernel will always apply the
|
||||
// alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
|
||||
// for kappa so that the underlying packm implementation does not
|
||||
// scale during packing.
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
f = ftypes[dt_cp];
|
||||
@@ -100,7 +105,7 @@ void bli_packm_blk_var2( obj_t* beta,
|
||||
n_p,
|
||||
m_max_p,
|
||||
n_max_p,
|
||||
buf_beta,
|
||||
buf_kappa,
|
||||
buf_c, rs_c, cs_c,
|
||||
buf_p, rs_p, cs_p,
|
||||
pd_p, ps_p );
|
||||
@@ -120,16 +125,16 @@ void PASTEMAC(ch,varname )( \
|
||||
dim_t n, \
|
||||
dim_t m_max, \
|
||||
dim_t n_max, \
|
||||
void* beta, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p, \
|
||||
dim_t pd_p, inc_t ps_p \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict c_begin; \
|
||||
ctype* restrict p_begin; \
|
||||
\
|
||||
@@ -338,7 +343,7 @@ void PASTEMAC(ch,varname )( \
|
||||
PASTEMAC(ch,packm_cxk)( conjc10, \
|
||||
p10_dim, \
|
||||
p10_len, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c10, incc10, ldc10, \
|
||||
p10, ldp ); \
|
||||
\
|
||||
@@ -347,7 +352,7 @@ void PASTEMAC(ch,varname )( \
|
||||
PASTEMAC(ch,packm_cxk)( conjc12, \
|
||||
p12_dim, \
|
||||
p12_len, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c12, incc12, ldc12, \
|
||||
p12, ldp ); \
|
||||
\
|
||||
@@ -358,7 +363,7 @@ void PASTEMAC(ch,varname )( \
|
||||
conjc, \
|
||||
p11_m, \
|
||||
p11_n, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c11, rs_c, cs_c, \
|
||||
p11, rs_p11, cs_p11 ); \
|
||||
\
|
||||
@@ -412,7 +417,7 @@ void PASTEMAC(ch,varname )( \
|
||||
PASTEMAC(ch,packm_cxk)( conjc10, \
|
||||
panel_dim_i, \
|
||||
panel_len, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c10, incc10, ldc10, \
|
||||
p_begin, ldp ); \
|
||||
\
|
||||
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_blk_var2( obj_t* beta,
|
||||
obj_t* c,
|
||||
void bli_packm_blk_var2( obj_t* c,
|
||||
obj_t* p );
|
||||
|
||||
|
||||
@@ -50,7 +49,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t n, \
|
||||
dim_t m_max, \
|
||||
dim_t n_max, \
|
||||
void* beta, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p, \
|
||||
dim_t pd_p, inc_t ps_p \
|
||||
|
||||
@@ -49,7 +49,7 @@ typedef void (*FUNCPTR_T)(
|
||||
dim_t n,
|
||||
dim_t m_max,
|
||||
dim_t n_max,
|
||||
void* beta,
|
||||
void* kappa,
|
||||
void* c, inc_t rs_c, inc_t cs_c,
|
||||
void* p, inc_t rs_p, inc_t cs_p,
|
||||
dim_t pd_p, inc_t ps_p
|
||||
@@ -58,8 +58,7 @@ typedef void (*FUNCPTR_T)(
|
||||
static FUNCPTR_T GENARRAY(ftypes,packm_blk_var3);
|
||||
|
||||
|
||||
void bli_packm_blk_var3( obj_t* beta,
|
||||
obj_t* c,
|
||||
void bli_packm_blk_var3( obj_t* c,
|
||||
obj_t* p )
|
||||
{
|
||||
num_t dt_cp = bli_obj_datatype( *c );
|
||||
@@ -88,10 +87,16 @@ void bli_packm_blk_var3( obj_t* beta,
|
||||
dim_t pd_p = bli_obj_panel_dim( *p );
|
||||
inc_t ps_p = bli_obj_panel_stride( *p );
|
||||
|
||||
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
|
||||
void* buf_kappa;
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
// This variant assumes that the micro-kernel will always apply the
|
||||
// alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
|
||||
// for kappa so that the underlying packm implementation does not
|
||||
// scale during packing.
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
f = ftypes[dt_cp];
|
||||
@@ -109,7 +114,7 @@ void bli_packm_blk_var3( obj_t* beta,
|
||||
n_p,
|
||||
m_max_p,
|
||||
n_max_p,
|
||||
buf_beta,
|
||||
buf_kappa,
|
||||
buf_c, rs_c, cs_c,
|
||||
buf_p, rs_p, cs_p,
|
||||
pd_p, ps_p );
|
||||
@@ -132,16 +137,16 @@ void PASTEMAC(ch,varname )( \
|
||||
dim_t n, \
|
||||
dim_t m_max, \
|
||||
dim_t n_max, \
|
||||
void* beta, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p, \
|
||||
dim_t pd_p, inc_t ps_p \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict c_begin; \
|
||||
ctype* restrict p_begin; \
|
||||
\
|
||||
@@ -317,7 +322,7 @@ void PASTEMAC(ch,varname )( \
|
||||
PASTEMAC(ch,packm_cxk)( conjc, \
|
||||
panel_dim_i, \
|
||||
panel_len_i, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c_use, incc, ldc, \
|
||||
p_use, ldp ); \
|
||||
\
|
||||
@@ -328,7 +333,7 @@ void PASTEMAC(ch,varname )( \
|
||||
PASTEMAC2(ch,ch,setd_unb_var1)( diagoffp, \
|
||||
*m_panel_use, \
|
||||
*n_panel_use, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
p_use, rs_p, cs_p ); \
|
||||
} \
|
||||
\
|
||||
@@ -378,7 +383,7 @@ void PASTEMAC(ch,varname )( \
|
||||
PASTEMAC(ch,packm_cxk)( conjc, \
|
||||
panel_dim_i, \
|
||||
panel_len_i, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c_use, incc, ldc, \
|
||||
p_use, ldp ); \
|
||||
\
|
||||
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_blk_var3( obj_t* beta,
|
||||
obj_t* c,
|
||||
void bli_packm_blk_var3( obj_t* c,
|
||||
obj_t* p );
|
||||
|
||||
|
||||
@@ -53,7 +52,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t n, \
|
||||
dim_t m_max, \
|
||||
dim_t n_max, \
|
||||
void* beta, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p, \
|
||||
dim_t pd_p, inc_t ps_p \
|
||||
|
||||
@@ -34,33 +34,49 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_packm_check( obj_t* beta,
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
packm_t* cntl )
|
||||
|
||||
void bli_packm_init_check( obj_t* a,
|
||||
obj_t* p,
|
||||
packm_t* cntl )
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_noninteger_object( beta );
|
||||
e_val = bli_check_floating_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( c );
|
||||
bli_check_error_code( e_val );
|
||||
// Check control tree pointer.
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( beta );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// We don't check for conformal dimensions between c and p because
|
||||
// p has not yet been initialized.
|
||||
|
||||
// Check control tree pointer
|
||||
|
||||
// NOTE: We can't check the control tree until we stop interpreting a
|
||||
// NULL value (in bli_packm_int()) as a request to skip the operation.
|
||||
// NOTE: We can't check the control tree because we interpret a NULL
|
||||
// value (in bli_packm_int()) as a request to skip the operation.
|
||||
//e_val = bli_check_valid_cntl( ( void* )cntl );
|
||||
//bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_packm_int_check( obj_t* a,
|
||||
obj_t* p,
|
||||
packm_t* cntl )
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( p );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_conformal_dims( a, p );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check control tree pointer.
|
||||
|
||||
// NOTE: We can't check the control tree because we interpret a NULL
|
||||
// value (in bli_packm_int()) as a request to skip the operation.
|
||||
//e_val = bli_check_valid_cntl( ( void* )cntl );
|
||||
//bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
@@ -32,7 +32,10 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_check( obj_t* beta,
|
||||
obj_t* c,
|
||||
obj_t* p,
|
||||
packm_t* cntl );
|
||||
void bli_packm_init_check( obj_t* a,
|
||||
obj_t* p,
|
||||
packm_t* cntl );
|
||||
|
||||
void bli_packm_int_check( obj_t* a,
|
||||
obj_t* p,
|
||||
packm_t* cntl );
|
||||
|
||||
@@ -56,7 +56,7 @@ void bli_packm_init( obj_t* a,
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_packm_check( &BLIS_ONE, a, p, cntl );
|
||||
bli_packm_init_check( a, p, cntl );
|
||||
|
||||
// First check if we are to skip this operation because the control tree
|
||||
// is NULL, and if so, simply alias the object to its packed counterpart.
|
||||
|
||||
@@ -36,8 +36,7 @@
|
||||
|
||||
#define FUNCPTR_T packm_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)( obj_t* beta,
|
||||
obj_t* a,
|
||||
typedef void (*FUNCPTR_T)( obj_t* a,
|
||||
obj_t* p );
|
||||
|
||||
static FUNCPTR_T vars[6][3] =
|
||||
@@ -51,20 +50,17 @@ static FUNCPTR_T vars[6][3] =
|
||||
{ NULL, NULL, NULL, },
|
||||
};
|
||||
|
||||
void bli_packm_int( obj_t* beta,
|
||||
obj_t* a,
|
||||
void bli_packm_int( obj_t* a,
|
||||
obj_t* p,
|
||||
packm_t* cntl )
|
||||
{
|
||||
obj_t* beta_use;
|
||||
|
||||
varnum_t n;
|
||||
impl_t i;
|
||||
FUNCPTR_T f;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_packm_check( beta, a, p, cntl );
|
||||
bli_packm_int_check( a, p, cntl );
|
||||
|
||||
// Sanity check; A should never have a zero dimension. If we must support
|
||||
// it, then we should fold it into the next alias-and-early-exit block.
|
||||
@@ -106,13 +102,35 @@ void bli_packm_int( obj_t* beta,
|
||||
return;
|
||||
}
|
||||
|
||||
// Notice that a beta parameter is always passed in. This value is allowed
|
||||
// to be non-unit even when no scaling is prescribed. If the control tree
|
||||
// indicates no scaling, then make sure that BLIS_ONE is passed into the
|
||||
// packm implementation.
|
||||
//if ( cntl_does_scale( cntl ) ) beta_use = beta;
|
||||
//else beta_use = &BLIS_ONE;
|
||||
beta_use = &BLIS_ONE;
|
||||
/*
|
||||
// The value for kappa we use will depend on whether the scalar
|
||||
// attached to A has a nonzero imaginary component. If it does,
|
||||
// then we will apply the scalar during packing to facilitate
|
||||
// implementing complex domain micro-kernels in terms of their
|
||||
// real domain counterparts. (In the aforementioned situation,
|
||||
// applying a real scalar is easy, but applying a complex one is
|
||||
// harder, so we avoid the need altogether with the code below.)
|
||||
if ( bli_obj_scalar_has_nonzero_imag( a ) )
|
||||
{
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
|
||||
|
||||
// Detach the scalar.
|
||||
bli_obj_scalar_detach( a, &kappa );
|
||||
|
||||
// Reset the attached scalar (to 1.0).
|
||||
bli_obj_scalar_reset( a );
|
||||
|
||||
kappa_p = &kappa;
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the internal scalar of A has only a real component, then
|
||||
// we will apply it later (in the micro-kernel), and so we will
|
||||
// use BLIS_ONE to indicate no scaling during packing.
|
||||
kappa_p = &BLIS_ONE;
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
@@ -121,9 +139,8 @@ void bli_packm_int( obj_t* beta,
|
||||
// Index into the variant array to extract the correct function pointer.
|
||||
f = vars[n][i];
|
||||
|
||||
// Invoke the variant with beta_use.
|
||||
f( beta_use,
|
||||
a,
|
||||
// Invoke the variant with kappa_use.
|
||||
f( a,
|
||||
p );
|
||||
}
|
||||
|
||||
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_int( obj_t* beta,
|
||||
obj_t* c,
|
||||
void bli_packm_int( obj_t* a,
|
||||
obj_t* p,
|
||||
packm_t* cntl );
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ typedef void (*FUNCPTR_T)(
|
||||
dim_t n,
|
||||
dim_t m_max,
|
||||
dim_t n_max,
|
||||
void* beta,
|
||||
void* kappa,
|
||||
void* c, inc_t rs_c, inc_t cs_c,
|
||||
void* p, inc_t rs_p, inc_t cs_p
|
||||
);
|
||||
@@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)(
|
||||
static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1);
|
||||
|
||||
|
||||
void bli_packm_unb_var1( obj_t* beta,
|
||||
obj_t* c,
|
||||
void bli_packm_unb_var1( obj_t* c,
|
||||
obj_t* p )
|
||||
{
|
||||
num_t dt_cp = bli_obj_datatype( *c );
|
||||
@@ -81,7 +80,7 @@ void bli_packm_unb_var1( obj_t* beta,
|
||||
inc_t rs_p = bli_obj_row_stride( *p );
|
||||
inc_t cs_p = bli_obj_col_stride( *p );
|
||||
|
||||
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
|
||||
void* buf_kappa;
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
@@ -89,6 +88,12 @@ void bli_packm_unb_var1( obj_t* beta,
|
||||
if ( bli_obj_is_dense( *p ) ) densify = TRUE;
|
||||
else densify = FALSE;
|
||||
|
||||
// This variant assumes that the computational kernel will always apply
|
||||
// the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
|
||||
// for kappa so that the underlying packm implementation does not scale
|
||||
// during packing.
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
f = ftypes[dt_cp];
|
||||
@@ -104,7 +109,7 @@ void bli_packm_unb_var1( obj_t* beta,
|
||||
n_p,
|
||||
m_max_p,
|
||||
n_max_p,
|
||||
buf_beta,
|
||||
buf_kappa,
|
||||
buf_c, rs_c, cs_c,
|
||||
buf_p, rs_p, cs_p );
|
||||
}
|
||||
@@ -124,20 +129,20 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t n, \
|
||||
dim_t m_max, \
|
||||
dim_t n_max, \
|
||||
void* beta, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p \
|
||||
) \
|
||||
{ \
|
||||
ctype* restrict beta_cast = beta; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict kappa_cast = kappa; \
|
||||
ctype* restrict c_cast = c; \
|
||||
ctype* restrict p_cast = p; \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
\
|
||||
/* We begin by packing the region indicated by the parameters. If
|
||||
matrix c is dense (either because the structure is general or
|
||||
because the structure has already been "densified"), this ends
|
||||
up being the only action we take. Note that if beta is unit,
|
||||
up being the only action we take. Note that if kappa is unit,
|
||||
the data is simply copied (rather than scaled by one). */ \
|
||||
PASTEMAC3(ch,ch,ch,scal2m)( diagoffc, \
|
||||
diagc, \
|
||||
@@ -145,7 +150,7 @@ void PASTEMAC(ch,varname)( \
|
||||
transc, \
|
||||
m, \
|
||||
n, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c_cast, rs_c, cs_c, \
|
||||
p_cast, rs_p, cs_p ); \
|
||||
\
|
||||
@@ -184,7 +189,7 @@ void PASTEMAC(ch,varname)( \
|
||||
transc, \
|
||||
m, \
|
||||
n, \
|
||||
beta_cast, \
|
||||
kappa_cast, \
|
||||
c_cast, rs_c, cs_c, \
|
||||
p_cast, rs_p, cs_p ); \
|
||||
} \
|
||||
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_packm_unb_var1( obj_t* beta,
|
||||
obj_t* c,
|
||||
void bli_packm_unb_var1( obj_t* c,
|
||||
obj_t* p );
|
||||
|
||||
|
||||
@@ -51,7 +50,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t n, \
|
||||
dim_t m_max, \
|
||||
dim_t n_max, \
|
||||
void* beta, \
|
||||
void* kappa, \
|
||||
void* c, inc_t rs_c, inc_t cs_c, \
|
||||
void* p, inc_t rs_p, inc_t cs_p \
|
||||
);
|
||||
|
||||
@@ -83,7 +83,7 @@ void bli_packm_blk_var1( obj_t* beta,
|
||||
inc_t cs_p = bli_obj_col_stride( *p );
|
||||
inc_t ps_p = bli_obj_panel_stride( *p );
|
||||
|
||||
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
|
||||
@@ -83,7 +83,7 @@ void bli_packm_blk_var2( obj_t* beta,
|
||||
dim_t pd_p = bli_obj_panel_dim( *p );
|
||||
inc_t ps_p = bli_obj_panel_stride( *p );
|
||||
|
||||
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
|
||||
@@ -53,7 +53,7 @@ void bli_scal2m( obj_t* beta,
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of beta.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
|
||||
@@ -43,28 +43,12 @@ extern scalm_t* scalm_cntl;
|
||||
void bli_scalm( obj_t* beta,
|
||||
obj_t* x )
|
||||
{
|
||||
num_t dt_x;
|
||||
obj_t beta_local;
|
||||
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_scalm_check( beta, x );
|
||||
|
||||
// Use the datatype of x as the target type for beta (since we do
|
||||
// not assume mixed domain/type support is enabled).
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of beta.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
|
||||
bli_scalm_unb_var1( &beta_local, x );
|
||||
/*
|
||||
bli_scalm_int( &beta_local,
|
||||
bli_scalm_int( beta,
|
||||
x,
|
||||
scalm_cntl );
|
||||
*/
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -36,8 +36,7 @@
|
||||
|
||||
#define FUNCPTR_T scalm_fp
|
||||
|
||||
typedef void (*FUNCPTR_T)( obj_t* beta,
|
||||
obj_t* x );
|
||||
typedef void (*FUNCPTR_T)( obj_t* x );
|
||||
|
||||
static FUNCPTR_T vars[1][3] =
|
||||
{
|
||||
@@ -49,6 +48,7 @@ void bli_scalm_int( obj_t* beta,
|
||||
obj_t* x,
|
||||
scalm_t* cntl )
|
||||
{
|
||||
obj_t x_local;
|
||||
varnum_t n;
|
||||
impl_t i;
|
||||
FUNCPTR_T f;
|
||||
@@ -63,8 +63,18 @@ void bli_scalm_int( obj_t* beta,
|
||||
// Return early if one of the matrix operands has a zero dimension.
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
|
||||
// Return early if the beta scalar equals one.
|
||||
if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return;
|
||||
// Return early if both beta and the scalar attached to x are unit.
|
||||
if ( bli_obj_equals( beta, &BLIS_ONE ) &&
|
||||
bli_obj_scalar_equals( x, &BLIS_ONE ) ) return;
|
||||
|
||||
// Alias x to x_local so we can apply beta if it is non-unit.
|
||||
bli_obj_alias_to( *x, x_local );
|
||||
|
||||
// If beta is non-unit, apply it to the scalar attached to x.
|
||||
if ( !bli_obj_equals( beta, &BLIS_ONE ) )
|
||||
{
|
||||
bli_obj_scalar_apply_scalar( beta, &x_local );
|
||||
}
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
@@ -74,7 +84,6 @@ void bli_scalm_int( obj_t* beta,
|
||||
f = vars[n][i];
|
||||
|
||||
// Invoke the variant.
|
||||
f( beta,
|
||||
x );
|
||||
f( &x_local );
|
||||
}
|
||||
|
||||
|
||||
@@ -59,12 +59,10 @@ static FUNCPTR_T GENARRAY2_MIN(ftypes,scalm_unb_var1);
|
||||
#endif
|
||||
|
||||
|
||||
void bli_scalm_unb_var1( obj_t* beta,
|
||||
obj_t* x )
|
||||
void bli_scalm_unb_var1( obj_t* x )
|
||||
{
|
||||
num_t dt_x = bli_obj_datatype( *x );
|
||||
|
||||
conj_t conjbeta = bli_obj_conj_status( *beta );
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x );
|
||||
uplo_t uplox = bli_obj_uplo( *x );
|
||||
|
||||
@@ -76,21 +74,25 @@ void bli_scalm_unb_var1( obj_t* beta,
|
||||
inc_t cs_x = bli_obj_col_stride( *x );
|
||||
|
||||
void* buf_beta;
|
||||
num_t dt_beta;
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
// If beta is a scalar constant, use dt_x to extract the address of the
|
||||
// corresponding constant value; otherwise, use the datatype encoded
|
||||
// within the beta object and extract the buffer at the beta offset.
|
||||
bli_set_scalar_dt_buffer( beta, dt_x, dt_beta, buf_beta );
|
||||
|
||||
// Grab the address of the internal scalar buffer for the scalar
|
||||
// attached to x.
|
||||
buf_beta = bli_obj_internal_scalar_buffer( *x );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
f = ftypes[dt_beta][dt_x];
|
||||
// NOTE: We use dt_x for both beta and x because beta was obtained
|
||||
// from the attached scalar of x, which is guaranteed to be of the
|
||||
// same datatype as x.
|
||||
f = ftypes[dt_x][dt_x];
|
||||
|
||||
// Invoke the function.
|
||||
f( conjbeta,
|
||||
// NOTE: We unconditionally pass in BLIS_NO_CONJUGATE for beta
|
||||
// because it would have already been conjugated by the front-end.
|
||||
f( BLIS_NO_CONJUGATE,
|
||||
diagoffx,
|
||||
uplox,
|
||||
m,
|
||||
|
||||
@@ -32,8 +32,7 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_scalm_unb_var1( obj_t* beta,
|
||||
obj_t* x );
|
||||
void bli_scalm_unb_var1( obj_t* x );
|
||||
|
||||
|
||||
#undef GENTPROT2
|
||||
|
||||
@@ -52,7 +52,7 @@ void bli_setm( obj_t* beta,
|
||||
dt_x = bli_obj_datatype( *x );
|
||||
|
||||
// Create an object to hold a copy-cast of beta.
|
||||
bli_obj_init_scalar_copy_of( dt_x,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_x,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
|
||||
@@ -78,7 +78,7 @@ void bli_gemv( obj_t* alpha,
|
||||
// the type union of the target datatypes of a and x to prevent any
|
||||
// unnecessary loss of information during the computation.
|
||||
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -89,7 +89,7 @@ void bli_gemv( obj_t* alpha,
|
||||
// the complex part of beta*y will not be stored. If y is complex and
|
||||
// beta is real then beta is harmlessly promoted to complex.
|
||||
dt_beta = dt_targ_y;
|
||||
bli_obj_init_scalar_copy_of( dt_beta,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_beta,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
@@ -188,8 +188,8 @@ void PASTEMAC(ch,opname)( \
|
||||
rs_x = incx; cs_x = m_x * incx; \
|
||||
rs_y = incy; cs_y = m_y * incy; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \
|
||||
|
||||
@@ -76,8 +76,7 @@ void bli_gemv_blk_var1( obj_t* alpha,
|
||||
cntl_sub_packv_y( cntl ) );
|
||||
|
||||
// Copy/pack A1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a1,
|
||||
bli_packm_int( &a1,
|
||||
&a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packv_int( &y1,
|
||||
|
||||
@@ -81,8 +81,7 @@ void bli_gemv_blk_var2( obj_t* alpha,
|
||||
cntl_sub_packv_x( cntl ) );
|
||||
|
||||
// Copy/pack A1, x1 (if needed).
|
||||
bli_packm_int( alpha,
|
||||
&a1,
|
||||
bli_packm_int( &a1,
|
||||
&a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -99,11 +99,11 @@ void bli_gemv_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -99,11 +99,11 @@ void bli_gemv_unb_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -99,11 +99,11 @@ void bli_gemv_unf_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -99,11 +99,11 @@ void bli_gemv_unf_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -75,7 +75,7 @@ void bli_ger( obj_t* alpha,
|
||||
// the type union of the target datatypes of x and y to prevent any
|
||||
// unnecessary loss of information during the computation.
|
||||
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -148,7 +148,7 @@ void PASTEMAC(ch,opname)( \
|
||||
rs_x = incx; cs_x = m_x * incx; \
|
||||
rs_y = incy; cs_y = m_y * incy; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \
|
||||
|
||||
@@ -75,8 +75,7 @@ void bli_ger_blk_var1( obj_t* alpha,
|
||||
cntl_sub_packv_x( cntl ) );
|
||||
|
||||
// Copy/pack A1, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a1,
|
||||
bli_packm_int( &a1,
|
||||
&a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -75,8 +75,7 @@ void bli_ger_blk_var2( obj_t* alpha,
|
||||
cntl_sub_packv_y( cntl ) );
|
||||
|
||||
// Copy/pack A1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a1,
|
||||
bli_packm_int( &a1,
|
||||
&a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packv_int( &y1,
|
||||
|
||||
@@ -95,7 +95,7 @@ void bli_ger_int( conj_t conjx,
|
||||
bli_obj_toggle_conj( x_local );
|
||||
bli_obj_toggle_conj( y_local );
|
||||
|
||||
bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ),
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
|
||||
BLIS_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
|
||||
@@ -94,7 +94,7 @@ void bli_ger_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of x and y. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -94,7 +94,7 @@ void bli_ger_unb_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of x and y. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -78,7 +78,7 @@ void bli_hemv( obj_t* alpha,
|
||||
// the type union of the target datatypes of a and x to prevent any
|
||||
// unnecessary loss of information during the computation.
|
||||
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -89,7 +89,7 @@ void bli_hemv( obj_t* alpha,
|
||||
// the complex part of beta*y will not be stored. If y is complex and
|
||||
// beta is real then beta is harmlessly promoted to complex.
|
||||
dt_beta = dt_targ_y;
|
||||
bli_obj_init_scalar_copy_of( dt_beta,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_beta,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
@@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
rs_y = incy; cs_y = m * incy; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
|
||||
@@ -106,8 +106,7 @@ void bli_hemv_blk_var1( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -109,8 +109,7 @@ void bli_hemv_blk_var2( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -106,8 +106,7 @@ void bli_hemv_blk_var3( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -109,8 +109,7 @@ void bli_hemv_blk_var4( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unb_var1( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unb_var2( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unb_var3( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unb_var4( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unf_var1( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unf_var1a( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unf_var3( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
#if 0
|
||||
obj_t x_copy, y_copy;
|
||||
|
||||
@@ -101,11 +101,11 @@ void bli_hemv_unf_var3a( conj_t conjh,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// The datatype of beta MUST be the same as the datatype of y.
|
||||
dt_beta = dt_y;
|
||||
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
|
||||
|
||||
#if 0
|
||||
obj_t x_copy, y_copy;
|
||||
|
||||
@@ -68,7 +68,7 @@ void bli_her( obj_t* alpha,
|
||||
|
||||
// Create object to hold a copy-cast of alpha.
|
||||
dt_alpha = dt_targ_x;
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -151,7 +151,7 @@ void PASTEMAC(ch,opname)( \
|
||||
\
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt_r, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
|
||||
|
||||
@@ -90,8 +90,7 @@ void bli_her_blk_var1( conj_t conjh,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack C11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&c11,
|
||||
bli_packm_int( &c11,
|
||||
&c11_pack,
|
||||
cntl_sub_packm_c11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -90,8 +90,7 @@ void bli_her_blk_var2( conj_t conjh,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack C11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&c11,
|
||||
bli_packm_int( &c11,
|
||||
&c11_pack,
|
||||
cntl_sub_packm_c11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -75,13 +75,13 @@ void bli_her2( obj_t* alpha,
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
// the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
|
||||
// Also create a conjugated copy of alpha.
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_conj_local );
|
||||
@@ -171,7 +171,7 @@ void PASTEMAC(ch,opname)( \
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
rs_y = incy; cs_y = m * incy; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
|
||||
|
||||
@@ -101,8 +101,7 @@ void bli_her2_blk_var1( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack C11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&c11,
|
||||
bli_packm_int( &c11,
|
||||
&c11_pack,
|
||||
cntl_sub_packm_c11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -104,8 +104,7 @@ void bli_her2_blk_var2( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack C11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&c11,
|
||||
bli_packm_int( &c11,
|
||||
&c11_pack,
|
||||
cntl_sub_packm_c11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -104,8 +104,7 @@ void bli_her2_blk_var3( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack C11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&c11,
|
||||
bli_packm_int( &c11,
|
||||
&c11_pack,
|
||||
cntl_sub_packm_c11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -101,8 +101,7 @@ void bli_her2_blk_var4( conj_t conjh,
|
||||
cntl_sub_packv_y1( cntl ) );
|
||||
|
||||
// Copy/pack C11, x1, y1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&c11,
|
||||
bli_packm_int( &c11,
|
||||
&c11_pack,
|
||||
cntl_sub_packm_c11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -93,11 +93,11 @@ void bli_her2_int( conj_t conjh,
|
||||
bli_obj_toggle_conj( x_local );
|
||||
bli_obj_toggle_conj( y_local );
|
||||
|
||||
bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ),
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
|
||||
BLIS_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha_conj ),
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha_conj ),
|
||||
BLIS_CONJUGATE,
|
||||
alpha_conj,
|
||||
&alpha_conj_local );
|
||||
|
||||
@@ -96,7 +96,7 @@ void bli_her2_unb_var1( conj_t conjh,
|
||||
|
||||
// The datatype of alpha MUST be the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -96,7 +96,7 @@ void bli_her2_unb_var2( conj_t conjh,
|
||||
|
||||
// The datatype of alpha MUST be the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -96,7 +96,7 @@ void bli_her2_unb_var3( conj_t conjh,
|
||||
|
||||
// The datatype of alpha MUST be the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -96,7 +96,7 @@ void bli_her2_unb_var4( conj_t conjh,
|
||||
|
||||
// The datatype of alpha MUST be the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -96,7 +96,7 @@ void bli_her2_unf_var1( conj_t conjh,
|
||||
|
||||
// The datatype of alpha MUST be the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -96,7 +96,7 @@ void bli_her2_unf_var4( conj_t conjh,
|
||||
|
||||
// The datatype of alpha MUST be the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_x, dt_y );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -78,7 +78,7 @@ void bli_symv( obj_t* alpha,
|
||||
// the type union of the target datatypes of a and x to prevent any
|
||||
// unnecessary loss of information during the computation.
|
||||
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -89,7 +89,7 @@ void bli_symv( obj_t* alpha,
|
||||
// the complex part of beta*y will not be stored. If y is complex and
|
||||
// beta is real then beta is harmlessly promoted to complex.
|
||||
dt_beta = dt_targ_y;
|
||||
bli_obj_init_scalar_copy_of( dt_beta,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_beta,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
@@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
rs_y = incy; cs_y = m * incy; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
|
||||
@@ -70,7 +70,7 @@ void bli_syr( obj_t* alpha,
|
||||
// the type union of the target datatypes of x and c to prevent any
|
||||
// unnecessary loss of information during the computation.
|
||||
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_c );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -152,7 +152,7 @@ void PASTEMAC(ch,opname)( \
|
||||
\
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
|
||||
|
||||
@@ -74,7 +74,7 @@ void bli_syr2( obj_t* alpha,
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
// the type union of the datatypes of x and y.
|
||||
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -164,7 +164,7 @@ void PASTEMAC(ch,opname)( \
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
rs_y = incy; cs_y = m * incy; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
|
||||
|
||||
@@ -70,7 +70,7 @@ void bli_trmv( obj_t* alpha,
|
||||
// the type union of the target datatypes of a and x to prevent any
|
||||
// unnecessary loss of information during the computation.
|
||||
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -150,7 +150,7 @@ void PASTEMAC(ch,opname)( \
|
||||
\
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
|
||||
@@ -80,8 +80,7 @@ void bli_trmv_l_blk_var1( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -80,8 +80,7 @@ void bli_trmv_l_blk_var2( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -80,8 +80,7 @@ void bli_trmv_u_blk_var1( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -80,8 +80,7 @@ void bli_trmv_u_blk_var2( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trmv_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trmv_unb_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trmv_unf_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trmv_unf_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -70,7 +70,7 @@ void bli_trsv( obj_t* alpha,
|
||||
// the type union of the target datatypes of a and x to prevent any
|
||||
// unnecessary loss of information during the computation.
|
||||
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
bli_obj_scalar_init_detached_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -147,7 +147,7 @@ void PASTEMAC(ch,opname)( \
|
||||
\
|
||||
rs_x = incx; cs_x = m * incx; \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
|
||||
@@ -85,8 +85,7 @@ void bli_trsv_l_blk_var1( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -85,8 +85,7 @@ void bli_trsv_l_blk_var2( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -85,8 +85,7 @@ void bli_trsv_u_blk_var1( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -85,8 +85,7 @@ void bli_trsv_u_blk_var2( obj_t* alpha,
|
||||
cntl_sub_packv_x1( cntl ) );
|
||||
|
||||
// Copy/pack A11, x1 (if needed).
|
||||
bli_packm_int( &BLIS_ONE,
|
||||
&a11,
|
||||
bli_packm_int( &a11,
|
||||
&a11_pack,
|
||||
cntl_sub_packm_a11( cntl ) );
|
||||
bli_packv_int( &x1,
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trsv_unb_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trsv_unb_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trsv_unf_var1( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -88,7 +88,7 @@ void bli_trsv_unf_var2( obj_t* alpha,
|
||||
// The datatype of alpha MUST be the type union of a and x. This is to
|
||||
// prevent any unnecessary loss of information during computation.
|
||||
dt_alpha = bli_datatype_union( dt_a, dt_x );
|
||||
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
|
||||
@@ -48,21 +48,16 @@ void bli_gemm( obj_t* alpha,
|
||||
obj_t* c )
|
||||
{
|
||||
gemm_t* cntl;
|
||||
obj_t alpha_local;
|
||||
obj_t beta_local;
|
||||
obj_t a_local;
|
||||
obj_t b_local;
|
||||
obj_t c_local;
|
||||
num_t dt_alpha;
|
||||
num_t dt_beta;
|
||||
bool_t pack_c;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_gemm_check( alpha, a, b, beta, c );
|
||||
|
||||
// If alpha is zero, scale by beta and return.
|
||||
if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) )
|
||||
if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
|
||||
{
|
||||
bli_scalm( beta, c );
|
||||
return;
|
||||
@@ -86,29 +81,6 @@ void bli_gemm( obj_t* alpha,
|
||||
bli_obj_induce_trans( c_local );
|
||||
}
|
||||
|
||||
// Set the target and execution datatypes of the objects, and apply
|
||||
// any transformations necessary to handle mixed domain computation.
|
||||
bli_gemm_set_targ_exec_datatypes( &a_local,
|
||||
&b_local,
|
||||
&c_local,
|
||||
&dt_alpha,
|
||||
&dt_beta,
|
||||
&pack_c );
|
||||
|
||||
// Create an object to hold a copy-cast of alpha.
|
||||
bli_obj_init_scalar_copy_of( dt_alpha,
|
||||
BLIS_NO_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
|
||||
// Create an object to hold a copy-cast of beta.
|
||||
bli_obj_init_scalar_copy_of( dt_beta,
|
||||
BLIS_NO_CONJUGATE,
|
||||
beta,
|
||||
&beta_local );
|
||||
|
||||
if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
|
||||
|
||||
// Choose the control tree.
|
||||
cntl = gemm_cntl;
|
||||
|
||||
@@ -122,10 +94,10 @@ void bli_gemm( obj_t* alpha,
|
||||
#endif
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_gemm_int( &alpha_local,
|
||||
bli_gemm_int( alpha,
|
||||
&a_local,
|
||||
&b_local,
|
||||
&beta_local,
|
||||
beta,
|
||||
&c_local,
|
||||
cntl );
|
||||
}
|
||||
@@ -159,8 +131,8 @@ void PASTEMAC(ch,opname)( \
|
||||
bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \
|
||||
bli_set_dims_with_trans( transb, k, n, m_b, n_b ); \
|
||||
\
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
|
||||
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
|
||||
\
|
||||
bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \
|
||||
|
||||
@@ -34,10 +34,8 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_gemm_blk_var1( obj_t* alpha,
|
||||
obj_t* a,
|
||||
void bli_gemm_blk_var1( obj_t* a,
|
||||
obj_t* b,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
gemm_t* cntl )
|
||||
{
|
||||
@@ -58,7 +56,7 @@ void bli_gemm_blk_var1( obj_t* alpha,
|
||||
m_trans = bli_obj_length_after_trans( *a );
|
||||
|
||||
// Scale C by its attached scalar (if instructed).
|
||||
bli_scalm_int( beta,
|
||||
bli_scalm_int( &BLIS_ONE,
|
||||
c,
|
||||
cntl_sub_scalm( cntl ) );
|
||||
|
||||
@@ -66,9 +64,8 @@ void bli_gemm_blk_var1( obj_t* alpha,
|
||||
bli_packm_init( b, &b_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Pack B and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
b, &b_pack,
|
||||
// Pack B (if instructed).
|
||||
bli_packm_int( b, &b_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Partition along the m dimension.
|
||||
@@ -93,21 +90,19 @@ void bli_gemm_blk_var1( obj_t* alpha,
|
||||
bli_packm_init( &c1, &c1_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Pack A1 and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
&a1, &a1_pack,
|
||||
// Pack A1 (if instructed).
|
||||
bli_packm_int( &a1, &a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta,
|
||||
&c1, &c1_pack,
|
||||
// Pack C1 (if instructed).
|
||||
bli_packm_int( &c1, &c1_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Perform gemm subproblem.
|
||||
bli_gemm_int( alpha,
|
||||
bli_gemm_int( &BLIS_ONE,
|
||||
&a1_pack,
|
||||
&b_pack,
|
||||
beta,
|
||||
&BLIS_ONE,
|
||||
&c1_pack,
|
||||
cntl_sub_gemm( cntl ) );
|
||||
|
||||
|
||||
@@ -32,10 +32,8 @@
|
||||
|
||||
*/
|
||||
|
||||
void bli_gemm_blk_var1( obj_t* alpha,
|
||||
obj_t* a,
|
||||
void bli_gemm_blk_var1( obj_t* a,
|
||||
obj_t* b,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
gemm_t* cntl );
|
||||
|
||||
|
||||
Some files were not shown because too many files have changed in this diff. (Show More)
Reference in New Issue
Block a user