Added new "attached" scalar representation.

Details:
- Added infrastructure to support a new scalar representation, whereby
  every object contains an internal scalar that defaults to 1.0. This
  facilitates passing scalars around without having to house them in
  separate objects. These "attached" scalars are stored in the internal
  atom_t field of the obj_t struct, and are always stored to be the same
  datatype as the object to which they are attached. Level-3 variants no
  longer take scalar arguments, however, level-3 internal back-ends still
  do; this is so that the calling function can perform subproblems such
  as C := C - alpha * A * B on-the-fly without needing to change either
  of the scalars attached to A or B.
- Removed scalar argument from packm_int().
- Observe and apply attached scalars in scalm_int(), and removed scalar
  from interface of scalm_unb_var1().
- Renamed the following functions (and corresponding invocations):

   bli_obj_init_scalar_copy_of()
                           -> bli_obj_scalar_init_detached_copy_of()
   bli_obj_init_scalar()   -> bli_obj_scalar_init_detached()
   bli_obj_create_scalar_with_attached_buffer()
                           -> bli_obj_create_1x1_with_attached_buffer()
   bli_obj_scalar_equals() -> bli_obj_equals()

- Defined new functions:

   bli_obj_scalar_detach()
   bli_obj_scalar_attach()
   bli_obj_scalar_apply_scalar()
   bli_obj_scalar_reset()
   bli_obj_scalar_has_nonzero_imag()
   bli_obj_scalar_equals()

- Placed all bli_obj_scalar_* functions in a new file, bli_obj_scalar.c.
- Renamed the following macros:

   bli_obj_scalar_buffer() -> bli_obj_buffer_for_1x1()
   bli_obj_is_scalar()     -> bli_obj_is_1x1()

- Defined new macros to set and copy internal scalars between objects:

   bli_obj_set_internal_scalar()
   bli_obj_copy_internal_scalar()

- In level-3 internal back-ends, added conditional blocks where alpha and
  beta are checked for non-unit-ness. Those values for alpha and beta are
  applied to the scalars attached to aliases of A/B/C, as appropriate,
  before being passed into the variant specified by the control tree.
- In level-3 blocked variants, pass BLIS_ONE into subproblems instead of
  alpha and/or beta.
- In level-3 macro-kernels, changed how scalars are obtained. Now, scalars
  attached to A and B are multiplied together to obtain alpha, while beta
  is obtained directly from C.
- In level-3 front-ends, removed old function calls meant to provide
  future support for mixed domain/precision. These can be added back later
  once that functionality is given proper treatment. Also, removed the
  creation of copy-casts of alpha and beta since typecasting of scalars
  is now implicitly handled in the internal back-ends when alpha and
  beta are applied to the attached scalars.
This commit is contained in:
Field G. Van Zee
2013-12-03 16:08:30 -06:00
parent 992de486d6
commit b444489f10
247 changed files with 1461 additions and 1645 deletions

View File

@@ -56,7 +56,7 @@ void bli_getsc( obj_t* chi,
// If chi is a constant object, default to using the dcomplex
// value within since we don't know if the caller needs just the
// real or the real and imaginary parts.
void* buf_chi = bli_obj_scalar_buffer( dt_def, *chi );
void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi );
FUNCPTR_T f;

View File

@@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \
dt_x = bli_obj_datatype( *x ); \
\
/* Create an object to hold a copy-cast of alpha. */ \
bli_obj_init_scalar_copy_of( dt_x, \
bli_obj_scalar_init_detached_copy_of( dt_x, \
BLIS_NO_CONJUGATE, \
alpha, \
&alpha_local ); \

View File

@@ -93,11 +93,11 @@ void bli_dotxv_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of x and y. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of rho.
dt_beta = dt_rho;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \
dt_x = bli_obj_datatype( *x ); \
\
/* Create an object to hold a copy-cast of beta. */ \
bli_obj_init_scalar_copy_of( dt_x, \
bli_obj_scalar_init_detached_copy_of( dt_x, \
BLIS_NO_CONJUGATE, \
beta, \
&beta_local ); \

View File

@@ -57,7 +57,7 @@ void PASTEMAC0(opname)( \
dt_x = bli_obj_datatype( *x ); \
\
/* Create an object to hold a copy-cast of beta. */ \
bli_obj_init_scalar_copy_of( dt_x, \
bli_obj_scalar_init_detached_copy_of( dt_x, \
BLIS_NO_CONJUGATE, \
beta, \
&beta_local ); \

View File

@@ -64,7 +64,7 @@ void bli_scalv_int( obj_t* beta,
if ( bli_obj_has_zero_dim( *x ) ) return;
// Return early if the beta scalar equals one.
if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return;
if ( bli_obj_equals( beta, &BLIS_ONE ) ) return;
// Extract the variant number and implementation type.
n = cntl_var_num( cntl );

View File

@@ -58,7 +58,7 @@ void PASTEMAC0(opname)( \
dt_x = bli_obj_datatype( *x ); \
\
/* Create an object to hold a copy-cast of beta. */ \
bli_obj_init_scalar_copy_of( dt_x, \
bli_obj_scalar_init_detached_copy_of( dt_x, \
BLIS_NO_CONJUGATE, \
beta, \
&beta_local ); \

View File

@@ -53,7 +53,7 @@ void bli_axpyd( obj_t* alpha,
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of alpha.
bli_obj_init_scalar_copy_of( dt_x,
bli_obj_scalar_init_detached_copy_of( dt_x,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );

View File

@@ -53,7 +53,7 @@ void bli_scal2d( obj_t* beta,
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of alpha.
bli_obj_init_scalar_copy_of( dt_x,
bli_obj_scalar_init_detached_copy_of( dt_x,
BLIS_NO_CONJUGATE,
beta,
&beta_local );

View File

@@ -52,7 +52,7 @@ void bli_scald( obj_t* beta,
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of alpha.
bli_obj_init_scalar_copy_of( dt_x,
bli_obj_scalar_init_detached_copy_of( dt_x,
BLIS_NO_CONJUGATE,
beta,
&beta_local );

View File

@@ -52,7 +52,7 @@ void bli_setd( obj_t* beta,
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of alpha.
bli_obj_init_scalar_copy_of( dt_x,
bli_obj_scalar_init_detached_copy_of( dt_x,
BLIS_NO_CONJUGATE,
beta,
&beta_local );

View File

@@ -93,7 +93,7 @@ void bli_axpyf_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -113,11 +113,11 @@ void bli_dotxaxpyf_unb_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -98,11 +98,11 @@ void bli_dotxf_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -53,7 +53,7 @@ void bli_axpym( obj_t* alpha,
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of alpha.
bli_obj_init_scalar_copy_of( dt_x,
bli_obj_scalar_init_detached_copy_of( dt_x,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );

View File

@@ -46,7 +46,7 @@ typedef void (*FUNCPTR_T)(
dim_t n,
dim_t m_max,
dim_t n_max,
void* beta,
void* kappa,
void* c, inc_t rs_c, inc_t cs_c,
void* p, inc_t rs_p, inc_t cs_p,
dim_t pd_p, inc_t ps_p
@@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)(
static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2);
void bli_packm_blk_var2( obj_t* beta,
obj_t* c,
void bli_packm_blk_var2( obj_t* c,
obj_t* p )
{
num_t dt_cp = bli_obj_datatype( *c );
@@ -82,10 +81,16 @@ void bli_packm_blk_var2( obj_t* beta,
dim_t pd_p = bli_obj_panel_dim( *p );
inc_t ps_p = bli_obj_panel_stride( *p );
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
void* buf_kappa;
FUNCPTR_T f;
// This variant assumes that the micro-kernel will always apply the
// alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
// for kappa so that the underlying packm implementation does not
// scale during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
// Index into the type combination array to extract the correct
// function pointer.
f = ftypes[dt_cp];
@@ -100,7 +105,7 @@ void bli_packm_blk_var2( obj_t* beta,
n_p,
m_max_p,
n_max_p,
buf_beta,
buf_kappa,
buf_c, rs_c, cs_c,
buf_p, rs_p, cs_p,
pd_p, ps_p );
@@ -120,16 +125,16 @@ void PASTEMAC(ch,varname )( \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* beta, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \
) \
{ \
ctype* restrict beta_cast = beta; \
ctype* restrict c_cast = c; \
ctype* restrict p_cast = p; \
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict kappa_cast = kappa; \
ctype* restrict c_cast = c; \
ctype* restrict p_cast = p; \
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict c_begin; \
ctype* restrict p_begin; \
\
@@ -338,7 +343,7 @@ void PASTEMAC(ch,varname )( \
PASTEMAC(ch,packm_cxk)( conjc10, \
p10_dim, \
p10_len, \
beta_cast, \
kappa_cast, \
c10, incc10, ldc10, \
p10, ldp ); \
\
@@ -347,7 +352,7 @@ void PASTEMAC(ch,varname )( \
PASTEMAC(ch,packm_cxk)( conjc12, \
p12_dim, \
p12_len, \
beta_cast, \
kappa_cast, \
c12, incc12, ldc12, \
p12, ldp ); \
\
@@ -358,7 +363,7 @@ void PASTEMAC(ch,varname )( \
conjc, \
p11_m, \
p11_n, \
beta_cast, \
kappa_cast, \
c11, rs_c, cs_c, \
p11, rs_p11, cs_p11 ); \
\
@@ -412,7 +417,7 @@ void PASTEMAC(ch,varname )( \
PASTEMAC(ch,packm_cxk)( conjc10, \
panel_dim_i, \
panel_len, \
beta_cast, \
kappa_cast, \
c10, incc10, ldc10, \
p_begin, ldp ); \
\

View File

@@ -32,8 +32,7 @@
*/
void bli_packm_blk_var2( obj_t* beta,
obj_t* c,
void bli_packm_blk_var2( obj_t* c,
obj_t* p );
@@ -50,7 +49,7 @@ void PASTEMAC(ch,varname)( \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* beta, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \

View File

@@ -49,7 +49,7 @@ typedef void (*FUNCPTR_T)(
dim_t n,
dim_t m_max,
dim_t n_max,
void* beta,
void* kappa,
void* c, inc_t rs_c, inc_t cs_c,
void* p, inc_t rs_p, inc_t cs_p,
dim_t pd_p, inc_t ps_p
@@ -58,8 +58,7 @@ typedef void (*FUNCPTR_T)(
static FUNCPTR_T GENARRAY(ftypes,packm_blk_var3);
void bli_packm_blk_var3( obj_t* beta,
obj_t* c,
void bli_packm_blk_var3( obj_t* c,
obj_t* p )
{
num_t dt_cp = bli_obj_datatype( *c );
@@ -88,10 +87,16 @@ void bli_packm_blk_var3( obj_t* beta,
dim_t pd_p = bli_obj_panel_dim( *p );
inc_t ps_p = bli_obj_panel_stride( *p );
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
void* buf_kappa;
FUNCPTR_T f;
// This variant assumes that the micro-kernel will always apply the
// alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
// for kappa so that the underlying packm implementation does not
// scale during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
// Index into the type combination array to extract the correct
// function pointer.
f = ftypes[dt_cp];
@@ -109,7 +114,7 @@ void bli_packm_blk_var3( obj_t* beta,
n_p,
m_max_p,
n_max_p,
buf_beta,
buf_kappa,
buf_c, rs_c, cs_c,
buf_p, rs_p, cs_p,
pd_p, ps_p );
@@ -132,16 +137,16 @@ void PASTEMAC(ch,varname )( \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* beta, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \
) \
{ \
ctype* restrict beta_cast = beta; \
ctype* restrict c_cast = c; \
ctype* restrict p_cast = p; \
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict kappa_cast = kappa; \
ctype* restrict c_cast = c; \
ctype* restrict p_cast = p; \
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict c_begin; \
ctype* restrict p_begin; \
\
@@ -317,7 +322,7 @@ void PASTEMAC(ch,varname )( \
PASTEMAC(ch,packm_cxk)( conjc, \
panel_dim_i, \
panel_len_i, \
beta_cast, \
kappa_cast, \
c_use, incc, ldc, \
p_use, ldp ); \
\
@@ -328,7 +333,7 @@ void PASTEMAC(ch,varname )( \
PASTEMAC2(ch,ch,setd_unb_var1)( diagoffp, \
*m_panel_use, \
*n_panel_use, \
beta_cast, \
kappa_cast, \
p_use, rs_p, cs_p ); \
} \
\
@@ -378,7 +383,7 @@ void PASTEMAC(ch,varname )( \
PASTEMAC(ch,packm_cxk)( conjc, \
panel_dim_i, \
panel_len_i, \
beta_cast, \
kappa_cast, \
c_use, incc, ldc, \
p_use, ldp ); \
\

View File

@@ -32,8 +32,7 @@
*/
void bli_packm_blk_var3( obj_t* beta,
obj_t* c,
void bli_packm_blk_var3( obj_t* c,
obj_t* p );
@@ -53,7 +52,7 @@ void PASTEMAC(ch,varname)( \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* beta, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p, \
dim_t pd_p, inc_t ps_p \

View File

@@ -34,33 +34,49 @@
#include "blis.h"
void bli_packm_check( obj_t* beta,
obj_t* c,
obj_t* p,
packm_t* cntl )
void bli_packm_init_check( obj_t* a,
obj_t* p,
packm_t* cntl )
{
err_t e_val;
// Check object datatypes.
e_val = bli_check_noninteger_object( beta );
e_val = bli_check_floating_object( a );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( c );
bli_check_error_code( e_val );
// Check control tree pointer.
// Check object dimensions.
e_val = bli_check_scalar_object( beta );
bli_check_error_code( e_val );
// We don't check for conformal dimensions between c and p because
// p has not yet been initialized.
// Check control tree pointer
// NOTE: We can't check the control tree until we stop interpreting a
// NULL value (in bli_packm_int()) as a request to skip the operation.
// NOTE: We can't check the control tree because we interpret a NULL
// value (in bli_packm_int()) as a request to skip the operation.
//e_val = bli_check_valid_cntl( ( void* )cntl );
//bli_check_error_code( e_val );
}
void bli_packm_int_check( obj_t* a,
obj_t* p,
packm_t* cntl )
{
err_t e_val;
// Check object datatypes.
e_val = bli_check_floating_object( a );
bli_check_error_code( e_val );
e_val = bli_check_floating_object( p );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_conformal_dims( a, p );
bli_check_error_code( e_val );
// Check control tree pointer.
// NOTE: We can't check the control tree because we interpret a NULL
// value (in bli_packm_int()) as a request to skip the operation.
//e_val = bli_check_valid_cntl( ( void* )cntl );
//bli_check_error_code( e_val );
}

View File

@@ -32,7 +32,10 @@
*/
void bli_packm_check( obj_t* beta,
obj_t* c,
obj_t* p,
packm_t* cntl );
void bli_packm_init_check( obj_t* a,
obj_t* p,
packm_t* cntl );
void bli_packm_int_check( obj_t* a,
obj_t* p,
packm_t* cntl );

View File

@@ -56,7 +56,7 @@ void bli_packm_init( obj_t* a,
// Check parameters.
if ( bli_error_checking_is_enabled() )
bli_packm_check( &BLIS_ONE, a, p, cntl );
bli_packm_init_check( a, p, cntl );
// First check if we are to skip this operation because the control tree
// is NULL, and if so, simply alias the object to its packed counterpart.

View File

@@ -36,8 +36,7 @@
#define FUNCPTR_T packm_fp
typedef void (*FUNCPTR_T)( obj_t* beta,
obj_t* a,
typedef void (*FUNCPTR_T)( obj_t* a,
obj_t* p );
static FUNCPTR_T vars[6][3] =
@@ -51,20 +50,17 @@ static FUNCPTR_T vars[6][3] =
{ NULL, NULL, NULL, },
};
void bli_packm_int( obj_t* beta,
obj_t* a,
void bli_packm_int( obj_t* a,
obj_t* p,
packm_t* cntl )
{
obj_t* beta_use;
varnum_t n;
impl_t i;
FUNCPTR_T f;
// Check parameters.
if ( bli_error_checking_is_enabled() )
bli_packm_check( beta, a, p, cntl );
bli_packm_int_check( a, p, cntl );
// Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block.
@@ -106,13 +102,35 @@ void bli_packm_int( obj_t* beta,
return;
}
// Notice that a beta parameter is always passed in. This value is allowed
// to be non-unit even when no scaling is prescribed. If the control tree
// indicates no scaling, then make sure that BLIS_ONE is passed into the
// packm implementation.
//if ( cntl_does_scale( cntl ) ) beta_use = beta;
//else beta_use = &BLIS_ONE;
beta_use = &BLIS_ONE;
/*
// The value for kappa we use will depend on whether the scalar
// attached to A has a nonzero imaginary component. If it does,
// then we will apply the scalar during packing to facilitate
// implementing complex domain micro-kernels in terms of their
// real domain counterparts. (In the aforementioned situation,
// applying a real scalar is easy, but applying a complex one is
// harder, so we avoid the need altogether with the code below.)
if ( bli_obj_scalar_has_nonzero_imag( a ) )
{
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
// Detach the scalar.
bli_obj_scalar_detach( a, &kappa );
// Reset the attached scalar (to 1.0).
bli_obj_scalar_reset( a );
kappa_p = &kappa;
}
else
{
// If the internal scalar of A has only a real component, then
// we will apply it later (in the micro-kernel), and so we will
// use BLIS_ONE to indicate no scaling during packing.
kappa_p = &BLIS_ONE;
}
*/
// Extract the variant number and implementation type.
n = cntl_var_num( cntl );
@@ -121,9 +139,8 @@ void bli_packm_int( obj_t* beta,
// Index into the variant array to extract the correct function pointer.
f = vars[n][i];
// Invoke the variant with beta_use.
f( beta_use,
a,
// Invoke the variant with kappa_use.
f( a,
p );
}

View File

@@ -32,8 +32,7 @@
*/
void bli_packm_int( obj_t* beta,
obj_t* c,
void bli_packm_int( obj_t* a,
obj_t* p,
packm_t* cntl );

View File

@@ -47,7 +47,7 @@ typedef void (*FUNCPTR_T)(
dim_t n,
dim_t m_max,
dim_t n_max,
void* beta,
void* kappa,
void* c, inc_t rs_c, inc_t cs_c,
void* p, inc_t rs_p, inc_t cs_p
);
@@ -55,8 +55,7 @@ typedef void (*FUNCPTR_T)(
static FUNCPTR_T GENARRAY(ftypes,packm_unb_var1);
void bli_packm_unb_var1( obj_t* beta,
obj_t* c,
void bli_packm_unb_var1( obj_t* c,
obj_t* p )
{
num_t dt_cp = bli_obj_datatype( *c );
@@ -81,7 +80,7 @@ void bli_packm_unb_var1( obj_t* beta,
inc_t rs_p = bli_obj_row_stride( *p );
inc_t cs_p = bli_obj_col_stride( *p );
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
void* buf_kappa;
FUNCPTR_T f;
@@ -89,6 +88,12 @@ void bli_packm_unb_var1( obj_t* beta,
if ( bli_obj_is_dense( *p ) ) densify = TRUE;
else densify = FALSE;
// This variant assumes that the computational kernel will always apply
// the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
// for kappa so that the underlying packm implementation does not scale
// during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
// Index into the type combination array to extract the correct
// function pointer.
f = ftypes[dt_cp];
@@ -104,7 +109,7 @@ void bli_packm_unb_var1( obj_t* beta,
n_p,
m_max_p,
n_max_p,
buf_beta,
buf_kappa,
buf_c, rs_c, cs_c,
buf_p, rs_p, cs_p );
}
@@ -124,20 +129,20 @@ void PASTEMAC(ch,varname)( \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* beta, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p \
) \
{ \
ctype* restrict beta_cast = beta; \
ctype* restrict c_cast = c; \
ctype* restrict p_cast = p; \
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict kappa_cast = kappa; \
ctype* restrict c_cast = c; \
ctype* restrict p_cast = p; \
ctype* restrict zero = PASTEMAC(ch,0); \
\
/* We begin by packing the region indicated by the parameters. If
matrix c is dense (either because the structure is general or
because the structure has already been "densified"), this ends
up being the only action we take. Note that if beta is unit,
up being the only action we take. Note that if kappa is unit,
the data is simply copied (rather than scaled by one). */ \
PASTEMAC3(ch,ch,ch,scal2m)( diagoffc, \
diagc, \
@@ -145,7 +150,7 @@ void PASTEMAC(ch,varname)( \
transc, \
m, \
n, \
beta_cast, \
kappa_cast, \
c_cast, rs_c, cs_c, \
p_cast, rs_p, cs_p ); \
\
@@ -184,7 +189,7 @@ void PASTEMAC(ch,varname)( \
transc, \
m, \
n, \
beta_cast, \
kappa_cast, \
c_cast, rs_c, cs_c, \
p_cast, rs_p, cs_p ); \
} \

View File

@@ -32,8 +32,7 @@
*/
void bli_packm_unb_var1( obj_t* beta,
obj_t* c,
void bli_packm_unb_var1( obj_t* c,
obj_t* p );
@@ -51,7 +50,7 @@ void PASTEMAC(ch,varname)( \
dim_t n, \
dim_t m_max, \
dim_t n_max, \
void* beta, \
void* kappa, \
void* c, inc_t rs_c, inc_t cs_c, \
void* p, inc_t rs_p, inc_t cs_p \
);

View File

@@ -83,7 +83,7 @@ void bli_packm_blk_var1( obj_t* beta,
inc_t cs_p = bli_obj_col_stride( *p );
inc_t ps_p = bli_obj_panel_stride( *p );
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta );
FUNCPTR_T f;

View File

@@ -83,7 +83,7 @@ void bli_packm_blk_var2( obj_t* beta,
dim_t pd_p = bli_obj_panel_dim( *p );
inc_t ps_p = bli_obj_panel_stride( *p );
void* buf_beta = bli_obj_scalar_buffer( dt_cp, *beta );
void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta );
FUNCPTR_T f;

View File

@@ -53,7 +53,7 @@ void bli_scal2m( obj_t* beta,
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of beta.
bli_obj_init_scalar_copy_of( dt_x,
bli_obj_scalar_init_detached_copy_of( dt_x,
BLIS_NO_CONJUGATE,
beta,
&beta_local );

View File

@@ -43,28 +43,12 @@ extern scalm_t* scalm_cntl;
void bli_scalm( obj_t* beta,
obj_t* x )
{
num_t dt_x;
obj_t beta_local;
if ( bli_error_checking_is_enabled() )
bli_scalm_check( beta, x );
// Use the datatype of x as the target type for beta (since we do
// not assume mixed domain/type support is enabled).
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of beta.
bli_obj_init_scalar_copy_of( dt_x,
BLIS_NO_CONJUGATE,
beta,
&beta_local );
bli_scalm_unb_var1( &beta_local, x );
/*
bli_scalm_int( &beta_local,
bli_scalm_int( beta,
x,
scalm_cntl );
*/
}

View File

@@ -36,8 +36,7 @@
#define FUNCPTR_T scalm_fp
typedef void (*FUNCPTR_T)( obj_t* beta,
obj_t* x );
typedef void (*FUNCPTR_T)( obj_t* x );
static FUNCPTR_T vars[1][3] =
{
@@ -49,6 +48,7 @@ void bli_scalm_int( obj_t* beta,
obj_t* x,
scalm_t* cntl )
{
obj_t x_local;
varnum_t n;
impl_t i;
FUNCPTR_T f;
@@ -63,8 +63,18 @@ void bli_scalm_int( obj_t* beta,
// Return early if one of the matrix operands has a zero dimension.
if ( bli_obj_has_zero_dim( *x ) ) return;
// Return early if the beta scalar equals one.
if ( bli_obj_scalar_equals( beta, &BLIS_ONE ) ) return;
// Return early if both beta and the scalar attached to x are unit.
if ( bli_obj_equals( beta, &BLIS_ONE ) &&
bli_obj_scalar_equals( x, &BLIS_ONE ) ) return;
// Alias x to x_local so we can apply beta if it is non-unit.
bli_obj_alias_to( *x, x_local );
// If beta is non-unit, apply it to the scalar attached to x.
if ( !bli_obj_equals( beta, &BLIS_ONE ) )
{
bli_obj_scalar_apply_scalar( beta, &x_local );
}
// Extract the variant number and implementation type.
n = cntl_var_num( cntl );
@@ -74,7 +84,6 @@ void bli_scalm_int( obj_t* beta,
f = vars[n][i];
// Invoke the variant.
f( beta,
x );
f( &x_local );
}

View File

@@ -59,12 +59,10 @@ static FUNCPTR_T GENARRAY2_MIN(ftypes,scalm_unb_var1);
#endif
void bli_scalm_unb_var1( obj_t* beta,
obj_t* x )
void bli_scalm_unb_var1( obj_t* x )
{
num_t dt_x = bli_obj_datatype( *x );
conj_t conjbeta = bli_obj_conj_status( *beta );
doff_t diagoffx = bli_obj_diag_offset( *x );
uplo_t uplox = bli_obj_uplo( *x );
@@ -76,21 +74,25 @@ void bli_scalm_unb_var1( obj_t* beta,
inc_t cs_x = bli_obj_col_stride( *x );
void* buf_beta;
num_t dt_beta;
FUNCPTR_T f;
// If beta is a scalar constant, use dt_x to extract the address of the
// corresponding constant value; otherwise, use the datatype encoded
// within the beta object and extract the buffer at the beta offset.
bli_set_scalar_dt_buffer( beta, dt_x, dt_beta, buf_beta );
// Grab the address of the internal scalar buffer for the scalar
// attached to x.
buf_beta = bli_obj_internal_scalar_buffer( *x );
// Index into the type combination array to extract the correct
// function pointer.
f = ftypes[dt_beta][dt_x];
// NOTE: We use dt_x for both beta and x because beta was obtained
// from the attached scalar of x, which is guaranteed to be of the
// same datatype as x.
f = ftypes[dt_x][dt_x];
// Invoke the function.
f( conjbeta,
// NOTE: We unconditionally pass in BLIS_NO_CONJUGATE for beta
// because it would have already been conjugated by the front-end.
f( BLIS_NO_CONJUGATE,
diagoffx,
uplox,
m,

View File

@@ -32,8 +32,7 @@
*/
void bli_scalm_unb_var1( obj_t* beta,
obj_t* x );
void bli_scalm_unb_var1( obj_t* x );
#undef GENTPROT2

View File

@@ -52,7 +52,7 @@ void bli_setm( obj_t* beta,
dt_x = bli_obj_datatype( *x );
// Create an object to hold a copy-cast of beta.
bli_obj_init_scalar_copy_of( dt_x,
bli_obj_scalar_init_detached_copy_of( dt_x,
BLIS_NO_CONJUGATE,
beta,
&beta_local );

View File

@@ -78,7 +78,7 @@ void bli_gemv( obj_t* alpha,
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -89,7 +89,7 @@ void bli_gemv( obj_t* alpha,
// the complex part of beta*y will not be stored. If y is complex and
// beta is real then beta is harmlessly promoted to complex.
dt_beta = dt_targ_y;
bli_obj_init_scalar_copy_of( dt_beta,
bli_obj_scalar_init_detached_copy_of( dt_beta,
BLIS_NO_CONJUGATE,
beta,
&beta_local );
@@ -188,8 +188,8 @@ void PASTEMAC(ch,opname)( \
rs_x = incx; cs_x = m_x * incx; \
rs_y = incy; cs_y = m_y * incy; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
\
bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \

View File

@@ -76,8 +76,7 @@ void bli_gemv_blk_var1( obj_t* alpha,
cntl_sub_packv_y( cntl ) );
// Copy/pack A1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&a1,
bli_packm_int( &a1,
&a1_pack,
cntl_sub_packm_a( cntl ) );
bli_packv_int( &y1,

View File

@@ -81,8 +81,7 @@ void bli_gemv_blk_var2( obj_t* alpha,
cntl_sub_packv_x( cntl ) );
// Copy/pack A1, x1 (if needed).
bli_packm_int( alpha,
&a1,
bli_packm_int( &a1,
&a1_pack,
cntl_sub_packm_a( cntl ) );
bli_packv_int( &x1,

View File

@@ -99,11 +99,11 @@ void bli_gemv_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -99,11 +99,11 @@ void bli_gemv_unb_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -99,11 +99,11 @@ void bli_gemv_unf_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -99,11 +99,11 @@ void bli_gemv_unf_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -75,7 +75,7 @@ void bli_ger( obj_t* alpha,
// the type union of the target datatypes of x and y to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -148,7 +148,7 @@ void PASTEMAC(ch,opname)( \
rs_x = incx; cs_x = m_x * incx; \
rs_y = incy; cs_y = m_y * incy; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
\
bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \

View File

@@ -75,8 +75,7 @@ void bli_ger_blk_var1( obj_t* alpha,
cntl_sub_packv_x( cntl ) );
// Copy/pack A1, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a1,
bli_packm_int( &a1,
&a1_pack,
cntl_sub_packm_a( cntl ) );
bli_packv_int( &x1,

View File

@@ -75,8 +75,7 @@ void bli_ger_blk_var2( obj_t* alpha,
cntl_sub_packv_y( cntl ) );
// Copy/pack A1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&a1,
bli_packm_int( &a1,
&a1_pack,
cntl_sub_packm_a( cntl ) );
bli_packv_int( &y1,

View File

@@ -95,7 +95,7 @@ void bli_ger_int( conj_t conjx,
bli_obj_toggle_conj( x_local );
bli_obj_toggle_conj( y_local );
bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ),
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
BLIS_CONJUGATE,
alpha,
&alpha_local );

View File

@@ -94,7 +94,7 @@ void bli_ger_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of x and y. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -94,7 +94,7 @@ void bli_ger_unb_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of x and y. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -78,7 +78,7 @@ void bli_hemv( obj_t* alpha,
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -89,7 +89,7 @@ void bli_hemv( obj_t* alpha,
// the complex part of beta*y will not be stored. If y is complex and
// beta is real then beta is harmlessly promoted to complex.
dt_beta = dt_targ_y;
bli_obj_init_scalar_copy_of( dt_beta,
bli_obj_scalar_init_detached_copy_of( dt_beta,
BLIS_NO_CONJUGATE,
beta,
&beta_local );
@@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \
rs_x = incx; cs_x = m * incx; \
rs_y = incy; cs_y = m * incy; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
\
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \

View File

@@ -106,8 +106,7 @@ void bli_hemv_blk_var1( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack A11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -109,8 +109,7 @@ void bli_hemv_blk_var2( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack A11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -106,8 +106,7 @@ void bli_hemv_blk_var3( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack A11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -109,8 +109,7 @@ void bli_hemv_blk_var4( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack A11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -101,11 +101,11 @@ void bli_hemv_unb_var1( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -101,11 +101,11 @@ void bli_hemv_unb_var2( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -101,11 +101,11 @@ void bli_hemv_unb_var3( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -101,11 +101,11 @@ void bli_hemv_unb_var4( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -101,11 +101,11 @@ void bli_hemv_unf_var1( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -101,11 +101,11 @@ void bli_hemv_unf_var1a( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -101,11 +101,11 @@ void bli_hemv_unf_var3( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
#if 0
obj_t x_copy, y_copy;

View File

@@ -101,11 +101,11 @@ void bli_hemv_unf_var3a( conj_t conjh,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of y.
dt_beta = dt_y;
buf_beta = bli_obj_scalar_buffer( dt_beta, *beta );
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
#if 0
obj_t x_copy, y_copy;

View File

@@ -68,7 +68,7 @@ void bli_her( obj_t* alpha,
// Create object to hold a copy-cast of alpha.
dt_alpha = dt_targ_x;
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -151,7 +151,7 @@ void PASTEMAC(ch,opname)( \
\
rs_x = incx; cs_x = m * incx; \
\
bli_obj_create_scalar_with_attached_buffer( dt_r, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt_r, alpha, &alphao ); \
\
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \

View File

@@ -90,8 +90,7 @@ void bli_her_blk_var1( conj_t conjh,
cntl_sub_packv_x1( cntl ) );
// Copy/pack C11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&c11,
bli_packm_int( &c11,
&c11_pack,
cntl_sub_packm_c11( cntl ) );
bli_packv_int( &x1,

View File

@@ -90,8 +90,7 @@ void bli_her_blk_var2( conj_t conjh,
cntl_sub_packv_x1( cntl ) );
// Copy/pack C11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&c11,
bli_packm_int( &c11,
&c11_pack,
cntl_sub_packm_c11( cntl ) );
bli_packv_int( &x1,

View File

@@ -75,13 +75,13 @@ void bli_her2( obj_t* alpha,
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
// Also create a conjugated copy of alpha.
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_CONJUGATE,
alpha,
&alpha_conj_local );
@@ -171,7 +171,7 @@ void PASTEMAC(ch,opname)( \
rs_x = incx; cs_x = m * incx; \
rs_y = incy; cs_y = m * incy; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
\
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \

View File

@@ -101,8 +101,7 @@ void bli_her2_blk_var1( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack C11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&c11,
bli_packm_int( &c11,
&c11_pack,
cntl_sub_packm_c11( cntl ) );
bli_packv_int( &x1,

View File

@@ -104,8 +104,7 @@ void bli_her2_blk_var2( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack C11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&c11,
bli_packm_int( &c11,
&c11_pack,
cntl_sub_packm_c11( cntl ) );
bli_packv_int( &x1,

View File

@@ -104,8 +104,7 @@ void bli_her2_blk_var3( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack C11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&c11,
bli_packm_int( &c11,
&c11_pack,
cntl_sub_packm_c11( cntl ) );
bli_packv_int( &x1,

View File

@@ -101,8 +101,7 @@ void bli_her2_blk_var4( conj_t conjh,
cntl_sub_packv_y1( cntl ) );
// Copy/pack C11, x1, y1 (if needed).
bli_packm_int( &BLIS_ONE,
&c11,
bli_packm_int( &c11,
&c11_pack,
cntl_sub_packm_c11( cntl ) );
bli_packv_int( &x1,

View File

@@ -93,11 +93,11 @@ void bli_her2_int( conj_t conjh,
bli_obj_toggle_conj( x_local );
bli_obj_toggle_conj( y_local );
bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha ),
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
BLIS_CONJUGATE,
alpha,
&alpha_local );
bli_obj_init_scalar_copy_of( bli_obj_datatype( *alpha_conj ),
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha_conj ),
BLIS_CONJUGATE,
alpha_conj,
&alpha_conj_local );

View File

@@ -96,7 +96,7 @@ void bli_her2_unb_var1( conj_t conjh,
// The datatype of alpha MUST be the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -96,7 +96,7 @@ void bli_her2_unb_var2( conj_t conjh,
// The datatype of alpha MUST be the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -96,7 +96,7 @@ void bli_her2_unb_var3( conj_t conjh,
// The datatype of alpha MUST be the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -96,7 +96,7 @@ void bli_her2_unb_var4( conj_t conjh,
// The datatype of alpha MUST be the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -96,7 +96,7 @@ void bli_her2_unf_var1( conj_t conjh,
// The datatype of alpha MUST be the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -96,7 +96,7 @@ void bli_her2_unf_var4( conj_t conjh,
// The datatype of alpha MUST be the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -78,7 +78,7 @@ void bli_symv( obj_t* alpha,
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -89,7 +89,7 @@ void bli_symv( obj_t* alpha,
// the complex part of beta*y will not be stored. If y is complex and
// beta is real then beta is harmlessly promoted to complex.
dt_beta = dt_targ_y;
bli_obj_init_scalar_copy_of( dt_beta,
bli_obj_scalar_init_detached_copy_of( dt_beta,
BLIS_NO_CONJUGATE,
beta,
&beta_local );
@@ -180,8 +180,8 @@ void PASTEMAC(ch,opname)( \
rs_x = incx; cs_x = m * incx; \
rs_y = incy; cs_y = m * incy; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
\
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \

View File

@@ -70,7 +70,7 @@ void bli_syr( obj_t* alpha,
// the type union of the target datatypes of x and c to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_c );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -152,7 +152,7 @@ void PASTEMAC(ch,opname)( \
\
rs_x = incx; cs_x = m * incx; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
\
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \

View File

@@ -74,7 +74,7 @@ void bli_syr2( obj_t* alpha,
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -164,7 +164,7 @@ void PASTEMAC(ch,opname)( \
rs_x = incx; cs_x = m * incx; \
rs_y = incy; cs_y = m * incy; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
\
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \

View File

@@ -70,7 +70,7 @@ void bli_trmv( obj_t* alpha,
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -150,7 +150,7 @@ void PASTEMAC(ch,opname)( \
\
rs_x = incx; cs_x = m * incx; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
\
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \

View File

@@ -80,8 +80,7 @@ void bli_trmv_l_blk_var1( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -80,8 +80,7 @@ void bli_trmv_l_blk_var2( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -80,8 +80,7 @@ void bli_trmv_u_blk_var1( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -80,8 +80,7 @@ void bli_trmv_u_blk_var2( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -88,7 +88,7 @@ void bli_trmv_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -88,7 +88,7 @@ void bli_trmv_unb_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -88,7 +88,7 @@ void bli_trmv_unf_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -88,7 +88,7 @@ void bli_trmv_unf_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -70,7 +70,7 @@ void bli_trsv( obj_t* alpha,
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
bli_obj_init_scalar_copy_of( dt_alpha,
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
@@ -147,7 +147,7 @@ void PASTEMAC(ch,opname)( \
\
rs_x = incx; cs_x = m * incx; \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
\
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \

View File

@@ -85,8 +85,7 @@ void bli_trsv_l_blk_var1( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -85,8 +85,7 @@ void bli_trsv_l_blk_var2( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -85,8 +85,7 @@ void bli_trsv_u_blk_var1( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -85,8 +85,7 @@ void bli_trsv_u_blk_var2( obj_t* alpha,
cntl_sub_packv_x1( cntl ) );
// Copy/pack A11, x1 (if needed).
bli_packm_int( &BLIS_ONE,
&a11,
bli_packm_int( &a11,
&a11_pack,
cntl_sub_packm_a11( cntl ) );
bli_packv_int( &x1,

View File

@@ -88,7 +88,7 @@ void bli_trsv_unb_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -88,7 +88,7 @@ void bli_trsv_unb_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -88,7 +88,7 @@ void bli_trsv_unf_var1( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -88,7 +88,7 @@ void bli_trsv_unf_var2( obj_t* alpha,
// The datatype of alpha MUST be the type union of a and x. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_a, dt_x );
buf_alpha = bli_obj_scalar_buffer( dt_alpha, *alpha );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// Index into the type combination array to extract the correct
// function pointer.

View File

@@ -48,21 +48,16 @@ void bli_gemm( obj_t* alpha,
obj_t* c )
{
gemm_t* cntl;
obj_t alpha_local;
obj_t beta_local;
obj_t a_local;
obj_t b_local;
obj_t c_local;
num_t dt_alpha;
num_t dt_beta;
bool_t pack_c;
// Check parameters.
if ( bli_error_checking_is_enabled() )
bli_gemm_check( alpha, a, b, beta, c );
// If alpha is zero, scale by beta and return.
if ( bli_obj_scalar_equals( alpha, &BLIS_ZERO ) )
if ( bli_obj_equals( alpha, &BLIS_ZERO ) )
{
bli_scalm( beta, c );
return;
@@ -86,29 +81,6 @@ void bli_gemm( obj_t* alpha,
bli_obj_induce_trans( c_local );
}
// Set the target and execution datatypes of the objects, and apply
// any transformations necessary to handle mixed domain computation.
bli_gemm_set_targ_exec_datatypes( &a_local,
&b_local,
&c_local,
&dt_alpha,
&dt_beta,
&pack_c );
// Create an object to hold a copy-cast of alpha.
bli_obj_init_scalar_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,
&alpha_local );
// Create an object to hold a copy-cast of beta.
bli_obj_init_scalar_copy_of( dt_beta,
BLIS_NO_CONJUGATE,
beta,
&beta_local );
if ( pack_c ) bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
// Choose the control tree.
cntl = gemm_cntl;
@@ -122,10 +94,10 @@ void bli_gemm( obj_t* alpha,
#endif
// Invoke the internal back-end.
bli_gemm_int( &alpha_local,
bli_gemm_int( alpha,
&a_local,
&b_local,
&beta_local,
beta,
&c_local,
cntl );
}
@@ -159,8 +131,8 @@ void PASTEMAC(ch,opname)( \
bli_set_dims_with_trans( transa, m, k, m_a, n_a ); \
bli_set_dims_with_trans( transb, k, n, m_b, n_b ); \
\
bli_obj_create_scalar_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_scalar_with_attached_buffer( dt, beta, &betao ); \
bli_obj_create_1x1_with_attached_buffer( dt, alpha, &alphao ); \
bli_obj_create_1x1_with_attached_buffer( dt, beta, &betao ); \
\
bli_obj_create_with_attached_buffer( dt, m_a, n_a, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m_b, n_b, b, rs_b, cs_b, &bo ); \

View File

@@ -34,10 +34,8 @@
#include "blis.h"
void bli_gemm_blk_var1( obj_t* alpha,
obj_t* a,
void bli_gemm_blk_var1( obj_t* a,
obj_t* b,
obj_t* beta,
obj_t* c,
gemm_t* cntl )
{
@@ -58,7 +56,7 @@ void bli_gemm_blk_var1( obj_t* alpha,
m_trans = bli_obj_length_after_trans( *a );
// Scale C by beta (if instructed).
bli_scalm_int( beta,
bli_scalm_int( &BLIS_ONE,
c,
cntl_sub_scalm( cntl ) );
@@ -66,9 +64,8 @@ void bli_gemm_blk_var1( obj_t* alpha,
bli_packm_init( b, &b_pack,
cntl_sub_packm_b( cntl ) );
// Pack B and scale by alpha (if instructed).
bli_packm_int( alpha,
b, &b_pack,
// Pack B (if instructed).
bli_packm_int( b, &b_pack,
cntl_sub_packm_b( cntl ) );
// Partition along the m dimension.
@@ -93,21 +90,19 @@ void bli_gemm_blk_var1( obj_t* alpha,
bli_packm_init( &c1, &c1_pack,
cntl_sub_packm_c( cntl ) );
// Pack A1 and scale by alpha (if instructed).
bli_packm_int( alpha,
&a1, &a1_pack,
// Pack A1 (if instructed).
bli_packm_int( &a1, &a1_pack,
cntl_sub_packm_a( cntl ) );
// Pack C1 and scale by beta (if instructed).
bli_packm_int( beta,
&c1, &c1_pack,
// Pack C1 (if instructed).
bli_packm_int( &c1, &c1_pack,
cntl_sub_packm_c( cntl ) );
// Perform gemm subproblem.
bli_gemm_int( alpha,
bli_gemm_int( &BLIS_ONE,
&a1_pack,
&b_pack,
beta,
&BLIS_ONE,
&c1_pack,
cntl_sub_gemm( cntl ) );

View File

@@ -32,10 +32,8 @@
*/
void bli_gemm_blk_var1( obj_t* alpha,
obj_t* a,
void bli_gemm_blk_var1( obj_t* a,
obj_t* b,
obj_t* beta,
obj_t* c,
gemm_t* cntl );

Some files were not shown because too many files have changed in this diff. Show More