Renamed various datatype-related macros/functions.

Details:
- Renamed the following macros in bli_obj_macro_defs.h and
  bli_param_macro_defs.h:
  - bli_obj_datatype()                 -> bli_obj_dt()
  - bli_obj_target_datatype()          -> bli_obj_target_dt()
  - bli_obj_execution_datatype()       -> bli_obj_exec_dt()
  - bli_obj_set_datatype()             -> bli_obj_set_dt()
  - bli_obj_set_target_datatype()      -> bli_obj_set_target_dt()
  - bli_obj_set_execution_datatype()   -> bli_obj_set_exec_dt()
  - bli_obj_datatype_proj_to_real()    -> bli_obj_dt_proj_to_real()
  - bli_obj_datatype_proj_to_complex() -> bli_obj_dt_proj_to_complex()
  - bli_datatype_proj_to_real()        -> bli_dt_proj_to_real()
  - bli_datatype_proj_to_complex()     -> bli_dt_proj_to_complex()
- Renamed the following functions in bli_obj.c:
  - bli_datatype_size()                -> bli_dt_size()
  - bli_datatype_string()              -> bli_dt_string()
  - bli_datatype_union()               -> bli_dt_union()
- Removed a pair of old level-1f penryn intrinsics kernels that were no
  longer in use.
This commit is contained in:
Field G. Van Zee
2018-04-30 14:57:33 -05:00
parent 01c4173238
commit 75d0d1057d
132 changed files with 331 additions and 1094 deletions

View File

@@ -121,8 +121,8 @@ int main( int argc, char** argv )
// Let's inspect the amount of padding inserted for alignment. Note
// the difference between the m dimension and the column stride.
printf( "datatype %s\n", bli_datatype_string( bli_obj_datatype( a8 ) ) );
printf( "datatype size %d bytes\n", bli_datatype_size( bli_obj_datatype( a8 ) ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a8 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a8 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a8 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a8 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a8 ) );
@@ -139,24 +139,24 @@ int main( int argc, char** argv )
bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10);
bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 );
printf( "datatype %s\n", bli_datatype_string( bli_obj_datatype( a9 ) ) );
printf( "datatype size %d bytes\n", bli_datatype_size( bli_obj_datatype( a9 ) ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a9 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a9 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a9 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a9 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a9 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a9 ) );
printf( "\n" );
printf( "datatype %s\n", bli_datatype_string( bli_obj_datatype( a10 ) ) );
printf( "datatype size %d bytes\n", bli_datatype_size( bli_obj_datatype( a10 ) ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a10 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a10 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a10 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a10 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a10 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a10 ) );
printf( "\n" );
printf( "datatype %s\n", bli_datatype_string( bli_obj_datatype( a11 ) ) );
printf( "datatype size %d bytes\n", bli_datatype_size( bli_obj_datatype( a11 ) ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a11 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a11 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a11 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a11 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a11 ) );

View File

@@ -50,7 +50,7 @@ void PASTEMAC0(opname) \
bli_init_once(); \
\
num_t dt_chi; \
num_t dt_absq_c = bli_obj_datatype_proj_to_complex( *absq ); \
num_t dt_absq_c = bli_obj_dt_proj_to_complex( *absq ); \
\
void* buf_chi; \
void* buf_absq = bli_obj_buffer_at_off( *absq ); \
@@ -88,7 +88,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *psi ); \
num_t dt = bli_obj_dt( *psi ); \
\
conj_t conjchi = bli_obj_conj_status( *chi ); \
\
@@ -125,7 +125,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *chi ); \
num_t dt = bli_obj_dt( *chi ); \
\
conj_t conjchi = bli_obj_conj_status( *chi ); \
\
@@ -158,7 +158,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *psi ); \
num_t dt = bli_obj_dt( *psi ); \
\
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
@@ -191,7 +191,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt_chi = bli_obj_datatype( *chi ); \
num_t dt_chi = bli_obj_dt( *chi ); \
num_t dt_def = BLIS_DCOMPLEX; \
num_t dt_use; \
\
@@ -234,7 +234,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt_chi = bli_obj_datatype( *chi ); \
num_t dt_chi = bli_obj_dt( *chi ); \
\
void* buf_chi = bli_obj_buffer_at_off( *chi ); \
\
@@ -268,7 +268,7 @@ void PASTEMAC0(opname) \
bli_init_once(); \
\
num_t dt_chi; \
num_t dt_zeta_c = bli_obj_datatype_proj_to_complex( *zeta_r ); \
num_t dt_zeta_c = bli_obj_dt_proj_to_complex( *zeta_r ); \
\
void* buf_chi; \
\
@@ -309,7 +309,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt_chi = bli_obj_datatype( *chi ); \
num_t dt_chi = bli_obj_dt( *chi ); \
\
void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, *zeta_r ); \
void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, *zeta_i ); \

View File

@@ -65,7 +65,7 @@ void PASTEMAC0(opname) \
\
conj_t conjchi = bli_obj_conj_status( *chi ); \
\
num_t dt_psi = bli_obj_datatype( *psi ); \
num_t dt_psi = bli_obj_dt( *psi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
\
num_t dt_chi; \

View File

@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
@@ -98,7 +98,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
@@ -140,7 +140,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
@@ -200,7 +200,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
@@ -255,7 +255,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
@@ -304,7 +304,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
@@ -366,7 +366,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
@@ -403,7 +403,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
dim_t n = bli_obj_vector_dim( *x ); \
@@ -454,7 +454,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
@@ -495,7 +495,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \

View File

@@ -81,7 +81,7 @@ void bli_packv_init
// Now, if we are not skipping the pack operation, then the only question
// left is whether we are to typecast vector a before packing.
if ( bli_obj_datatype( *a ) != bli_obj_target_datatype( *a ) )
if ( bli_obj_dt( *a ) != bli_obj_target_dt( *a ) )
bli_abort();
// Extract various fields from the control tree and pass them in
@@ -113,7 +113,7 @@ siz_t bli_packv_init_pack
cntx_t* cntx
)
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
dim_t dim_a = bli_obj_vector_dim( *a );
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );

View File

@@ -51,7 +51,7 @@ void bli_packv_unb_var1( obj_t* c,
cntx_t* cntx,
packv_t* cntl )
{
num_t dt_cp = bli_obj_datatype( *c );
num_t dt_cp = bli_obj_dt( *c );
dim_t dim_p = bli_obj_vector_dim( *p );

View File

@@ -94,10 +94,10 @@ void bli_unpackv_int( obj_t* p,
// Now, if we are not skipping the unpack operation, then the only
// question left is whether we are to typecast vector a after unpacking.
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
bli_abort();
/*
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
{
// Initialize an object c for the intermediate typecast vector.
bli_unpackv_init_cast( p,
@@ -132,7 +132,7 @@ void bli_unpackv_int( obj_t* p,
// was not necessary, then we are done because the call to the unpackv
// implementation would have unpacked directly to vector a.
/*
if ( bli_obj_datatype( *p ) != bli_obj_datatype( *a ) )
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
{
// Copy/typecast vector c to vector a.
// NOTE: Here, we use copynzv instead of copym because, in the cases
@@ -179,15 +179,15 @@ void bli_unpackv_init_cast( obj_t* p,
// already available. (After acquring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
num_t dt_targ_a = bli_obj_target_datatype( *a );
num_t dt_targ_a = bli_obj_target_dt( *a );
dim_t dim_a = bli_obj_vector_dim( *a );
siz_t elem_size_c = bli_datatype_size( dt_targ_a );
siz_t elem_size_c = bli_dt_size( dt_targ_a );
// We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *c );
// Update datatype and element size fields.
bli_obj_set_datatype( dt_targ_a, *c );
bli_obj_set_dt( dt_targ_a, *c );
bli_obj_set_elem_size( elem_size_c, *c );
// Update the strides and dimensions. We set the increments to reflect a

View File

@@ -51,7 +51,7 @@ void bli_unpackv_unb_var1( obj_t* p,
cntx_t* cntx,
unpackv_t* cntl )
{
num_t dt_pc = bli_obj_datatype( *p );
num_t dt_pc = bli_obj_dt( *p );
dim_t dim_c = bli_obj_vector_dim( *c );

View File

@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
@@ -107,7 +107,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
@@ -168,7 +168,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
dim_t m = bli_obj_length( *x ); \
@@ -210,7 +210,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \
@@ -266,7 +266,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
dim_t m = bli_obj_length( *x ); \

View File

@@ -57,7 +57,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
@@ -123,7 +123,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conja = bli_obj_conj_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
@@ -191,7 +191,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjxt = bli_obj_conj_status( *xt ); \
conj_t conjx = bli_obj_conj_status( *x ); \
@@ -259,7 +259,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjat = bli_obj_conj_status( *at ); \
conj_t conja = bli_obj_conj_status( *a ); \
@@ -342,7 +342,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
conj_t conjat = bli_obj_conj_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \

View File

@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
@@ -109,7 +109,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
@@ -173,7 +173,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \
@@ -245,7 +245,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \

View File

@@ -108,7 +108,7 @@ void bli_packm_blk_var1
thrinfo_t* t
)
{
num_t dt_cp = bli_obj_datatype( *c );
num_t dt_cp = bli_obj_dt( *c );
struc_t strucc = bli_obj_struc( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );

View File

@@ -68,7 +68,7 @@ void bli_packm_blk_var1( obj_t* c,
obj_t* p,
packm_thrinfo_t* t )
{
num_t dt_cp = bli_obj_datatype( *c );
num_t dt_cp = bli_obj_dt( *c );
struc_t strucc = bli_obj_struc( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );

View File

@@ -189,7 +189,7 @@ siz_t bli_packm_init_pack
{
bli_init_once();
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
trans_t transa = bli_obj_onlytrans_status( *a );
dim_t m_a = bli_obj_length( *a );
dim_t n_a = bli_obj_width( *a );

View File

@@ -64,7 +64,7 @@ void bli_packm_unb_var1
thrinfo_t* thread
)
{
num_t dt_cp = bli_obj_datatype( *c );
num_t dt_cp = bli_obj_dt( *c );
struc_t strucc = bli_obj_struc( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );

View File

@@ -64,7 +64,7 @@ void bli_unpackm_blk_var1
thrinfo_t* thread
)
{
num_t dt_cp = bli_obj_datatype( *c );
num_t dt_cp = bli_obj_dt( *c );
// Normally we take the parameters from the source argument. But here,
// the packm/unpackm framework is not yet solidified enough for us to

View File

@@ -59,7 +59,7 @@ void bli_unpackm_unb_var1
thrinfo_t* thread
)
{
num_t dt_pc = bli_obj_datatype( *p );
num_t dt_pc = bli_obj_dt( *p );
doff_t diagoffp = bli_obj_diag_offset( *p );
uplo_t uplop = bli_obj_uplo( *p );

View File

@@ -57,7 +57,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
trans_t transa = bli_obj_conjtrans_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
@@ -126,7 +126,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
@@ -190,7 +190,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
conj_t conja = bli_obj_conj_status( *a ); \
@@ -259,7 +259,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
@@ -318,7 +318,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
@@ -381,7 +381,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \

View File

@@ -50,7 +50,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
trans_t transa = bli_obj_conjtrans_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \

View File

@@ -51,7 +51,7 @@ void PASTEMAC0(opname) \
gemv_t* cntl \
) \
{ \
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
trans_t transa = bli_obj_conjtrans_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \

View File

@@ -67,9 +67,9 @@ void bli_gemv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_datatype( *a );
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -81,7 +81,7 @@ void bli_gemv_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -49,7 +49,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \

View File

@@ -64,9 +64,9 @@ void bli_ger_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
//dt_targ_a = bli_obj_target_datatype( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
//dt_targ_a = bli_obj_target_dt( *a );
// Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -78,7 +78,7 @@ void bli_ger_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the target datatypes of x and y to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -94,7 +94,7 @@ void bli_ger_int( conj_t conjx,
bli_obj_toggle_conj( x_local );
bli_obj_toggle_conj( y_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
BLIS_CONJUGATE,
alpha,
&alpha_local );

View File

@@ -51,7 +51,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uplo = bli_obj_uplo( *a ); \
conj_t conja = bli_obj_conj_status( *a ); \

View File

@@ -67,9 +67,9 @@ void bli_hemv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_datatype( *a );
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
// Determine whether each operand with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -81,7 +81,7 @@ void bli_hemv_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -49,7 +49,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
\
uplo_t uplo = bli_obj_uplo( *c ); \
conj_t conjx = bli_obj_conj_status( *x ); \

View File

@@ -61,8 +61,8 @@ void bli_her_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_datatype( *x );
//dt_targ_c = bli_obj_target_datatype( *c );
dt_targ_x = bli_obj_target_dt( *x );
//dt_targ_c = bli_obj_target_dt( *c );
// Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );

View File

@@ -51,7 +51,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
\
uplo_t uplo = bli_obj_uplo( *c ); \
conj_t conjx = bli_obj_conj_status( *x ); \

View File

@@ -65,9 +65,9 @@ void bli_her2_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
//dt_targ_c = bli_obj_target_datatype( *c );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
//dt_targ_c = bli_obj_target_dt( *c );
// Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -78,7 +78,7 @@ void bli_her2_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -98,11 +98,11 @@ void bli_her2_int( conj_t conjh,
bli_obj_toggle_conj( x_local );
bli_obj_toggle_conj( y_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
BLIS_CONJUGATE,
alpha,
&alpha_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *alpha_conj ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha_conj ),
BLIS_CONJUGATE,
alpha_conj,
&alpha_conj_local );

View File

@@ -67,9 +67,9 @@ void bli_symv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_datatype( *a );
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
// Determine whether each operand with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -81,7 +81,7 @@ void bli_symv_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -61,8 +61,8 @@ void bli_syr_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_c = bli_obj_target_datatype( *c );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_c = bli_obj_target_dt( *c );
// Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -73,7 +73,7 @@ void bli_syr_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the target datatypes of x and c to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_c );
dt_alpha = bli_dt_union( dt_targ_x, dt_targ_c );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -64,9 +64,9 @@ void bli_syr2_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_y = bli_obj_target_datatype( *y );
//dt_targ_c = bli_obj_target_datatype( *c );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
//dt_targ_c = bli_obj_target_dt( *c );
// Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
@@ -77,7 +77,7 @@ void bli_syr2_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the datatypes of x and y.
dt_alpha = bli_datatype_union( dt_targ_x, dt_targ_y );
dt_alpha = bli_dt_union( dt_targ_x, dt_targ_y );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -48,7 +48,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \

View File

@@ -61,8 +61,8 @@ void bli_trmv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_datatype( *a );
dt_targ_x = bli_obj_target_datatype( *x );
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
// Determine whether each operand with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -73,7 +73,7 @@ void bli_trmv_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -48,7 +48,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \

View File

@@ -61,8 +61,8 @@ void bli_trsv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_datatype( *a );
dt_targ_x = bli_obj_datatype( *x );
dt_targ_a = bli_obj_dt( *a );
dt_targ_x = bli_obj_dt( *x );
// Determine whether each operand with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
@@ -73,7 +73,7 @@ void bli_trsv_front
// Create an object to hold a copy-cast of alpha. Notice that we use
// the type union of the target datatypes of a and x to prevent any
// unnecessary loss of information during the computation.
dt_alpha = bli_datatype_union( dt_targ_a, dt_targ_x );
dt_alpha = bli_dt_union( dt_targ_a, dt_targ_x );
bli_obj_scalar_init_detached_copy_of( dt_alpha,
BLIS_NO_CONJUGATE,
alpha,

View File

@@ -130,7 +130,7 @@ dim_t PASTEMAC0(opname) \
\
/* Extract the execution datatype and use it to query the corresponding
blocksize and blocksize maximum values from the blksz_t object. */ \
dt = bli_obj_execution_datatype( *a ); \
dt = bli_obj_exec_dt( *a ); \
bsize = bli_cntx_get_blksz( bszid, cntx ); \
b_alg = bli_blksz_get_def( dt, bsize ); \
b_max = bli_blksz_get_max( dt, bsize ); \
@@ -196,7 +196,7 @@ dim_t PASTEMAC0(opname) \
\
/* Extract the execution datatype and use it to query the corresponding
blocksize and blocksize maximum values from the blksz_t object. */ \
dt = bli_obj_execution_datatype( *a ); \
dt = bli_obj_exec_dt( *a ); \
bsize = bli_cntx_get_blksz( bszid, cntx ); \
b_alg = bli_blksz_get_def( dt, bsize ); \
b_max = bli_blksz_get_max( dt, bsize ); \
@@ -249,7 +249,7 @@ dim_t PASTEMAC0(opname) \
\
/* Extract the execution datatype and use it to query the corresponding
blocksize and blocksize maximum values from the blksz_t object. */ \
dt = bli_obj_execution_datatype( *a ); \
dt = bli_obj_exec_dt( *a ); \
bsize = bli_cntx_get_blksz( bszid, cntx ); \
b_alg = bli_blksz_get_def( dt, bsize ); \
b_max = bli_blksz_get_max( dt, bsize ); \
@@ -310,7 +310,7 @@ dim_t PASTEMAC0(opname) \
\
/* Extract the execution datatype and use it to query the corresponding
blocksize and blocksize maximum values from the blksz_t object. */ \
dt = bli_obj_execution_datatype( *a ); \
dt = bli_obj_exec_dt( *a ); \
bsize = bli_cntx_get_blksz( bszid, cntx ); \
b_alg = bli_blksz_get_def( dt, bsize ); \
b_max = bli_blksz_get_max( dt, bsize ); \

View File

@@ -479,7 +479,7 @@ void bli_l3_basic_check
// Check for sufficiently sized stack buffers
e_val = bli_check_sufficient_stack_buf_size( bli_obj_datatype( *a ), cntx );
e_val = bli_check_sufficient_stack_buf_size( bli_obj_dt( *a ), cntx );
bli_check_error_code( e_val );
}

View File

@@ -49,7 +49,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
\
dim_t k = bli_obj_width( *a ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
@@ -100,7 +100,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
void* buf_b = bli_obj_buffer_at_off( *b ); \
@@ -164,7 +164,7 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c11 ); \
num_t dt = bli_obj_dt( *c11 ); \
\
dim_t k = bli_obj_width( *a1x ); \
void* buf_a1x = bli_obj_buffer_at_off( *a1x ); \

View File

@@ -66,7 +66,7 @@ void bli_gemm_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
pack_t schema_a = bli_obj_pack_schema( *a );
pack_t schema_b = bli_obj_pack_schema( *b );

View File

@@ -66,7 +66,7 @@ void bli_gemm4mb_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
pack_t schema_a = bli_obj_pack_schema( *a );
pack_t schema_b = bli_obj_pack_schema( *b );

View File

@@ -66,7 +66,7 @@ void bli_gemm3m2_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
pack_t schema_a = bli_obj_pack_schema( *a );
pack_t schema_b = bli_obj_pack_schema( *b );

View File

@@ -58,7 +58,7 @@ void bli_gemm_ker_var5( obj_t* a,
gemm_t* cntl,
gemm_thrinfo_t* thread )
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
dim_t m = bli_obj_length( *c );
dim_t n = bli_obj_width( *c );

View File

@@ -83,7 +83,7 @@ void bli_her2k_front
bli_obj_toggle_conj( ah_local );
// Initialize a conjugated copy of alpha.
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *a ),
BLIS_CONJUGATE,
alpha,
&alpha_conj );

View File

@@ -67,7 +67,7 @@ void bli_herk_l_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );

View File

@@ -67,7 +67,7 @@ void bli_herk_u_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );

View File

@@ -65,7 +65,7 @@ void bli_trmm_ll_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffa = bli_obj_diag_offset( *a );

View File

@@ -65,7 +65,7 @@ void bli_trmm_lu_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffa = bli_obj_diag_offset( *a );

View File

@@ -65,7 +65,7 @@ void bli_trmm_rl_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffb = bli_obj_diag_offset( *b );

View File

@@ -65,7 +65,7 @@ void bli_trmm_ru_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffb = bli_obj_diag_offset( *b );

View File

@@ -65,7 +65,7 @@ void bli_trsm_ll_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffa = bli_obj_diag_offset( *a );

View File

@@ -65,7 +65,7 @@ void bli_trsm_lu_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffa = bli_obj_diag_offset( *a );

View File

@@ -65,7 +65,7 @@ void bli_trsm_rl_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffb = bli_obj_diag_offset( *b );

View File

@@ -65,7 +65,7 @@ void bli_trsm_ru_ker_var2
thrinfo_t* thread
)
{
num_t dt_exec = bli_obj_execution_datatype( *c );
num_t dt_exec = bli_obj_exec_dt( *c );
doff_t diagoffb = bli_obj_diag_offset( *b );

View File

@@ -261,7 +261,7 @@ dim_t bli_determine_blocksize_f
// Extract the execution datatype and use it to query the corresponding
// blocksize and blocksize maximum values from the blksz_t object.
dt = bli_obj_execution_datatype( *obj );
dt = bli_obj_exec_dt( *obj );
bsize = bli_cntx_get_blksz( bszid, cntx );
b_alg = bli_blksz_get_def( dt, bsize );
b_max = bli_blksz_get_max( dt, bsize );
@@ -287,7 +287,7 @@ dim_t bli_determine_blocksize_b
// Extract the execution datatype and use it to query the corresponding
// blocksize and blocksize maximum values from the blksz_t object.
dt = bli_obj_execution_datatype( *obj );
dt = bli_obj_exec_dt( *obj );
bsize = bli_cntx_get_blksz( bszid, cntx );
b_alg = bli_blksz_get_def( dt, bsize );
b_max = bli_blksz_get_max( dt, bsize );

View File

@@ -159,7 +159,7 @@ err_t bli_check_object_valid_datatype( obj_t* a )
err_t e_val;
num_t dt;
dt = bli_obj_datatype( *a );
dt = bli_obj_dt( *a );
e_val = bli_check_valid_datatype( dt );
return e_val;
@@ -180,7 +180,7 @@ err_t bli_check_noninteger_object( obj_t* a )
err_t e_val;
num_t dt;
dt = bli_obj_datatype( *a );
dt = bli_obj_dt( *a );
e_val = bli_check_noninteger_datatype( dt );
return e_val;
@@ -201,7 +201,7 @@ err_t bli_check_nonconstant_object( obj_t* a )
err_t e_val;
num_t dt;
dt = bli_obj_datatype( *a );
dt = bli_obj_dt( *a );
e_val = bli_check_nonconstant_datatype( dt );
return e_val;
@@ -225,7 +225,7 @@ err_t bli_check_floating_object( obj_t* a )
err_t e_val;
num_t dt;
dt = bli_obj_datatype( *a );
dt = bli_obj_dt( *a );
e_val = bli_check_floating_datatype( dt );
return e_val;
@@ -247,7 +247,7 @@ err_t bli_check_real_object( obj_t* a )
err_t e_val;
num_t dt;
dt = bli_obj_datatype( *a );
dt = bli_obj_dt( *a );
e_val = bli_check_real_datatype( dt );
return e_val;
@@ -268,7 +268,7 @@ err_t bli_check_integer_object( obj_t* a )
err_t e_val;
num_t dt;
dt = bli_obj_datatype( *a );
dt = bli_obj_dt( *a );
e_val = bli_check_integer_datatype( dt );
return e_val;
@@ -292,8 +292,8 @@ err_t bli_check_consistent_object_datatypes( obj_t* a, obj_t* b )
num_t dt_a;
num_t dt_b;
dt_a = bli_obj_datatype( *a );
dt_b = bli_obj_datatype( *b );
dt_a = bli_obj_dt( *a );
dt_b = bli_obj_dt( *b );
e_val = bli_check_consistent_datatypes( dt_a, dt_b );
@@ -320,8 +320,8 @@ err_t bli_check_object_real_proj_of( obj_t* c, obj_t* r )
num_t dt_c;
num_t dt_r;
dt_c = bli_obj_datatype( *c );
dt_r = bli_obj_datatype( *r );
dt_c = bli_obj_dt( *c );
dt_r = bli_obj_dt( *r );
e_val = bli_check_datatype_real_proj_of( dt_c, dt_r );
@@ -773,7 +773,7 @@ err_t bli_check_sufficient_stack_buf_size( num_t dt, cntx_t* cntx )
dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx );
dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx );
siz_t dt_size = bli_datatype_size( dt );
siz_t dt_size = bli_dt_size( dt );
// NOTE: For induced methods, we use the size of the complex datatypes
// (rather than the size of the native micro-kernels' datatype) because

View File

@@ -469,7 +469,7 @@ static bool_t bli_cntx_l3_nat_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cnt
static bool_t bli_cntx_l3_nat_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
const num_t dt = bli_obj_datatype( *obj );
const num_t dt = bli_obj_dt( *obj );
const bool_t ukr_prefers_rows
= bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
const bool_t ukr_prefers_cols
@@ -514,7 +514,7 @@ static bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t*
// For induced methods, return the ukernel storage preferences of the
// corresponding real micro-kernel.
if ( bli_cntx_method( cntx ) != BLIS_NAT )
dt = bli_datatype_proj_to_real( dt );
dt = bli_dt_proj_to_real( dt );
return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
}
@@ -524,14 +524,14 @@ static bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t*
// For induced methods, return the ukernel storage preferences of the
// corresponding real micro-kernel.
if ( bli_cntx_method( cntx ) != BLIS_NAT )
dt = bli_datatype_proj_to_real( dt );
dt = bli_dt_proj_to_real( dt );
return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
}
static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
const num_t dt = bli_obj_datatype( *obj );
const num_t dt = bli_obj_dt( *obj );
const bool_t ukr_prefers_rows
= bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx );
const bool_t ukr_prefers_cols

View File

@@ -51,7 +51,7 @@ static FUNCPTR_T GENARRAY(ftypes,machval);
void bli_machval( machval_t mval,
obj_t* v )
{
num_t dt_v = bli_obj_datatype( *v );
num_t dt_v = bli_obj_dt( *v );
void* buf_v = bli_obj_buffer_at_off( *v );

View File

@@ -412,7 +412,7 @@ void bli_membrk_compute_pool_block_sizes_dt
cntx_t* cntx
)
{
siz_t size_dt = bli_datatype_size( dt );
siz_t size_dt = bli_dt_size( dt );
blksz_t* mr;
blksz_t* nr;

View File

@@ -77,7 +77,7 @@ void bli_obj_create_without_buffer( num_t dt,
bli_obj_create_without_buffer_check( dt, m, n, obj );
// Query the size of one element of the object's pre-set datatype.
elem_size = bli_datatype_size( dt );
elem_size = bli_dt_size( dt );
// Set any default properties that are appropriate.
bli_obj_set_defaults( *obj );
@@ -95,10 +95,10 @@ void bli_obj_create_without_buffer( num_t dt,
// Set individual fields.
bli_obj_set_buffer( NULL, *obj );
bli_obj_set_datatype( dt, *obj );
bli_obj_set_dt( dt, *obj );
bli_obj_set_elem_size( elem_size, *obj );
bli_obj_set_target_datatype( dt, *obj );
bli_obj_set_execution_datatype( dt, *obj );
bli_obj_set_target_dt( dt, *obj );
bli_obj_set_exec_dt( dt, *obj );
bli_obj_set_dims( m, n, *obj );
bli_obj_set_offs( 0, 0, *obj );
bli_obj_set_diag_offset( 0, *obj );
@@ -220,7 +220,7 @@ void bli_obj_create_1x1_with_attached_buffer( num_t dt,
void bli_obj_create_conf_to( obj_t* s, obj_t* d )
{
const num_t dt = bli_obj_datatype( *s );
const num_t dt = bli_obj_dt( *s );
const dim_t m = bli_obj_length( *s );
const dim_t n = bli_obj_width( *s );
const inc_t rs = bli_obj_row_stride( *s );
@@ -422,10 +422,10 @@ static siz_t dt_sizes[6] =
sizeof( constdata_t )
};
siz_t bli_datatype_size( num_t dt )
siz_t bli_dt_size( num_t dt )
{
if ( bli_error_checking_is_enabled() )
bli_datatype_size_check( dt );
bli_dt_size_check( dt );
return dt_sizes[dt];
}
@@ -439,10 +439,10 @@ static char* dt_names[ BLIS_NUM_FP_TYPES+1 ] =
"int"
};
char* bli_datatype_string( num_t dt )
char* bli_dt_string( num_t dt )
{
if ( bli_error_checking_is_enabled() )
bli_datatype_string_check( dt );
bli_dt_string_check( dt );
return dt_names[dt];
}
@@ -493,10 +493,10 @@ static num_t type_union[BLIS_NUM_FP_TYPES][BLIS_NUM_FP_TYPES] =
/* z */ { BLIS_DCOMPLEX, BLIS_DCOMPLEX, BLIS_DCOMPLEX, BLIS_DCOMPLEX }
};
num_t bli_datatype_union( num_t dt1, num_t dt2 )
num_t bli_dt_union( num_t dt1, num_t dt2 )
{
if ( bli_error_checking_is_enabled() )
bli_datatype_union_check( dt1, dt2 );
bli_dt_union_check( dt1, dt2 );
return type_union[dt1][dt2];
}
@@ -536,9 +536,9 @@ void bli_obj_print( char* label, obj_t* obj )
fprintf( file, " info %lX\n", ( unsigned long int )(*obj).info );
fprintf( file, " - is complex %lu\n", ( unsigned long int )bli_obj_is_complex( *obj ) );
fprintf( file, " - is d. prec %lu\n", ( unsigned long int )bli_obj_is_double_precision( *obj ) );
fprintf( file, " - datatype %lu\n", ( unsigned long int )bli_obj_datatype( *obj ) );
fprintf( file, " - target dt %lu\n", ( unsigned long int )bli_obj_target_datatype( *obj ) );
fprintf( file, " - exec dt %lu\n", ( unsigned long int )bli_obj_execution_datatype( *obj ) );
fprintf( file, " - datatype %lu\n", ( unsigned long int )bli_obj_dt( *obj ) );
fprintf( file, " - target dt %lu\n", ( unsigned long int )bli_obj_target_dt( *obj ) );
fprintf( file, " - exec dt %lu\n", ( unsigned long int )bli_obj_exec_dt( *obj ) );
fprintf( file, " - has trans %lu\n", ( unsigned long int )bli_obj_has_trans( *obj ) );
fprintf( file, " - has conj %lu\n", ( unsigned long int )bli_obj_has_conj( *obj ) );
fprintf( file, " - unit diag? %lu\n", ( unsigned long int )bli_obj_has_unit_diag( *obj ) );

View File

@@ -87,14 +87,14 @@ void bli_adjust_strides( dim_t m,
inc_t* cs,
inc_t* is );
siz_t bli_datatype_size( num_t dt );
char* bli_datatype_string( num_t dt );
siz_t bli_dt_size( num_t dt );
char* bli_dt_string( num_t dt );
dim_t bli_align_dim_to_mult( dim_t dim, dim_t dim_mult );
dim_t bli_align_dim_to_size( dim_t dim, siz_t elem_size, siz_t align_size );
dim_t bli_align_ptr_to_size( void* p, size_t align_size );
num_t bli_datatype_union( num_t dt1, num_t dt2 );
num_t bli_dt_union( num_t dt1, num_t dt2 );
void bli_obj_print( char* label, obj_t* obj );

View File

@@ -74,7 +74,7 @@ void bli_obj_scalar_init_detached_copy_of( num_t dt,
void bli_obj_scalar_detach( obj_t* a,
obj_t* alpha )
{
num_t dt_a = bli_obj_datatype( *a );
num_t dt_a = bli_obj_dt( *a );
// Initialize alpha to be a bufferless internal scalar of the same
// datatype as A.
@@ -92,7 +92,7 @@ void bli_obj_scalar_attach( conj_t conj,
// Make a copy-cast of alpha of the same datatype as A. This step
// gives us the opportunity to conjugate and/or typecast alpha.
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *a ),
conj,
alpha,
&alpha_cast );
@@ -109,7 +109,7 @@ void bli_obj_scalar_apply_scalar( obj_t* alpha,
// Make a copy-cast of alpha of the same datatype as A. This step
// gives us the opportunity to typecast alpha.
bli_obj_scalar_init_detached_copy_of( bli_obj_datatype( *a ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *a ),
BLIS_NO_CONJUGATE,
alpha,
&alpha_cast );
@@ -125,7 +125,7 @@ void bli_obj_scalar_apply_scalar( obj_t* alpha,
void bli_obj_scalar_reset( obj_t* a )
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
void* scalar_a = bli_obj_internal_scalar_buffer( *a );
void* one = bli_obj_buffer_for_const( dt, BLIS_ONE );
@@ -141,7 +141,7 @@ void bli_obj_scalar_reset( obj_t* a )
bool_t bli_obj_scalar_has_nonzero_imag( obj_t* a )
{
bool_t r_val = FALSE;
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
void* scalar_a = bli_obj_internal_scalar_buffer( *a );
if ( bli_is_real( dt ) )

View File

@@ -47,8 +47,8 @@ bool_t bli_obj_equals( obj_t* a,
!bli_obj_is_1x1( *b ) )
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
dt_a = bli_obj_datatype( *a );
dt_b = bli_obj_datatype( *b );
dt_a = bli_obj_dt( *a );
dt_b = bli_obj_dt( *b );
// If B is BLIS_CONSTANT, then we need to test equality based on the
// datatype of A--this works even if A is also BLIS_CONSTANT. If B
@@ -90,8 +90,8 @@ bool_t bli_obj_imag_equals( obj_t* a,
num_t dt_a;
num_t dt_b;
dt_a = bli_obj_datatype( *a );
dt_b = bli_obj_datatype( *b );
dt_a = bli_obj_dt( *a );
dt_b = bli_obj_dt( *b );
// The function is not yet implemented for vectors and matrices.
if ( !bli_obj_is_1x1( *a ) ||
@@ -108,7 +108,7 @@ bool_t bli_obj_imag_equals( obj_t* a,
}
else // if ( bli_is_complex( dt_a ) )
{
num_t dt_a_real = bli_datatype_proj_to_real( dt_a );
num_t dt_a_real = bli_dt_proj_to_real( dt_a );
// Now we compare the imaginary part of a to b. Notice that since
// we are using bli_obj_buffer_for_1x1() to acquire the buffer for

View File

@@ -57,7 +57,7 @@ err_t bli_setijm
dim_t n = bli_obj_width( *b );
dim_t rs = bli_obj_row_stride( *b );
dim_t cs = bli_obj_col_stride( *b );
num_t dt = bli_obj_datatype( *b );
num_t dt = bli_obj_dt( *b );
// Return error if i or j is beyond bounds of matrix/vector.
if ( m <= i ) return BLIS_FAILURE;
@@ -131,7 +131,7 @@ err_t bli_getijm
dim_t n = bli_obj_width( *b );
dim_t rs = bli_obj_row_stride( *b );
dim_t cs = bli_obj_col_stride( *b );
num_t dt = bli_obj_datatype( *b );
num_t dt = bli_obj_dt( *b );
// Return error if i or j is beyond bounds of matrix/vector.
if ( m <= i ) return BLIS_FAILURE;

View File

@@ -158,7 +158,7 @@ void bli_obj_create_const_copy_of_check( obj_t* a, obj_t* b )
}
#endif
void bli_datatype_size_check( num_t dt )
void bli_dt_size_check( num_t dt )
{
err_t e_val;
@@ -166,7 +166,7 @@ void bli_datatype_size_check( num_t dt )
bli_check_error_code( e_val );
}
void bli_datatype_string_check( num_t dt )
void bli_dt_string_check( num_t dt )
{
err_t e_val;
@@ -174,7 +174,7 @@ void bli_datatype_string_check( num_t dt )
bli_check_error_code( e_val );
}
void bli_datatype_union_check( num_t dt1, num_t dt2 )
void bli_dt_union_check( num_t dt1, num_t dt2 )
{
err_t e_val;

View File

@@ -64,11 +64,11 @@ void bli_obj_create_const_check( double value, obj_t* obj );
void bli_obj_create_const_copy_of_check( obj_t* a, obj_t* b );
void bli_datatype_size_check( num_t dt );
void bli_dt_size_check( num_t dt );
void bli_datatype_string_check( num_t dt );
void bli_dt_string_check( num_t dt );
void bli_datatype_union_check( num_t dt1, num_t dt2 );
void bli_dt_union_check( num_t dt1, num_t dt2 );
void bli_obj_print_check( char* label, obj_t* obj );

View File

@@ -85,23 +85,23 @@
\
( ( (obj).info & BLIS_PRECISION_BIT ) == BLIS_BITVAL_DOUBLE_PREC )
#define bli_obj_datatype( obj ) \
#define bli_obj_dt( obj ) \
\
( (obj).info & BLIS_DATATYPE_BITS )
#define bli_obj_datatype_proj_to_real( obj ) \
#define bli_obj_dt_proj_to_real( obj ) \
\
( ( (obj).info & BLIS_DATATYPE_BITS ) & ~BLIS_BITVAL_COMPLEX )
#define bli_obj_datatype_proj_to_complex( obj ) \
#define bli_obj_dt_proj_to_complex( obj ) \
\
( ( (obj).info & BLIS_DATATYPE_BITS ) & BLIS_BITVAL_COMPLEX )
#define bli_obj_target_datatype( obj ) \
#define bli_obj_target_dt( obj ) \
\
( ( (obj).info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT )
#define bli_obj_execution_datatype( obj ) \
#define bli_obj_exec_dt( obj ) \
\
( ( (obj).info & BLIS_EXECUTION_DT_BITS ) >> BLIS_EXECUTION_DT_SHIFT )
@@ -277,17 +277,17 @@
(obj).info = ( (obj).info & ~BLIS_INVERT_DIAG_BIT ) | (inv_diag); \
}
#define bli_obj_set_datatype( dt, obj ) \
#define bli_obj_set_dt( dt, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_DATATYPE_BITS ) | (dt); \
}
#define bli_obj_set_target_datatype( dt, obj ) \
#define bli_obj_set_target_dt( dt, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT ); \
}
#define bli_obj_set_execution_datatype( dt, obj ) \
#define bli_obj_set_exec_dt( dt, obj ) \
{ \
(obj).info = ( (obj).info & ~BLIS_EXECUTION_DT_BITS ) | ( dt << BLIS_EXECUTION_DT_SHIFT ); \
}

View File

@@ -99,11 +99,11 @@
( bli_is_double( dt ) || \
bli_is_dcomplex( dt ) )
#define bli_datatype_proj_to_real( dt ) \
#define bli_dt_proj_to_real( dt ) \
\
( dt & ~BLIS_BITVAL_COMPLEX )
#define bli_datatype_proj_to_complex( dt ) \
#define bli_dt_proj_to_complex( dt ) \
\
( dt & BLIS_BITVAL_COMPLEX )
@@ -753,7 +753,7 @@
} \
else \
{ \
dt_scalar = bli_obj_datatype( *(obj_scalar) ); \
dt_scalar = bli_obj_dt( *(obj_scalar) ); \
buf_scalar = bli_obj_buffer_at_off( *(obj_scalar) ); \
} \
}

View File

@@ -52,7 +52,7 @@
if ( bli_obj_imag_equals( &beta, &BLIS_ZERO ) && \
!bli_is_gen_stored( rs_c, cs_c ) ) \
{ \
dt_exec = bli_datatype_proj_to_real( dt_exec ); \
dt_exec = bli_dt_proj_to_real( dt_exec ); \
\
if ( bli_is_1e_packed( schema_a ) ) \
{ \

View File

@@ -52,7 +52,7 @@ void PASTEMAC(opname,imeth) \
bli_init_once(); \
\
ind_t ind = PASTEMAC0(imeth); \
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
obj_t* beta_use = beta; \
\
dim_t i; \
@@ -147,7 +147,7 @@ void PASTEMAC(opname,imeth) \
bli_init_once(); \
\
ind_t ind = PASTEMAC0(imeth); \
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
obj_t* beta_use = beta; \
\
dim_t i; \
@@ -225,7 +225,7 @@ void PASTEMAC(opname,imeth) \
bli_init_once(); \
\
ind_t ind = PASTEMAC0(imeth); \
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
obj_t* beta_use = beta; \
\
dim_t i; \
@@ -295,7 +295,7 @@ void PASTEMAC(opname,imeth) \
bli_init_once(); \
\
ind_t ind = PASTEMAC0(imeth); \
num_t dt = bli_obj_datatype( *b ); \
num_t dt = bli_obj_dt( *b ); \
\
dim_t i; \
\
@@ -352,7 +352,7 @@ void PASTEMAC(opname,imeth) \
bli_init_once(); \
\
ind_t ind = PASTEMAC0(imeth); \
num_t dt = bli_obj_datatype( *b ); \
num_t dt = bli_obj_dt( *b ); \
\
/* If the objects are in the real domain, execute the native
implementation. */ \

View File

@@ -52,7 +52,7 @@ void PASTEMAC(opname,imeth) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
\
func( alpha, a, b, beta, c, cntx ); \
@@ -81,7 +81,7 @@ void PASTEMAC(opname,imeth) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
\
func( side, alpha, a, b, beta, c, cntx ); \
@@ -108,7 +108,7 @@ void PASTEMAC(opname,imeth) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *c ); \
num_t dt = bli_obj_dt( *c ); \
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
\
func( alpha, a, beta, c, cntx ); \
@@ -134,7 +134,7 @@ void PASTEMAC(opname,imeth) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_datatype( *b ); \
num_t dt = bli_obj_dt( *b ); \
PASTECH(opname,_oft) func = PASTEMAC(opname,ind_get_avail)( dt ); \
\
func( side, alpha, a, b, cntx ); \

View File

@@ -203,7 +203,7 @@ siz_t bli_thread_get_range_l2r
dim_t* end
)
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
dim_t m = bli_obj_length_after_trans( *a );
dim_t n = bli_obj_width_after_trans( *a );
dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -223,7 +223,7 @@ siz_t bli_thread_get_range_r2l
dim_t* end
)
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
dim_t m = bli_obj_length_after_trans( *a );
dim_t n = bli_obj_width_after_trans( *a );
dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -243,7 +243,7 @@ siz_t bli_thread_get_range_t2b
dim_t* end
)
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
dim_t m = bli_obj_length_after_trans( *a );
dim_t n = bli_obj_width_after_trans( *a );
dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -263,7 +263,7 @@ siz_t bli_thread_get_range_b2t
dim_t* end
)
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
dim_t m = bli_obj_length_after_trans( *a );
dim_t n = bli_obj_width_after_trans( *a );
dim_t bf = bli_blksz_get_def( dt, bmult );
@@ -760,7 +760,7 @@ siz_t bli_thread_get_range_weighted_l2r
if ( bli_obj_intersects_diag( *a ) &&
bli_obj_is_upper_or_lower( *a ) )
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
doff_t diagoff = bli_obj_diag_offset( *a );
uplo_t uplo = bli_obj_uplo( *a );
dim_t m = bli_obj_length( *a );
@@ -810,7 +810,7 @@ siz_t bli_thread_get_range_weighted_r2l
if ( bli_obj_intersects_diag( *a ) &&
bli_obj_is_upper_or_lower( *a ) )
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
doff_t diagoff = bli_obj_diag_offset( *a );
uplo_t uplo = bli_obj_uplo( *a );
dim_t m = bli_obj_length( *a );
@@ -862,7 +862,7 @@ siz_t bli_thread_get_range_weighted_t2b
if ( bli_obj_intersects_diag( *a ) &&
bli_obj_is_upper_or_lower( *a ) )
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
doff_t diagoff = bli_obj_diag_offset( *a );
uplo_t uplo = bli_obj_uplo( *a );
dim_t m = bli_obj_length( *a );
@@ -914,7 +914,7 @@ siz_t bli_thread_get_range_weighted_b2t
if ( bli_obj_intersects_diag( *a ) &&
bli_obj_is_upper_or_lower( *a ) )
{
num_t dt = bli_obj_datatype( *a );
num_t dt = bli_obj_dt( *a );
doff_t diagoff = bli_obj_diag_offset( *a );
uplo_t uplo = bli_obj_uplo( *a );
dim_t m = bli_obj_length( *a );

View File

@@ -54,7 +54,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
@@ -93,7 +93,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *a ); \
num_t dt = bli_obj_dt( *a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
dim_t m = bli_obj_length( *a ); \
@@ -135,7 +135,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
@@ -176,7 +176,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
@@ -229,7 +229,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
@@ -281,7 +281,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t m = bli_obj_length( *x ); \
dim_t n = bli_obj_width( *x ); \
@@ -381,7 +381,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
@@ -418,7 +418,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
uplo_t uplox = bli_obj_uplo( *x ); \
@@ -464,7 +464,7 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \

View File

@@ -1,342 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
/*
#define FUNCPTR_T axpyf_fp
typedef void (*FUNCPTR_T)(
conj_t conjx,
dim_t n,
void* alpha,
void* x, inc_t incx,
void* y, inc_t incy
);
// If some mixed datatype functions will not be compiled, we initialize
// the corresponding elements of the function array to NULL.
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
static FUNCPTR_T GENARRAY3_ALL(ftypes,axpyf_penryn_int);
#else
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
static FUNCPTR_T GENARRAY3_EXT(ftypes,axpyf_penryn_int);
#else
static FUNCPTR_T GENARRAY3_MIN(ftypes,axpyf_penryn_int);
#endif
#endif
void bli_axpyf_penryn_int( obj_t* alpha,
obj_t* x,
obj_t* y )
{
num_t dt_x = bli_obj_datatype( *x );
num_t dt_y = bli_obj_datatype( *y );
conj_t conjx = bli_obj_conj_status( *x );
dim_t n = bli_obj_vector_dim( *x );
inc_t inc_x = bli_obj_vector_inc( *x );
void* buf_x = bli_obj_buffer_at_off( *x );
inc_t inc_y = bli_obj_vector_inc( *y );
void* buf_y = bli_obj_buffer_at_off( *y );
num_t dt_alpha;
void* buf_alpha;
FUNCPTR_T f;
// If alpha is a scalar constant, use dt_x to extract the address of the
// corresponding constant value; otherwise, use the datatype encoded
// within the alpha object and extract the buffer at the alpha offset.
bli_set_scalar_dt_buffer( alpha, dt_x, dt_alpha, buf_alpha );
// Index into the type combination array to extract the correct
// function pointer.
f = ftypes[dt_alpha][dt_x][dt_y];
// Invoke the function.
f( conjx,
n,
buf_alpha,
buf_x, inc_x,
buf_y, inc_y );
}
*/
#undef GENTFUNC3U12
#define GENTFUNC3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, opname, varname ) \
\
void PASTEMAC3(cha,chx,chy,varname)( \
conj_t conja, \
conj_t conjx, \
dim_t m, \
dim_t b_n, \
void* alpha, \
void* a, inc_t inca, inc_t lda, \
void* x, inc_t incx, \
void* y, inc_t incy \
) \
{ \
ctype_ax* alpha_cast = alpha; \
ctype_a* a_cast = a; \
ctype_x* x_cast = x; \
ctype_y* y_cast = y; \
ctype_a* a1; \
ctype_x* chi1; \
ctype_y* y1; \
ctype_ax alpha_chi1; \
dim_t i; \
\
for ( i = 0; i < b_n; ++i ) \
{ \
a1 = a_cast + (0 )*inca + (i )*lda; \
chi1 = x_cast + (i )*incx; \
y1 = y_cast + (0 )*incy; \
\
PASTEMAC2(chx,chax,copycjs)( conjx, *chi1, alpha_chi1 ); \
PASTEMAC2(chax,chax,scals)( *alpha_cast, alpha_chi1 ); \
\
PASTEMAC3(chax,cha,chy,axpyv)( conja, \
m, \
&alpha_chi1, \
a1, inca, \
y1, incy ); \
} \
}
// Define the basic set of functions unconditionally, and then also some
// mixed datatype functions if requested.
//INSERT_GENTFUNC3U12_BASIC( axpyf, axpyf_penryn_int )
GENTFUNC3U12( float, float, float, float, s, s, s, s, axpyf, axpyf_penryn_int )
//GENTFUNC3U12( double, double, double, double, d, d, d, d, axpyf, axpyf_penryn_int )
GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, axpyf, axpyf_penryn_int )
GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, axpyf, axpyf_penryn_int )
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
INSERT_GENTFUNC3U12_MIX_D( axpyf, axpyf_penryn_int )
#endif
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
INSERT_GENTFUNC3U12_MIX_P( axpyf, axpyf_penryn_int )
#endif
#include "pmmintrin.h"
typedef union
{
__m128d v;
double d[2];
} v2df_t;
void bli_dddaxpyf_penryn_int(
conj_t conja,
conj_t conjx,
dim_t m,
dim_t b_n,
void* alpha,
void* a, inc_t inca, inc_t lda,
void* x, inc_t incx,
void* y, inc_t incy
)
{
double* restrict alpha_cast = alpha;
double* restrict a_cast = a;
double* restrict x_cast = x;
double* restrict y_cast = y;
dim_t i;
const dim_t n_elem_per_reg = 2;
const dim_t n_iter_unroll = 2;
dim_t m_pre;
dim_t m_run;
dim_t m_left;
double* restrict a0;
double* restrict a1;
double* restrict a2;
double* restrict a3;
double* restrict y0;
double a0c, a1c, a2c, a3c;
double chi0, chi1, chi2, chi3;
v2df_t a00v, a01v, a02v, a03v, y0v;
v2df_t a10v, a11v, a12v, a13v, y1v;
v2df_t chi0v, chi1v, chi2v, chi3v;
if ( bli_zero_dim2( m, b_n ) ) return;
if ( b_n < PASTEMAC(d,axpyf_fusefac) )
{
PASTEMAC3(d,d,d,axpyf_unb_var1)( conja,
conjx,
m,
b_n,
alpha_cast,
a_cast, inca, lda,
x_cast, incx,
y_cast, incy );
return;
}
if ( inca != 1 ||
incx != 1 ||
incy != 1 ) bli_abort();
m_pre = 0;
if ( ( unsigned long ) a % 16 != 0 )
{
if ( ( unsigned long ) x % 16 == 0 ||
( unsigned long ) y % 16 == 0 ) bli_abort();
m_pre = 1;
}
m_run = ( m - m_pre ) / ( n_elem_per_reg * n_iter_unroll );
m_left = ( m - m_pre ) % ( n_elem_per_reg * n_iter_unroll );
a0 = a_cast + 0*lda;
a1 = a_cast + 1*lda;
a2 = a_cast + 2*lda;
a3 = a_cast + 3*lda;
y0 = y_cast;
chi0 = *(x_cast + 0*incx);
chi1 = *(x_cast + 1*incx);
chi2 = *(x_cast + 2*incx);
chi3 = *(x_cast + 3*incx);
PASTEMAC2(d,d,scals)( *alpha_cast, chi0 );
PASTEMAC2(d,d,scals)( *alpha_cast, chi1 );
PASTEMAC2(d,d,scals)( *alpha_cast, chi2 );
PASTEMAC2(d,d,scals)( *alpha_cast, chi3 );
if ( m_pre == 1 )
{
a0c = *a0;
a1c = *a1;
a2c = *a2;
a3c = *a3;
*y0 += chi0 * a0c +
chi1 * a1c +
chi2 * a2c +
chi3 * a3c;
a0 += inca;
a1 += inca;
a2 += inca;
a3 += inca;
y0 += incy;
}
chi0v.v = _mm_loaddup_pd( ( double* )&chi0 );
chi1v.v = _mm_loaddup_pd( ( double* )&chi1 );
chi2v.v = _mm_loaddup_pd( ( double* )&chi2 );
chi3v.v = _mm_loaddup_pd( ( double* )&chi3 );
for ( i = 0; i < m_run; ++i )
{
y0v.v = _mm_load_pd( ( double* )(y0 + 0*n_elem_per_reg) );
a00v.v = _mm_load_pd( ( double* )(a0 + 0*n_elem_per_reg) );
//a01v.v = _mm_load_pd( ( double* )(a1 + 0*n_elem_per_reg) );
a01v.v = _mm_load_pd( ( double* )(a0 + 1*lda + 0*n_elem_per_reg) );
y0v.v += chi0v.v * a00v.v;
y0v.v += chi1v.v * a01v.v;
a02v.v = _mm_load_pd( ( double* )(a2 + 0*n_elem_per_reg) );
//a03v.v = _mm_load_pd( ( double* )(a3 + 0*n_elem_per_reg) );
a03v.v = _mm_load_pd( ( double* )(a2 + 1*lda + 0*n_elem_per_reg) );
y0v.v += chi2v.v * a02v.v;
y0v.v += chi3v.v * a03v.v;
_mm_store_pd( ( double* )(y0 + 0*n_elem_per_reg), y0v.v );
y1v.v = _mm_load_pd( ( double* )(y0 + 1*n_elem_per_reg) );
a10v.v = _mm_load_pd( ( double* )(a0 + 1*n_elem_per_reg) );
//a11v.v = _mm_load_pd( ( double* )(a1 + 1*n_elem_per_reg) );
a11v.v = _mm_load_pd( ( double* )(a0 + 1*lda + 1*n_elem_per_reg) );
y1v.v += chi0v.v * a10v.v;
y1v.v += chi1v.v * a11v.v;
a12v.v = _mm_load_pd( ( double* )(a2 + 1*n_elem_per_reg) );
//a13v.v = _mm_load_pd( ( double* )(a3 + 1*n_elem_per_reg) );
a13v.v = _mm_load_pd( ( double* )(a2 + 1*lda + 1*n_elem_per_reg) );
y1v.v += chi2v.v * a12v.v;
y1v.v += chi3v.v * a13v.v;
_mm_store_pd( ( double* )(y0 + 1*n_elem_per_reg), y1v.v );
a0 += n_elem_per_reg * n_iter_unroll;
//a1 += n_elem_per_reg * n_iter_unroll;
a2 += n_elem_per_reg * n_iter_unroll;
//a3 += n_elem_per_reg * n_iter_unroll;
y0 += n_elem_per_reg * n_iter_unroll;
}
if ( m_left > 0 )
{
for ( i = 0; i < m_left; ++i )
{
a0c = *a0;
a1c = *a1;
a2c = *a2;
a3c = *a3;
*y0 += chi0 * a0c +
chi1 * a1c +
chi2 * a2c +
chi3 * a3c;
a0 += inca;
a1 += inca;
a2 += inca;
a3 += inca;
y0 += incy;
}
}
}

View File

@@ -1,421 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
/*
#define FUNCPTR_T dotxf_fp
typedef void (*FUNCPTR_T)(
conj_t conjx,
conj_t conjy,
dim_t n,
void* alpha,
void* x, inc_t incx,
void* y, inc_t incy,
void* beta,
void* rho
);
// If some mixed datatype functions will not be compiled, we initialize
// the corresponding elements of the function array to NULL.
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
static FUNCPTR_T GENARRAY3_ALL(ftypes,dotxf_penryn_int);
#else
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
static FUNCPTR_T GENARRAY3_EXT(ftypes,dotxf_penryn_int);
#else
static FUNCPTR_T GENARRAY3_MIN(ftypes,dotxf_penryn_int);
#endif
#endif
void bli_dotxf_penryn_int( obj_t* alpha,
obj_t* x,
obj_t* y,
obj_t* beta,
obj_t* rho )
{
num_t dt_x = bli_obj_datatype( *x );
num_t dt_y = bli_obj_datatype( *y );
num_t dt_rho = bli_obj_datatype( *rho );
conj_t conjx = bli_obj_conj_status( *x );
conj_t conjy = bli_obj_conj_status( *y );
dim_t n = bli_obj_vector_dim( *x );
inc_t inc_x = bli_obj_vector_inc( *x );
void* buf_x = bli_obj_buffer_at_off( *x );
inc_t inc_y = bli_obj_vector_inc( *y );
void* buf_y = bli_obj_buffer_at_off( *y );
void* buf_rho = bli_obj_buffer_at_off( *rho );
num_t dt_alpha;
void* buf_alpha;
num_t dt_beta;
void* buf_beta;
FUNCPTR_T f;
// The datatype of alpha MUST be the type union of x and y. This is to
// prevent any unnecessary loss of information during computation.
dt_alpha = bli_datatype_union( dt_x, dt_y );
buf_alpha = bli_obj_buffer_for_1x1( dt_alpha, *alpha );
// The datatype of beta MUST be the same as the datatype of rho.
dt_beta = dt_rho;
buf_beta = bli_obj_buffer_for_1x1( dt_beta, *beta );
// Index into the type combination array to extract the correct
// function pointer.
f = ftypes[dt_x][dt_y][dt_rho];
// Invoke the function.
f( conjx,
conjy,
n,
buf_alpha,
buf_x, inc_x,
buf_y, inc_y,
buf_beta,
buf_rho );
}
*/
#undef GENTFUNC3U12
#define GENTFUNC3U12( ctype_x, ctype_y, ctype_r, ctype_xy, chx, chy, chr, chxy, opname, varname ) \
\
void PASTEMAC3(chx,chy,chr,varname)( \
conj_t conjx, \
conj_t conjy, \
dim_t b_m, \
dim_t n, \
void* alpha, \
void* x, inc_t incx, inc_t ldx, \
void* y, inc_t incy, \
void* beta, \
void* r, inc_t incr \
) \
{ \
ctype_xy* alpha_cast = alpha; \
ctype_x* x_cast = x; \
ctype_y* y_cast = y; \
ctype_r* beta_cast = beta; \
ctype_r* r_cast = r; \
ctype_x* x1; \
ctype_y* y1; \
ctype_r* rho1; \
dim_t i; \
\
for ( i = 0; i < b_m; ++i ) \
{ \
x1 = x_cast + (0 )*incx + (i )*ldx; \
y1 = y_cast + (0 )*incy; \
rho1 = r_cast + (i )*incr; \
\
PASTEMAC3(chx,chy,chr,dotxv)( conjx, \
conjy, \
n, \
alpha_cast, \
x1, incx, \
y1, incy, \
beta_cast, \
rho1 ); \
} \
}
// Define the basic set of functions unconditionally, and then also some
// mixed datatype functions if requested.
//INSERT_GENTFUNC3U12_BASIC( dotxf, dotxf_penryn_int )
GENTFUNC3U12( float, float, float, float, s, s, s, s, dotxf, dotxf_penryn_int )
//GENTFUNC3U12( double, double, double, double, d, d, d, d, dotxf, dotxf_penryn_int )
GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, dotxf, dotxf_penryn_int )
GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, dotxf, dotxf_penryn_int )
#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT
INSERT_GENTFUNC3U12_MIX_D( dotxf, dotxf_penryn_int )
#endif
#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT
INSERT_GENTFUNC3U12_MIX_P( dotxf, dotxf_penryn_int )
#endif
#include "pmmintrin.h"
typedef union
{
__m128d v;
double d[2];
} v2df_t;
void bli_ddddotxf_penryn_int(
conj_t conjx,
conj_t conjy,
dim_t b_m,
dim_t n,
void* alpha,
void* x, inc_t incx, inc_t ldx,
void* y, inc_t incy,
void* beta,
void* r, inc_t incr
)
{
double* restrict alpha_cast = alpha;
double* restrict beta_cast = beta;
double* restrict x_cast = x;
double* restrict y_cast = y;
double* restrict r_cast = r;
dim_t i;
const dim_t n_elem_per_reg = 2;
const dim_t n_iter_unroll = 4;
dim_t n_pre;
dim_t n_run;
dim_t n_left;
double* restrict x0;
double* restrict x1;
double* restrict x2;
double* restrict x3;
double* restrict y0;
double rho0, rho1, rho2, rho3;
double x0c, x1c, x2c, x3c, y0c;
v2df_t rho0v, rho1v, rho2v, rho3v;
v2df_t x0v, x1v, x2v, x3v, y0v, betav, alphav;
if ( bli_zero_dim1( b_m ) ) return;
if ( bli_zero_dim1( n ) )
{
PASTEMAC(d,scals)( *beta_cast, *(r_cast ) );
PASTEMAC(d,scals)( *beta_cast, *(r_cast+1) );
PASTEMAC(d,scals)( *beta_cast, *(r_cast+2) );
PASTEMAC(d,scals)( *beta_cast, *(r_cast+3) );
return;
}
if ( b_m < PASTEMAC(d,dotxf_fusefac) )
{
PASTEMAC3(d,d,d,dotxf_unb_var1)( conjx,
conjy,
b_m,
n,
alpha_cast,
x_cast, incx, ldx,
y_cast, incy,
beta_cast,
r_cast, incr );
return;
}
if ( incx != 1 ||
incy != 1 ) bli_abort();
n_pre = 0;
if ( ( unsigned long ) y % 16 != 0 )
{
if ( ( unsigned long ) x % 16 == 0 )
bli_abort();
n_pre = 1;
}
n_run = ( n - n_pre ) / ( n_elem_per_reg * n_iter_unroll );
n_left = ( n - n_pre ) % ( n_elem_per_reg * n_iter_unroll );
x0 = x_cast;
x1 = x_cast + ldx;
x2 = x_cast + 2*ldx;
x3 = x_cast + 3*ldx;
y0 = y_cast;
PASTEMAC(d,set0)( rho0 );
PASTEMAC(d,set0)( rho1 );
PASTEMAC(d,set0)( rho2 );
PASTEMAC(d,set0)( rho3 );
if ( n_pre == 1 )
{
x0c = *x0;
x1c = *x1;
x2c = *x2;
x3c = *x3;
y0c = *y0;
rho0 += x0c * y0c;
rho1 += x1c * y0c;
rho2 += x2c * y0c;
rho3 += x3c * y0c;
x0 += incx;
x1 += incx;
x2 += incx;
x3 += incx;
y0 += incy;
}
rho0v.v = _mm_setzero_pd();
rho1v.v = _mm_setzero_pd();
rho2v.v = _mm_setzero_pd();
rho3v.v = _mm_setzero_pd();
for ( i = 0; i < n_run; ++i )
{
x0v.v = _mm_load_pd( ( double* )(x0 + 0*n_elem_per_reg) );
//x1v.v = _mm_load_pd( ( double* )(x1 + 0*n_elem_per_reg) );
x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 0*n_elem_per_reg) );
x2v.v = _mm_load_pd( ( double* )(x2 + 0*n_elem_per_reg) );
//x3v.v = _mm_load_pd( ( double* )(x3 + 0*n_elem_per_reg) );
x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 0*n_elem_per_reg) );
y0v.v = _mm_load_pd( ( double* )(y0 + 0*n_elem_per_reg) );
rho0v.v += x0v.v * y0v.v;
rho1v.v += x1v.v * y0v.v;
rho2v.v += x2v.v * y0v.v;
rho3v.v += x3v.v * y0v.v;
x0v.v = _mm_load_pd( ( double* )(x0 + 1*n_elem_per_reg) );
//x1v.v = _mm_load_pd( ( double* )(x1 + 1*n_elem_per_reg) );
x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 1*n_elem_per_reg) );
x2v.v = _mm_load_pd( ( double* )(x2 + 1*n_elem_per_reg) );
//x3v.v = _mm_load_pd( ( double* )(x3 + 1*n_elem_per_reg) );
x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 1*n_elem_per_reg) );
y0v.v = _mm_load_pd( ( double* )(y0 + 1*n_elem_per_reg) );
rho0v.v += x0v.v * y0v.v;
rho1v.v += x1v.v * y0v.v;
rho2v.v += x2v.v * y0v.v;
rho3v.v += x3v.v * y0v.v;
x0v.v = _mm_load_pd( ( double* )(x0 + 2*n_elem_per_reg) );
//x1v.v = _mm_load_pd( ( double* )(x1 + 2*n_elem_per_reg) );
x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 2*n_elem_per_reg) );
x2v.v = _mm_load_pd( ( double* )(x2 + 2*n_elem_per_reg) );
//x3v.v = _mm_load_pd( ( double* )(x3 + 2*n_elem_per_reg) );
x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 2*n_elem_per_reg) );
y0v.v = _mm_load_pd( ( double* )(y0 + 2*n_elem_per_reg) );
rho0v.v += x0v.v * y0v.v;
rho1v.v += x1v.v * y0v.v;
rho2v.v += x2v.v * y0v.v;
rho3v.v += x3v.v * y0v.v;
x0v.v = _mm_load_pd( ( double* )(x0 + 3*n_elem_per_reg) );
//x1v.v = _mm_load_pd( ( double* )(x1 + 3*n_elem_per_reg) );
x1v.v = _mm_load_pd( ( double* )(x0 + 1*ldx + 3*n_elem_per_reg) );
x2v.v = _mm_load_pd( ( double* )(x2 + 3*n_elem_per_reg) );
//x3v.v = _mm_load_pd( ( double* )(x3 + 3*n_elem_per_reg) );
x3v.v = _mm_load_pd( ( double* )(x2 + 1*ldx + 3*n_elem_per_reg) );
y0v.v = _mm_load_pd( ( double* )(y0 + 3*n_elem_per_reg) );
rho0v.v += x0v.v * y0v.v;
rho1v.v += x1v.v * y0v.v;
rho2v.v += x2v.v * y0v.v;
rho3v.v += x3v.v * y0v.v;
x0 += n_elem_per_reg * n_iter_unroll;
//x1 += n_elem_per_reg * n_iter_unroll;
x2 += n_elem_per_reg * n_iter_unroll;
//x3 += n_elem_per_reg * n_iter_unroll;
y0 += n_elem_per_reg * n_iter_unroll;
}
rho0 += rho0v.d[0] + rho0v.d[1];
rho1 += rho1v.d[0] + rho1v.d[1];
rho2 += rho2v.d[0] + rho2v.d[1];
rho3 += rho3v.d[0] + rho3v.d[1];
if ( n_left > 0 )
{
for ( i = 0; i < n_left; ++i )
{
x0c = *x0;
x1c = *x1;
x2c = *x2;
x3c = *x3;
y0c = *y0;
rho0 += x0c * y0c;
rho1 += x1c * y0c;
rho2 += x2c * y0c;
rho3 += x3c * y0c;
x0 += incx;
x1 += incx;
x2 += incx;
x3 += incx;
y0 += incy;
}
}
/*
PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast ) ); \
PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast+1) ); \
PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast+2) ); \
PASTEMAC2(d,d,scals)( *beta_cast, *(r_cast+3) ); \
PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho1, *(r_cast ) ); \
PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho2, *(r_cast+1) ); \
PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho3, *(r_cast+2) ); \
PASTEMAC3(d,d,d,axpys)( *alpha_cast, rho4, *(r_cast+3) ); \
*/
rho1v.d[0] = rho0;
rho1v.d[1] = rho1;
rho3v.d[0] = rho2;
rho3v.d[1] = rho3;
betav.v = _mm_loaddup_pd( ( double* ) beta_cast );
alphav.v = _mm_loaddup_pd( ( double* ) alpha_cast );
rho0v.v = _mm_load_pd( ( double* )(r_cast + 0*n_elem_per_reg) );
rho2v.v = _mm_load_pd( ( double* )(r_cast + 1*n_elem_per_reg) );
rho0v.v *= betav.v;
rho2v.v *= betav.v;
rho0v.v += alphav.v * rho1v.v;
rho2v.v += alphav.v * rho3v.v;
_mm_store_pd( ( double* )(r_cast + 0*n_elem_per_reg), rho0v.v );
_mm_store_pd( ( double* )(r_cast + 1*n_elem_per_reg), rho2v.v );
}

View File

@@ -163,8 +163,8 @@ int main( int argc, char** argv )
obj_t ar, ai;
bli_obj_alias_to( a, ar );
bli_obj_alias_to( a, ai );
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_obj_set_dt( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_dt( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_printm( "ar", &ar, "%4.1f", "" );
bli_printm( "ai", &ai, "%4.1f", "" );
*/

View File

@@ -159,8 +159,8 @@ int main( int argc, char** argv )
obj_t ar, ai;
bli_obj_alias_to( a, ar );
bli_obj_alias_to( a, ai );
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_obj_set_dt( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_dt( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_printm( "ar", &ar, "%4.1f", "" );
bli_printm( "ai", &ai, "%4.1f", "" );
*/

View File

@@ -163,8 +163,8 @@ int main( int argc, char** argv )
obj_t ar, ai;
bli_obj_alias_to( a, ar );
bli_obj_alias_to( a, ai );
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_obj_set_dt( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_dt( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_printm( "ar", &ar, "%4.1f", "" );
bli_printm( "ai", &ai, "%4.1f", "" );

View File

@@ -238,8 +238,8 @@ void libblis_test_addm_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
dim_t m = bli_obj_length( *y );
dim_t n = bli_obj_width( *y );

View File

@@ -234,8 +234,8 @@ void libblis_test_addv_check
double* resid
)
{
num_t dt = bli_obj_datatype( *x );
num_t dt_real = bli_obj_datatype_proj_to_real( *x );
num_t dt = bli_obj_dt( *x );
num_t dt_real = bli_obj_dt_proj_to_real( *x );
dim_t m = bli_obj_vector_dim( *x );
conj_t conjx = bli_obj_conj_status( *x );

View File

@@ -301,7 +301,7 @@ void PASTEMAC0(opname) \
obj_t* index \
) \
{ \
num_t dt = bli_obj_datatype( *x ); \
num_t dt = bli_obj_dt( *x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \

View File

@@ -262,8 +262,8 @@ void libblis_test_axpbyv_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
dim_t m = bli_obj_vector_dim( *y );

View File

@@ -278,8 +278,8 @@ void libblis_test_axpy2v_check
double* resid
)
{
num_t dt = bli_obj_datatype( *z );
num_t dt_real = bli_obj_datatype_proj_to_real( *z );
num_t dt = bli_obj_dt( *z );
num_t dt_real = bli_obj_dt_proj_to_real( *z );
dim_t m = bli_obj_vector_dim( *z );

View File

@@ -279,8 +279,8 @@ void libblis_test_axpyf_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
dim_t m = bli_obj_vector_dim( *y );
dim_t b_n = bli_obj_width( *a );

View File

@@ -254,8 +254,8 @@ void libblis_test_axpym_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
dim_t m = bli_obj_length( *y );
dim_t n = bli_obj_width( *y );

View File

@@ -252,8 +252,8 @@ void libblis_test_axpyv_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
dim_t m = bli_obj_vector_dim( *y );

View File

@@ -226,7 +226,7 @@ void libblis_test_copym_check
double* resid
)
{
num_t dt_real = bli_obj_datatype_proj_to_real( *x );
num_t dt_real = bli_obj_dt_proj_to_real( *x );
obj_t norm_y_r;

View File

@@ -223,7 +223,7 @@ void libblis_test_copyv_check
double* resid
)
{
num_t dt_real = bli_obj_datatype_proj_to_real( *x );
num_t dt_real = bli_obj_dt_proj_to_real( *x );
obj_t norm_y_r;

View File

@@ -300,8 +300,8 @@ void libblis_test_dotaxpyv_check
double* resid
)
{
num_t dt = bli_obj_datatype( *z );
num_t dt_real = bli_obj_datatype_proj_to_real( *z );
num_t dt = bli_obj_dt( *z );
num_t dt_real = bli_obj_dt_proj_to_real( *z );
dim_t m = bli_obj_vector_dim( *z );

View File

@@ -249,7 +249,7 @@ void libblis_test_dotv_check
double* resid
)
{
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
obj_t rho_r, rho_i;
obj_t norm_x, norm_xy;

View File

@@ -321,8 +321,8 @@ void libblis_test_dotxaxpyf_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
dim_t m = bli_obj_vector_dim( *z );
dim_t b_n = bli_obj_vector_dim( *y );

View File

@@ -286,8 +286,8 @@ void libblis_test_dotxf_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
dim_t b_n = bli_obj_vector_dim( *y );

View File

@@ -269,7 +269,7 @@ void libblis_test_dotxv_check
double* resid
)
{
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
obj_t rho_r, rho_i;
obj_t norm_x_r, norm_xy_r;

View File

@@ -280,8 +280,8 @@ void libblis_test_gemm_check
double* resid
)
{
num_t dt = bli_obj_datatype( *c );
num_t dt_real = bli_obj_datatype_proj_to_real( *c );
num_t dt = bli_obj_dt( *c );
num_t dt_real = bli_obj_dt_proj_to_real( *c );
dim_t m = bli_obj_length( *c );
dim_t n = bli_obj_width( *c );

View File

@@ -352,8 +352,8 @@ void libblis_test_gemm_ukr_check
double* resid
)
{
num_t dt = bli_obj_datatype( *c );
num_t dt_real = bli_obj_datatype_proj_to_real( *c );
num_t dt = bli_obj_dt( *c );
num_t dt_real = bli_obj_dt_proj_to_real( *c );
dim_t m = bli_obj_length( *c );
dim_t n = bli_obj_width( *c );

View File

@@ -428,8 +428,8 @@ void libblis_test_gemmtrsm_ukr_check
double* resid
)
{
num_t dt = bli_obj_datatype( *b11 );
num_t dt_real = bli_obj_datatype_proj_to_real( *b11 );
num_t dt = bli_obj_dt( *b11 );
num_t dt_real = bli_obj_dt_proj_to_real( *b11 );
dim_t m = bli_obj_length( *b11 );
dim_t n = bli_obj_width( *b11 );

View File

@@ -283,8 +283,8 @@ void libblis_test_gemv_check
double* resid
)
{
num_t dt = bli_obj_datatype( *y );
num_t dt_real = bli_obj_datatype_proj_to_real( *y );
num_t dt = bli_obj_dt( *y );
num_t dt_real = bli_obj_dt_proj_to_real( *y );
conj_t conja = bli_obj_conj_status( *a );

View File

@@ -267,8 +267,8 @@ void libblis_test_ger_check
double* resid
)
{
num_t dt = bli_obj_datatype( *a );
num_t dt_real = bli_obj_datatype_proj_to_real( *a );
num_t dt = bli_obj_dt( *a );
num_t dt_real = bli_obj_dt_proj_to_real( *a );
dim_t m_a = bli_obj_length( *a );
dim_t n_a = bli_obj_width( *a );

Some files were not shown because too many files have changed in this diff Show More