Merge remote-tracking branch 'upstream/dev' into asm-macros

This commit is contained in:
Devin Matthews
2018-06-20 14:07:49 -05:00
181 changed files with 4391 additions and 923 deletions

View File

@@ -49,7 +49,7 @@ GENFRONT( copysc )
//
// Define BLAS-like interfaces with heterogeneous-typed operands.
// Prototype BLAS-like interfaces with heterogeneous-typed operands.
//
#undef GENTPROT2

View File

@@ -203,6 +203,11 @@ void bli_l1v_xy_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_vector_object( x );
@@ -243,6 +248,11 @@ void bli_l1v_axy_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
@@ -289,6 +299,11 @@ void bli_l1v_xby_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( beta );
@@ -339,6 +354,11 @@ void bli_l1v_axby_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
@@ -402,6 +422,11 @@ void bli_l1v_dot_check
e_val = bli_check_nonconstant_object( rho );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );

View File

@@ -121,6 +121,11 @@ void bli_l1d_xy_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_matrix_object( x );
@@ -161,6 +166,11 @@ void bli_l1d_axy_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );

View File

@@ -66,6 +66,14 @@ void bli_axpy2v_check
e_val = bli_check_floating_object( z );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, z );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alphax );
@@ -132,6 +140,14 @@ void bli_axpyf_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
@@ -203,6 +219,17 @@ void bli_dotaxpyv_check
e_val = bli_check_floating_object( z );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, xt );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( x, z );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
@@ -299,6 +326,23 @@ void bli_dotxaxpyf_check
e_val = bli_check_floating_object( z );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, at );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, w );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, z );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );
@@ -407,6 +451,14 @@ void bli_dotxf_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );

View File

@@ -106,6 +106,11 @@ void bli_l1m_xy_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_matrix_object( x );
@@ -146,6 +151,11 @@ void bli_l1m_axy_check
e_val = bli_check_floating_object( y );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( x, y );
bli_check_error_code( e_val );
// Check object dimensions.
e_val = bli_check_scalar_object( alpha );

View File

@@ -46,37 +46,45 @@ struct packm_params_s
};
typedef struct packm_params_s packm_params_t;
#define bli_cntl_packm_params_var_func( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->var_func )
// Return the var_func field of the packm_params_t referenced by
// cntl->params.
static packm_voft bli_cntl_packm_params_var_func( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->var_func;
}
#define bli_cntl_packm_params_bmid_m( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->bmid_m )
// Return the bmid_m field of the packm_params_t referenced by
// cntl->params.
static bszid_t bli_cntl_packm_params_bmid_m( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->bmid_m;
}
#define bli_cntl_packm_params_bmid_n( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->bmid_n )
// Return the bmid_n field of the packm_params_t referenced by
// cntl->params.
static bszid_t bli_cntl_packm_params_bmid_n( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->bmid_n;
}
#define bli_cntl_packm_params_does_invert_diag( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->does_invert_diag )
// Return the does_invert_diag flag of the packm_params_t referenced by
// cntl->params.
static bool_t bli_cntl_packm_params_does_invert_diag( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->does_invert_diag;
}
#define bli_cntl_packm_params_rev_iter_if_upper( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_upper )
// Return the rev_iter_if_upper flag of the packm_params_t referenced by
// cntl->params.
static bool_t bli_cntl_packm_params_rev_iter_if_upper( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->rev_iter_if_upper;
}
#define bli_cntl_packm_params_rev_iter_if_lower( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_lower )
// Return the rev_iter_if_lower flag of the packm_params_t referenced by
// cntl->params.
static bool_t bli_cntl_packm_params_rev_iter_if_lower( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->rev_iter_if_lower;
}
#define bli_cntl_packm_params_pack_schema( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->pack_schema )
// Return the pack_schema field of the packm_params_t referenced by
// cntl->params.
static pack_t bli_cntl_packm_params_pack_schema( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->pack_schema;
}
#define bli_cntl_packm_params_pack_buf_type( cntl ) \
\
( ( (packm_params_t*)(cntl)->params )->pack_buf_type )
// Return the pack_buf_type field of the packm_params_t referenced by
// cntl->params.
static packbuf_t bli_cntl_packm_params_pack_buf_type( cntl_t* cntl )
{
	return ( ( packm_params_t* )cntl->params )->pack_buf_type;
}
// -----------------------------------------------------------------------------

View File

@@ -56,8 +56,8 @@ siz_t bli_packm_init
bool_t does_invert_diag;
bool_t rev_iter_if_upper;
bool_t rev_iter_if_lower;
//pack_t pack_schema;
packbuf_t pack_buf_type;
pack_t schema;
//packbuf_t pack_buf_type;
siz_t size_needed;
// Check parameters.
@@ -70,8 +70,8 @@ siz_t bli_packm_init
does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl );
rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl );
rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl );
//pack_schema = bli_cntl_packm_params_pack_schema( cntl );
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
schema = bli_cntl_packm_params_pack_schema( cntl );
//pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
#if 0
// Let us now check to see if the object has already been packed. First
@@ -112,30 +112,51 @@ siz_t bli_packm_init
return 0;
}
// We now ignore the pack_schema field in the control tree and
// extract the schema from the context, depending on whether we are
// preparing to pack a block of A or panel of B. For A and B, we must
// obtain the schema from the context since the induced methods reuse
// the same control trees used by native execution, and those induced
// methods specify the schema used by the current execution phase
// within the context (whereas the control tree does not change).
#if 0
pack_t schema;
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
schema = bli_cntx_schema_a_block( cntx );
// We now ignore the pack_schema field in the control tree and
// extract the schema from the context, depending on whether we are
// preparing to pack a block of A or panel of B. For A and B, we must
// obtain the schema from the context since the induced methods reuse
// the same control trees used by native execution, and those induced
// methods specify the schema used by the current execution phase
// within the context (whereas the control tree does not change).
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
{
schema = bli_cntx_schema_a_block( cntx );
}
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
{
schema = bli_cntx_schema_b_panel( cntx );
}
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
{
schema = bli_cntl_packm_params_pack_schema( cntl );
}
}
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
else // ( bli_cntx_method( cntx ) == BLIS_NAT )
{
schema = bli_cntx_schema_b_panel( cntx );
// For native execution, we obtain the schema from the control tree
// node. (Notice that it doesn't matter if the pack_buf_type is for
// A or B.)
schema = bli_cntl_packm_params_pack_schema( cntl );
}
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
// This is no longer needed now that we branch between native and
// non-native cases above.
#if 0
if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
{
// If we get a request to pack C for some reason, it is likely
// not part of an induced method, and so it would be safe (and
// necessary) to read the pack schema from the control tree.
schema = bli_cntl_packm_params_pack_schema( cntl );
}
#endif
#endif
// Prepare a few other variables based on properties of the control
// tree.

View File

@@ -53,6 +53,14 @@ void bli_gemv_check
e_val = bli_check_general_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
}
@@ -80,6 +88,14 @@ void bli_hemv_check
e_val = bli_check_hermitian_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
}
@@ -107,6 +123,14 @@ void bli_symv_check
e_val = bli_check_symmetric_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
}
@@ -132,6 +156,11 @@ void bli_trmv_check
e_val = bli_check_triangular_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
}
@@ -157,6 +186,11 @@ void bli_trsv_check
e_val = bli_check_triangular_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
}
@@ -178,6 +212,14 @@ void bli_ger_check
e_val = bli_check_general_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
}
@@ -203,6 +245,11 @@ void bli_her_check
e_val = bli_check_hermitian_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
}
@@ -229,6 +276,14 @@ void bli_her2_check
e_val = bli_check_hermitian_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
}
@@ -254,6 +309,11 @@ void bli_syr_check
e_val = bli_check_symmetric_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
}
@@ -280,6 +340,14 @@ void bli_syr2_check
e_val = bli_check_symmetric_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( a, x );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( a, y );
bli_check_error_code( e_val );
}

View File

@@ -294,6 +294,14 @@ void bli_gemm_basic_check
e_val = bli_check_level3_dims( a, b, c );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( c, a );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( c, b );
bli_check_error_code( e_val );
}
void bli_hemm_basic_check
@@ -330,6 +338,14 @@ void bli_hemm_basic_check
e_val = bli_check_square_object( a );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( c, a );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( c, b );
bli_check_error_code( e_val );
}
void bli_herk_basic_check
@@ -365,6 +381,14 @@ void bli_herk_basic_check
e_val = bli_check_general_object( ah );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( c, a );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( c, ah );
bli_check_error_code( e_val );
}
void bli_her2k_basic_check
@@ -412,6 +436,20 @@ void bli_her2k_basic_check
e_val = bli_check_general_object( ah );
bli_check_error_code( e_val );
// Check for consistent datatypes.
e_val = bli_check_consistent_object_datatypes( c, a );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( c, ah );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( c, b );
bli_check_error_code( e_val );
e_val = bli_check_consistent_object_datatypes( c, bh );
bli_check_error_code( e_val );
}
void bli_l3_basic_check

View File

@@ -45,6 +45,21 @@ void bli_l3_cntl_create_if
cntl_t** cntl_use
)
{
// This is part of a hack to support mixed domain in bli_gemm_front().
// Sometimes we need to specify a non-standard schema for A and B, and
// we decided to transmit them via the schema field in the obj_t's
// rather than pass them in as function parameters. Once the values
// have been read, we immediately reset them back to their expected
// values for unpacked objects. Notice that we do this even if the
// caller passed in a custom control tree; that's because we still need
// to reset the pack schema of a and b, which were modified by the
// operation's _front() function.
pack_t schema_a = bli_obj_pack_schema( a );
pack_t schema_b = bli_obj_pack_schema( b );
bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );
// If the control tree pointer is NULL, we construct a default
// tree as a function of the operation family.
if ( cntl_orig == NULL )
@@ -53,7 +68,7 @@ void bli_l3_cntl_create_if
family == BLIS_HERK ||
family == BLIS_TRMM )
{
*cntl_use = bli_gemm_cntl_create( family );
*cntl_use = bli_gemm_cntl_create( family, schema_a, schema_b );
}
else // if ( family == BLIS_TRSM )
{
@@ -62,7 +77,7 @@ void bli_l3_cntl_create_if
if ( bli_obj_is_triangular( a ) ) side = BLIS_LEFT;
else side = BLIS_RIGHT;
*cntl_use = bli_trsm_cntl_create( side );
*cntl_use = bli_trsm_cntl_create( side, schema_a, schema_b );
}
}
else

View File

@@ -57,20 +57,25 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
/* Invoke the operation's "ind" function--its induced method front-end.
This function will call native execution for real domain problems.
For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. */ \
PASTEMAC(opname,ind) \
( \
alpha, \
a, \
b, \
beta, \
c, \
cntx \
); \
/* Only proceed with an induced method if all operands have the same
(complex) datatype. If any datatypes differ, skip the induced method
chooser function and proceed directly with native execution, which is
where mixed datatype support will be implemented (if at all). */ \
if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
bli_obj_dt( b ) == bli_obj_dt( c ) && \
bli_obj_is_complex( c ) ) \
{ \
/* Invoke the operation's "ind" function--its induced method front-end.
For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. (For real problems, it calls
the operation's native execution interface.) */ \
PASTEMAC(opname,ind)( alpha, a, b, beta, c, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( alpha, a, b, beta, c, cntx ); \
} \
}
GENFRONT( gemm )
@@ -96,16 +101,25 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
PASTEMAC(opname,ind) \
( \
side, \
alpha, \
a, \
b, \
beta, \
c, \
cntx \
); \
/* Only proceed with an induced method if all operands have the same
(complex) datatype. If any datatypes differ, skip the induced method
chooser function and proceed directly with native execution, which is
where mixed datatype support will be implemented (if at all). */ \
if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
bli_obj_dt( b ) == bli_obj_dt( c ) && \
bli_obj_is_complex( c ) ) \
{ \
/* Invoke the operation's "ind" function--its induced method front-end.
For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. (For real problems, it calls
the operation's native execution interface.) */ \
PASTEMAC(opname,ind)( side, alpha, a, b, beta, c, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( side, alpha, a, b, beta, c, cntx ); \
} \
}
GENFRONT( hemm )
@@ -129,14 +143,24 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
PASTEMAC(opname,ind) \
( \
alpha, \
a, \
beta, \
c, \
cntx \
); \
/* Only proceed with an induced method if all operands have the same
(complex) datatype. If any datatypes differ, skip the induced method
chooser function and proceed directly with native execution, which is
where mixed datatype support will be implemented (if at all). */ \
if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
bli_obj_is_complex( c ) ) \
{ \
/* Invoke the operation's "ind" function--its induced method front-end.
For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. (For real problems, it calls
the operation's native execution interface.) */ \
PASTEMAC(opname,ind)( alpha, a, beta, c, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( alpha, a, beta, c, cntx ); \
} \
}
GENFRONT( herk )
@@ -159,14 +183,24 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
PASTEMAC(opname,ind) \
( \
side, \
alpha, \
a, \
b, \
cntx \
); \
/* Only proceed with an induced method if all operands have the same
(complex) datatype. If any datatypes differ, skip the induced method
chooser function and proceed directly with native execution, which is
where mixed datatype support will be implemented (if at all). */ \
if ( bli_obj_dt( a ) == bli_obj_dt( b ) && \
bli_obj_is_complex( b ) ) \
{ \
/* Invoke the operation's "ind" function--its induced method front-end.
For complex problems, it calls the highest priority induced method
that is available (ie: implemented and enabled), and if none are
enabled, it calls native execution. (For real problems, it calls
the operation's native execution interface.) */ \
PASTEMAC(opname,ind)( side, alpha, a, b, cntx ); \
} \
else \
{ \
PASTEMAC(opname,nat)( side, alpha, a, b, cntx ); \
} \
}
GENFRONT( trmm )

View File

@@ -38,24 +38,24 @@
// gemm
#define gemm_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way )
#define gemm_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way )
// Compute the address of the next panel of A (or B) for the given thread by
// advancing the current panel address (a1/b1) by step * thread->n_way.
// Every macro argument is parenthesized so that compound argument
// expressions (e.g. a step of "s + 1") expand with the intended precedence.
// Each argument is evaluated exactly once.
#define bli_gemm_get_next_a_upanel( thread, a1, step ) ( (a1) + (step) * (thread)->n_way )
#define bli_gemm_get_next_b_upanel( thread, b1, step ) ( (b1) + (step) * (thread)->n_way )
// herk
#define herk_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way )
#define herk_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way )
// Compute the address of the next panel of A (or B) for the given thread by
// advancing the current panel address (a1/b1) by step * thread->n_way.
// Every macro argument is parenthesized so that compound argument
// expressions (e.g. a step of "s + 1") expand with the intended precedence.
// Each argument is evaluated exactly once.
#define bli_herk_get_next_a_upanel( thread, a1, step ) ( (a1) + (step) * (thread)->n_way )
#define bli_herk_get_next_b_upanel( thread, b1, step ) ( (b1) + (step) * (thread)->n_way )
// trmm
#define trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
#define trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
#define trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
#define trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
// Evaluate to nonzero when iteration `index` is congruent to
// thread->work_id modulo thread->n_way, and to zero otherwise. The
// arguments are parenthesized so that a compound index expression
// (e.g. "i + 1") is reduced modulo n_way as a whole. Note that `thread`
// is evaluated more than once, so it must not have side effects.
#define bli_trmm_r_ir_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
#define bli_trmm_r_jr_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
#define bli_trmm_l_ir_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
#define bli_trmm_l_jr_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
// trsm
#define trsm_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
// Evaluate to nonzero when iteration `index` is congruent to
// thread->work_id modulo thread->n_way, and to zero otherwise. The
// arguments are parenthesized so that a compound index expression
// (e.g. "i + 1") is reduced modulo n_way as a whole. Note that `thread`
// is evaluated more than once, so it must not have side effects.
#define bli_trsm_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
//
// thrinfo_t APIs specific to level-3 operations.

View File

@@ -55,7 +55,7 @@ void PASTEMAC(ch,opname) \
\
/* Query the context for the function address of the current
datatype's micro-kernel. */ \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
\
/* Invoke the typed function for the given datatype. */ \
f( \
@@ -91,7 +91,7 @@ void PASTEMAC(ch,opname) \
\
/* Query the context for the function address of the current
datatype's micro-kernel. */ \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
\
/* Invoke the typed function for the given datatype. */ \
f( \
@@ -129,7 +129,7 @@ void PASTEMAC(ch,opname) \
\
/* Query the context for the function address of the current
datatype's micro-kernel. */ \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
\
/* Invoke the typed function for the given datatype. */ \
f( \

View File

@@ -36,17 +36,21 @@
cntl_t* bli_gemm_cntl_create
(
opid_t family
opid_t family,
pack_t schema_a,
pack_t schema_b
)
{
return bli_gemmbp_cntl_create( family );
return bli_gemmbp_cntl_create( family, schema_a, schema_b );
}
// -----------------------------------------------------------------------------
cntl_t* bli_gemmbp_cntl_create
(
opid_t family
opid_t family,
pack_t schema_a,
pack_t schema_b
)
{
void* macro_kernel_p = bli_gemm_ker_var2;
@@ -82,7 +86,7 @@ cntl_t* bli_gemmbp_cntl_create
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS,
schema_a, // normally BLIS_PACKED_ROW_PANELS
BLIS_BUFFER_FOR_A_BLOCK,
gemm_cntl_bp_bu
);
@@ -106,7 +110,7 @@ cntl_t* bli_gemmbp_cntl_create
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS,
schema_b, // normally BLIS_PACKED_COL_PANELS
BLIS_BUFFER_FOR_B_PANEL,
gemm_cntl_op_bp
);
@@ -134,6 +138,10 @@ cntl_t* bli_gemmbp_cntl_create
// -----------------------------------------------------------------------------
// This control tree creation function is disabled because it is no longer used.
// (It was originally created in the run up to publishing the 1m journal article,
// but was disabled to reduce complexity.)
#if 0
cntl_t* bli_gemmpb_cntl_create
(
opid_t family
@@ -223,6 +231,7 @@ cntl_t* bli_gemmpb_cntl_create
return gemm_cntl_vl_mm;
}
#endif
// -----------------------------------------------------------------------------

View File

@@ -34,20 +34,26 @@
cntl_t* bli_gemm_cntl_create
(
opid_t family
opid_t family,
pack_t schema_a,
pack_t schema_b
);
// -----------------------------------------------------------------------------
cntl_t* bli_gemmbp_cntl_create
(
opid_t family
opid_t family,
pack_t schema_a,
pack_t schema_b
);
#if 0
cntl_t* bli_gemmpb_cntl_create
(
opid_t family
opid_t family,
);
#endif
// -----------------------------------------------------------------------------

View File

@@ -77,7 +77,7 @@ void bli_gemm_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_eff_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_swap( &a_local, &b_local );
@@ -87,10 +87,34 @@ void bli_gemm_front
}
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_GEMM, BLIS_LEFT, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_GEMM,
BLIS_LEFT, // ignored for gemm/hemm/symm
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end via the thread handler.
bli_l3_thread_decorator

View File

@@ -183,7 +183,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -192,7 +192,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -295,11 +295,11 @@ void PASTEMAC(ch,varname) \
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
{ \
a2 = a_cast; \
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
b2 = b_cast; \
} \

View File

@@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. */ \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -291,11 +291,11 @@ void PASTEMAC(ch,varname) \
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
{ \
a2 = a_cast; \
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
b2 = b_cast; \
} \

View File

@@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. */ \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -300,11 +300,11 @@ void PASTEMAC(ch,varname) \
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
{ \
a2 = a_cast; \
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
b2 = b_cast; \
} \

View File

@@ -72,7 +72,7 @@ void bli_hemm_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_toggle_conj( &a_local );
@@ -88,10 +88,34 @@ void bli_hemm_front
}
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_HEMM, BLIS_LEFT, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_HEMM,
BLIS_LEFT, // ignored for gemm/hemm/symm
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end.
bli_l3_thread_decorator

View File

@@ -92,7 +92,7 @@ void bli_her2k_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_swap( &a_local, &bh_local );
bli_obj_swap( &b_local, &ah_local );
@@ -106,10 +106,38 @@ void bli_her2k_front
}
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_HER2K, BLIS_LEFT, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_HER2K,
BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bh_local );
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &bh_local );
bli_obj_set_pack_schema( schema_a, &b_local );
bli_obj_set_pack_schema( schema_b, &ah_local );
}
// Invoke herk twice, using beta only the first time.

View File

@@ -77,7 +77,7 @@ void bli_herk_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_toggle_conj( &a_local );
bli_obj_toggle_conj( &ah_local );
@@ -86,10 +86,34 @@ void bli_herk_front
}
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_HERK, BLIS_LEFT, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_HERK,
BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &ah_local );
}
// Invoke the internal back-end.
bli_l3_thread_decorator

View File

@@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \
a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
{ \
a2 = a_cast; \
b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \
b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
b2 = b_cast; \
} \

View File

@@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \
a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
{ \
a2 = a_cast; \
b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \
b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
b2 = b_cast; \
} \

View File

@@ -72,7 +72,7 @@ void bli_symm_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_induce_trans( &b_local );
@@ -87,10 +87,34 @@ void bli_symm_front
}
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_SYMM, BLIS_LEFT, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_SYMM,
BLIS_LEFT, // ignored for gemm/hemm/symm
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end.
bli_l3_thread_decorator

View File

@@ -81,16 +81,44 @@ void bli_syr2k_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_induce_trans( &c_local );
}
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_SYR2K, BLIS_LEFT, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_SYR2K,
BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bt_local );
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &bt_local );
bli_obj_set_pack_schema( schema_a, &b_local );
bli_obj_set_pack_schema( schema_b, &at_local );
}
// Invoke herk twice, using beta only the first time.

View File

@@ -74,16 +74,40 @@ void bli_syrk_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_obj_induce_trans( &c_local );
}
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_SYRK, BLIS_LEFT, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_SYRK,
BLIS_LEFT, // ignored for her[2]k/syr[2]k
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &at_local );
}
// Invoke the internal back-end.
bli_l3_thread_decorator

View File

@@ -105,7 +105,7 @@ void bli_trmm_front
// NOTE: We disable the optimization for 1x1 matrices since the concept
// of row- vs. column storage breaks down.
if ( !bli_obj_is_1x1( &c_local ) )
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_induce_trans( &a_local );
@@ -130,10 +130,34 @@ void bli_trmm_front
bli_obj_set_as_root( &c_local );
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_TRMM, side, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_TRMM,
side,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end.
bli_l3_thread_decorator

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -322,7 +322,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the n dimension (NR columns at a time). */ \
for ( j = 0; j < n_iter; ++j ) \
{ \
if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \
if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \
\
ctype* restrict a1; \
ctype* restrict c11; \
@@ -364,7 +364,7 @@ void PASTEMAC(ch,varname) \
is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
\
b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \
\
@@ -434,7 +434,7 @@ void PASTEMAC(ch,varname) \
} \
else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \
{ \
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
\
ctype* restrict a2; \
\

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -329,7 +329,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the n dimension (NR columns at a time). */ \
for ( j = 0; j < n_iter; ++j ) \
{ \
if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \
if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \
\
ctype* restrict a1; \
ctype* restrict c11; \
@@ -371,7 +371,7 @@ void PASTEMAC(ch,varname) \
is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
\
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
\
b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \
\
@@ -441,7 +441,7 @@ void PASTEMAC(ch,varname) \
} \
else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \
{ \
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
\
ctype* restrict a2; \
\

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \
is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \
\
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
\
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
object. */ \
@@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
\
ctype* restrict a1_i; \
ctype* restrict a2; \
@@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \
} \
else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \
{ \
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
\
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
object. */ \
@@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
\
ctype* restrict a2; \
\

View File

@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
/* Query the context for the micro-kernel address and cast it to its
function pointer type. */ \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \
is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \
\
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
\
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
object. */ \
@@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
\
ctype* restrict a1_i; \
ctype* restrict a2; \
@@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \
} \
else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \
{ \
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
\
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
object. */ \
@@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
\
ctype* restrict a2; \
\

View File

@@ -104,7 +104,7 @@ void bli_trmm3_front
// contiguous columns, or if C is stored by columns and the micro-kernel
// prefers contiguous rows, transpose the entire operation to allow the
// micro-kernel to access elements of C in its preferred manner.
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
{
bli_toggle_side( &side );
bli_obj_induce_trans( &a_local );
@@ -129,10 +129,34 @@ void bli_trmm3_front
bli_obj_set_as_root( &c_local );
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_TRMM3, side, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_TRMM3,
side,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end.
bli_l3_thread_decorator

View File

@@ -36,16 +36,21 @@
// Create a trsm control tree for the given side.
//
// side:     selects the left- or right-side constructor.
// schema_a: pack schema forwarded for matrix A.
// schema_b: pack schema forwarded for matrix B.
//
// Returns whatever the side-specific constructor returns.
cntl_t* bli_trsm_cntl_create
     (
       side_t side,
       pack_t schema_a,
       pack_t schema_b
     )
{
	// Both side-specific constructors take the schemas, so they must be
	// forwarded rather than dropped by a zero-argument call.
	if ( bli_is_left( side ) )
		return bli_trsm_l_cntl_create( schema_a, schema_b );
	else
		return bli_trsm_r_cntl_create( schema_a, schema_b );
}
cntl_t* bli_trsm_l_cntl_create
(
void
pack_t schema_a,
pack_t schema_b
)
{
void* macro_kernel_p = bli_trsm_xx_ker_var2;
@@ -79,7 +84,7 @@ cntl_t* bli_trsm_l_cntl_create
TRUE, // do NOT invert diagonal
TRUE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS,
schema_a, // normally BLIS_PACKED_ROW_PANELS
BLIS_BUFFER_FOR_A_BLOCK,
trsm_cntl_bp_bu
);
@@ -103,7 +108,7 @@ cntl_t* bli_trsm_l_cntl_create
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS,
schema_b, // normally BLIS_PACKED_COL_PANELS
BLIS_BUFFER_FOR_B_PANEL,
trsm_cntl_op_bp
);
@@ -131,7 +136,8 @@ cntl_t* bli_trsm_l_cntl_create
cntl_t* bli_trsm_r_cntl_create
(
void
pack_t schema_a,
pack_t schema_b
)
{
void* macro_kernel_p = bli_trsm_xx_ker_var2;
@@ -165,7 +171,7 @@ cntl_t* bli_trsm_r_cntl_create
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS,
schema_a, // normally BLIS_PACKED_ROW_PANELS
BLIS_BUFFER_FOR_A_BLOCK,
trsm_cntl_bp_bu
);
@@ -189,7 +195,7 @@ cntl_t* bli_trsm_r_cntl_create
TRUE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
TRUE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS,
schema_b, // normally BLIS_PACKED_COL_PANELS
BLIS_BUFFER_FOR_B_PANEL,
trsm_cntl_op_bp
);

View File

@@ -34,17 +34,21 @@
// Side-dispatching constructor: forwards schema_a/schema_b to the left- or
// right-side constructor declared below.
cntl_t* bli_trsm_cntl_create
     (
       side_t side,
       pack_t schema_a,
       pack_t schema_b
     );

// Left-side trsm control tree constructor; takes the pack schemas for A and B.
cntl_t* bli_trsm_l_cntl_create
     (
       pack_t schema_a,
       pack_t schema_b
     );

// Right-side trsm control tree constructor; takes the pack schemas for A and B.
cntl_t* bli_trsm_r_cntl_create
     (
       pack_t schema_a,
       pack_t schema_b
     );
void bli_trsm_cntl_free

View File

@@ -121,10 +121,34 @@ void bli_trsm_front
bli_obj_set_as_root( &c_local );
// Record the threading for each level within the context.
bli_cntx_set_thrloop_from_env( BLIS_TRSM, side, cntx,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ) );
bli_cntx_set_thrloop_from_env
(
BLIS_TRSM,
side,
bli_obj_length( &c_local ),
bli_obj_width( &c_local ),
bli_obj_width( &a_local ),
cntx
);
// A sort of hack for communicating the desired pack schemas for A and B
// to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and
// bli_l3_cntl_create_if()). This allows us to access the schemas from
// the control tree, which hopefully reduces some confusion, particularly
// in bli_packm_init().
if ( bli_cntx_method( cntx ) == BLIS_NAT )
{
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
}
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
{
pack_t schema_a = bli_cntx_schema_a_block( cntx );
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
bli_obj_set_pack_schema( schema_a, &a_local );
bli_obj_set_pack_schema( schema_b, &b_local );
}
// Invoke the internal back-end.
bli_l3_thread_decorator

View File

@@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \
\
/* Cast the micro-kernel address to its function pointer type. */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -340,7 +340,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the n dimension (NR columns at a time). */ \
for ( j = 0; j < n_iter; ++j ) \
{ \
if( trsm_my_iter( j, thread ) ) { \
if( bli_trsm_my_iter( j, thread ) ) { \
\
ctype* restrict a1; \
ctype* restrict c11; \

View File

@@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \
\
/* Cast the micro-kernel address to its function pointer type. */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -348,7 +348,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the n dimension (NR columns at a time). */ \
for ( j = 0; j < n_iter; ++j ) \
{ \
if( trsm_my_iter( j, thread ) ) { \
if( bli_trsm_my_iter( j, thread ) ) { \
\
ctype* restrict a1; \
ctype* restrict c11; \

View File

@@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \
is transposed so that all kernel instances are of the "left"
variety (since those are the only trsm ukernels that exist). */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -422,7 +422,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if( trsm_my_iter( i, thread ) ){ \
if( bli_trsm_my_iter( i, thread ) ){ \
\
ctype* restrict a11; \
ctype* restrict a12; \
@@ -508,7 +508,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if( trsm_my_iter( i, thread ) ){ \
if( bli_trsm_my_iter( i, thread ) ){ \
\
ctype* restrict a2; \
\

View File

@@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \
is transposed so that all kernel instances are of the "left"
variety (since those are the only trsm ukernels that exist). */ \
PASTECH(ch,gemmtrsm_ukr_ft) \
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
PASTECH(ch,gemm_ukr_ft) \
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
\
/* Temporary C buffer for edge cases. Note that the strides of this
temporary buffer are set so that they match the storage of the
@@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
@@ -415,7 +415,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if( trsm_my_iter( i, thread ) ){ \
if( bli_trsm_my_iter( i, thread ) ){ \
\
ctype* restrict a10; \
ctype* restrict a11; \
@@ -501,7 +501,7 @@ void PASTEMAC(ch,varname) \
/* Loop over the m dimension (MR rows at a time). */ \
for ( i = 0; i < m_iter; ++i ) \
{ \
if( trsm_my_iter( i, thread ) ){ \
if( bli_trsm_my_iter( i, thread ) ){ \
\
ctype* restrict a2; \
\

View File

@@ -53,7 +53,7 @@ static void* bli_auxinfo_next_a( auxinfo_t* ai )
}
static void* bli_auxinfo_next_b( auxinfo_t* ai )
{
return ai->a_next;
return ai->b_next;
}
static inc_t bli_auxinfo_is_a( auxinfo_t* ai )

View File

@@ -172,6 +172,18 @@ static void bli_blksz_scale_max
bli_blksz_set_max( ( val * num ) / den, dt, b );
}
// Apply the num/den scaling to the blocksize object b for datatype dt by
// calling both bli_blksz_scale_def() and bli_blksz_scale_max() with the
// same arguments. This is a convenience wrapper so callers that want both
// values scaled by the same ratio need only one call.
static void bli_blksz_scale_def_max( dim_t num, dim_t den, num_t dt, blksz_t* b )
{
	bli_blksz_scale_def( num, den, dt, b );
	bli_blksz_scale_max( num, den, dt, b );
}
// -----------------------------------------------------------------------------
blksz_t* bli_blksz_create_ed

View File

@@ -342,6 +342,40 @@ err_t bli_check_real_valued_object( obj_t* a )
return e_val;
}
// Check that two datatypes have the same precision.
//
// - If dt_a is single precision (BLIS_FLOAT or BLIS_SCOMPLEX), dt_b must be
//   BLIS_FLOAT or BLIS_SCOMPLEX.
// - If dt_a is double precision (BLIS_DOUBLE or BLIS_DCOMPLEX), dt_b must be
//   BLIS_DOUBLE or BLIS_DCOMPLEX.
// - Any other value of dt_a is not checked.
//
// Returns BLIS_SUCCESS if the precisions are consistent and
// BLIS_INCONSISTENT_PRECISIONS otherwise.
err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b )
{
	err_t e_val = BLIS_SUCCESS;

	// The complex datatypes are tested alongside their real counterparts
	// so that the check is symmetric: a complex dt_a paired with a dt_b of
	// the other precision must be rejected just as a real dt_a would be.
	if ( dt_a == BLIS_FLOAT || dt_a == BLIS_SCOMPLEX )
	{
		if ( dt_b != BLIS_FLOAT &&
		     dt_b != BLIS_SCOMPLEX )
			e_val = BLIS_INCONSISTENT_PRECISIONS;
	}
	else if ( dt_a == BLIS_DOUBLE || dt_a == BLIS_DCOMPLEX )
	{
		if ( dt_b != BLIS_DOUBLE &&
		     dt_b != BLIS_DCOMPLEX )
			e_val = BLIS_INCONSISTENT_PRECISIONS;
	}

	return e_val;
}
// Object-based wrapper around bli_check_consistent_precisions(): queries
// the datatype of each object and returns the result of comparing them.
err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b )
{
	const num_t dt_a = bli_obj_dt( a );
	const num_t dt_b = bli_obj_dt( b );

	return bli_check_consistent_precisions( dt_a, dt_b );
}
// -- Dimension-related checks -------------------------------------------------
err_t bli_check_conformal_dims( obj_t* a, obj_t* b )

View File

@@ -62,6 +62,8 @@ err_t bli_check_consistent_object_datatypes( obj_t* a, obj_t* b );
err_t bli_check_datatype_real_proj_of( num_t dt_c, num_t dt_r );
err_t bli_check_object_real_proj_of( obj_t* c, obj_t* r );
err_t bli_check_real_valued_object( obj_t* a );
err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b );
err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b );
err_t bli_check_conformal_dims( obj_t* a, obj_t* b );
err_t bli_check_level3_dims( obj_t* a, obj_t* b, obj_t* c );

View File

@@ -544,8 +544,10 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
// -- End variable argument section --
// Query the context for the addresses of:
// - the l3 virtual ukernel func_t array
// - the l3 native ukernel func_t array
// - the l3 native ukernel preferences array
func_t* cntx_l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx );
func_t* cntx_l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx );
mbool_t* cntx_l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx );
@@ -565,11 +567,18 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
// Index into the func_t and mbool_t for the current kernel id
// being processed.
func_t* vukrs = &cntx_l3_vir_ukrs[ ukr_id ];
func_t* ukrs = &cntx_l3_nat_ukrs[ ukr_id ];
mbool_t* prefs = &cntx_l3_nat_ukrs_prefs[ ukr_id ];
// Store the ukernel function pointer and preference values into
// the context.
// the context. Notice that we redundantly store the native
// ukernel address in both the native and virtual ukernel slots
// in the context. This is standard practice when creating a
// native context. (Induced method contexts will overwrite the
// virtual function pointer with the address of the appropriate
// virtual ukernel.)
bli_func_set_dt( ukr_fp, ukr_dt, vukrs );
bli_func_set_dt( ukr_fp, ukr_dt, ukrs );
bli_mbool_set_dt( ukr_pref, ukr_dt, prefs );
}
@@ -869,10 +878,10 @@ void bli_cntx_set_thrloop_from_env
(
opid_t l3_op,
side_t side,
cntx_t* cntx,
dim_t m,
dim_t n,
dim_t k
dim_t k,
cntx_t* cntx
)
{
dim_t jc, pc, ic, jr, ir;
@@ -934,8 +943,8 @@ void bli_cntx_set_thrloop_from_env
if ( l3_op == BLIS_TRMM )
{
// We reconfigure the paralelism from trmm_r due to a dependency in
// the jc loop. (NOTE: This dependency does not exist for trmm3 )
// We reconfigure the parallelism from trmm_r due to a dependency in
// the jc loop. (NOTE: This dependency does not exist for trmm3.)
if ( bli_is_right( side ) )
{
bli_cntx_set_thrloop
@@ -988,7 +997,7 @@ void bli_cntx_set_thrloop_from_env
);
}
}
else // if ( l3_op == BLIS_TRSM )
else // any other level-3 operation besides trmm/trsm
{
bli_cntx_set_thrloop
(

View File

@@ -60,8 +60,6 @@ typedef struct cntx_s
pack_t schema_b;
pack_t schema_c;
bool_t anti_pref;
dim_t* thrloop;
membrk_t* membrk;
@@ -126,10 +124,6 @@ static pack_t bli_cntx_schema_c_panel( cntx_t* cntx )
{
return cntx->schema_c_panel;
}
static bool_t bli_cntx_anti_pref( cntx_t* cntx )
{
return cntx->anti_pref;
}
static dim_t* bli_cntx_thrloop( cntx_t* cntx )
{
return cntx->thrloop;
@@ -166,10 +160,6 @@ static void bli_cntx_set_schema_ab_blockpanel( pack_t sa, pack_t sb, cntx_t* cnt
bli_cntx_set_schema_a_block( sa, cntx );
bli_cntx_set_schema_b_panel( sb, cntx );
}
static void bli_cntx_set_anti_pref( bool_t anti_pref, cntx_t* cntx )
{
cntx->anti_pref = anti_pref;
}
static void bli_cntx_set_membrk( membrk_t* membrk, cntx_t* cntx )
{
cntx->membrk = membrk;
@@ -234,27 +224,6 @@ static dim_t bli_cntx_get_bmult_dt( num_t dt, bszid_t bs_id, cntx_t* cntx )
// -----------------------------------------------------------------------------
static func_t* bli_cntx_get_l3_ukrs( l3ukr_t ukr_id, cntx_t* cntx )
{
func_t* funcs;
if ( bli_cntx_method( (cntx) ) != BLIS_NAT )
funcs = bli_cntx_l3_vir_ukrs_buf( cntx );
else
funcs = bli_cntx_l3_nat_ukrs_buf( cntx );
func_t* func = &funcs[ ukr_id ];
return func;
}
static void* bli_cntx_get_l3_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
{
func_t* func = bli_cntx_get_l3_ukrs( ukr_id, cntx );
return bli_func_get_dt( dt, func );
}
static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx )
{
func_t* funcs = bli_cntx_l3_vir_ukrs_buf( cntx );
@@ -487,55 +456,43 @@ static bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_i
return !bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx );
}
static bool_t bli_cntx_l3_nat_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
}
static bool_t bli_cntx_l3_nat_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_nat_ukr_dislikes_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
}
// -----------------------------------------------------------------------------
static bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
{
// For induced methods, return the ukernel storage preferences of the
// corresponding real micro-kernel.
// NOTE: This projection to real domain becomes unnecessary if you
// set the exec_dt for 1m to the real projection of the storage
// datatype.
if ( bli_cntx_method( cntx ) != BLIS_NAT )
dt = bli_dt_proj_to_real( dt );
return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
}
static bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
{
// For induced methods, return the ukernel storage preferences of the
// corresponding real micro-kernel.
// NOTE: This projection to real domain becomes unnecessary if you
// set the exec_dt for 1m to the real projection of the storage
// datatype.
if ( bli_cntx_method( cntx ) != BLIS_NAT )
dt = bli_dt_proj_to_real( dt );
return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
}
static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
const num_t dt = bli_obj_dt( obj );
// Note that we use the execution datatype, which may differ from the
// storage datatype of C (though this would happen in very few situations).
const num_t dt = bli_obj_exec_dt( obj );
const bool_t ukr_prefers_rows
= bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx );
= bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, ukr_id, cntx );
const bool_t ukr_prefers_cols
= bli_cntx_l3_ukr_prefers_cols_dt( dt, ukr_id, cntx );
= bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, ukr_id, cntx );
bool_t r_val = FALSE;
if ( bli_obj_is_row_stored( obj ) && ukr_prefers_rows ) r_val = TRUE;
@@ -544,29 +501,9 @@ static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cn
return r_val;
}
static bool_t bli_cntx_l3_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
static bool_t bli_cntx_l3_vir_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
return !bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx );
}
static bool_t bli_cntx_l3_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
}
static bool_t bli_cntx_l3_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
{
bool_t r_val = bli_cntx_l3_ukr_dislikes_storage_of( obj, ukr_id, cntx );
// If the anti-preference is set, negate the result.
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
return r_val;
return !bli_cntx_l3_vir_ukr_prefers_storage_of( obj, ukr_id, cntx );
}
// -----------------------------------------------------------------------------
@@ -674,12 +611,15 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... );
void bli_cntx_set_l1v_kers( dim_t n_kers, ... );
void bli_cntx_set_packm_kers( dim_t n_kers, ... );
void bli_cntx_set_thrloop_from_env( opid_t l3_op,
side_t side,
cntx_t* cntx,
dim_t m,
dim_t n,
dim_t k );
void bli_cntx_set_thrloop_from_env
(
opid_t l3_op,
side_t side,
dim_t m,
dim_t n,
dim_t k,
cntx_t* cntx
);
void bli_cntx_print( cntx_t* cntx );

View File

@@ -90,6 +90,8 @@ void bli_error_init_msgs( void )
"Expected second datatype to be real projection of first." );
sprintf( bli_error_string_for_code(BLIS_EXPECTED_REAL_VALUED_OBJECT),
"Expected real-valued object (ie: if complex, imaginary component equals zero)." );
sprintf( bli_error_string_for_code(BLIS_INCONSISTENT_PRECISIONS),
"Expected consistent precisions (both single or both double)." );
sprintf( bli_error_string_for_code(BLIS_NONCONFORMAL_DIMENSIONS),
"Encountered non-conformal dimensions between objects." );

View File

@@ -584,7 +584,7 @@ char* bli_gks_l3_ukr_impl_string( l3ukr_t ukr, ind_t method, num_t dt )
// then query the ukernel function pointer for the given datatype from
// that context.
cntx_t* cntx = bli_gks_query_ind_cntx( method, dt );
void* fp = bli_cntx_get_l3_ukr_dt( dt, ukr, cntx );
void* fp = bli_cntx_get_l3_vir_ukr_dt( dt, ukr, cntx );
// Check whether the ukernel function pointer is NULL for the given
// datatype. If it is NULL, return the string for not applicable.

View File

@@ -210,6 +210,19 @@ void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag )
}
}
// Map a datatype character ('s', 'd', 'c', 'z', or 'i') to the
// corresponding num_t value, which is written to *blis_dt. For any other
// character, BLIS_INVALID_DATATYPE is passed to bli_check_error_code() and
// *blis_dt is left unmodified.
void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt )
{
	switch ( dt )
	{
		case 's': *blis_dt = BLIS_FLOAT;    break;
		case 'd': *blis_dt = BLIS_DOUBLE;   break;
		case 'c': *blis_dt = BLIS_SCOMPLEX; break;
		case 'z': *blis_dt = BLIS_DCOMPLEX; break;
		case 'i': *blis_dt = BLIS_INT;      break;

		default:
			bli_check_error_code( BLIS_INVALID_DATATYPE );
			break;
	}
}
// --- BLIS to BLIS char mappings ----------------------------------------------
@@ -265,3 +278,16 @@ void bli_param_map_blis_to_char_diag( diag_t blis_diag, char* diag )
}
}
// Map a num_t value to its datatype character ('s', 'd', 'c', 'z', or
// 'i'), which is written to *dt. For any other num_t value,
// BLIS_INVALID_DATATYPE is passed to bli_check_error_code() and *dt is
// left unmodified.
void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt )
{
	if ( blis_dt == BLIS_FLOAT )    { *dt = 's'; return; }
	if ( blis_dt == BLIS_DOUBLE )   { *dt = 'd'; return; }
	if ( blis_dt == BLIS_SCOMPLEX ) { *dt = 'c'; return; }
	if ( blis_dt == BLIS_DCOMPLEX ) { *dt = 'z'; return; }
	if ( blis_dt == BLIS_INT )      { *dt = 'i'; return; }

	// None of the recognized datatypes matched.
	bli_check_error_code( BLIS_INVALID_DATATYPE );
}

View File

@@ -57,6 +57,7 @@ void bli_param_map_char_to_blis_uplo( char uplo, uplo_t* blis_uplo );
void bli_param_map_char_to_blis_trans( char trans, trans_t* blis_trans );
void bli_param_map_char_to_blis_conj( char conj, conj_t* blis_conj );
void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag );
void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt );
// --- BLIS to BLIS char mappings ----------------------------------------------
@@ -66,4 +67,5 @@ void bli_param_map_blis_to_char_uplo( uplo_t blis_uplo, char* uplo );
void bli_param_map_blis_to_char_trans( trans_t blis_trans, char* trans );
void bli_param_map_blis_to_char_conj( conj_t blis_conj, char* conj );
void bli_param_map_blis_to_char_diag( diag_t blis_diag, char* diag );
void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt );

View File

@@ -38,6 +38,49 @@
// -- Matrix partitioning ------------------------------------------------------
// Acquire a bm x bn submatrix of parent whose top-left element is located
// at offset (i,j) within parent, and store the resulting view in child.
// Offsets and dimensions that reach beyond the parent are clamped so that
// the child never refers to anything outside the parent's bounds.
void bli_acquire_mpart
     (
       dim_t  i,
       dim_t  j,
       dim_t  bm,
       dim_t  bn,
       obj_t* parent,
       obj_t* child
     )
{
	// Dimensions of the parent object.
	const dim_t m_total = bli_obj_length( parent );
	const dim_t n_total = bli_obj_width( parent );

	// Clamp the offsets to the outer dimensions of the parent. If an
	// offset had to be clamped, the corresponding dimension computed
	// below is reduced to zero. (This is a safety measure and generally
	// should never be needed if the caller passes in sane arguments.)
	const dim_t i_off = ( i > m_total ? m_total : i );
	const dim_t j_off = ( j > n_total ? n_total : j );

	// Clamp the requested dimensions to what remains of the parent past
	// the offsets, so that the child fits within the parent. (Also a
	// safety measure, though somewhat more likely to be needed than the
	// offset clamping above.)
	const dim_t m_left = m_total - i_off;
	const dim_t n_left = n_total - j_off;
	const dim_t m_sub  = ( bm > m_left ? m_left : bm );
	const dim_t n_sub  = ( bn > n_left ? n_left : bn );

	// Start the child out as an alias of the parent.
	bli_obj_alias_to( parent, child );

	// Increment (rather than overwrite) the child's offsets, in case the
	// parent already had non-zero offsets -- usually because the parent
	// was itself a child of a larger grandparent object. Then set the
	// child's dimensions.
	bli_obj_inc_offs( i_off, j_off, child );
	bli_obj_set_dims( m_sub, n_sub, child );
}
void bli_acquire_mpart_mdim
(
dir_t direct,

View File

@@ -36,6 +36,16 @@
// -- Matrix partitioning ------------------------------------------------------
// Acquire an m x n submatrix of obj located at offset (i,j) and store the
// resulting view in sub_obj.
// NOTE(review): the definition names the last four parameters bm, bn,
// parent, and child; the names here differ but the order is the same.
void bli_acquire_mpart
(
dim_t i,
dim_t j,
dim_t m,
dim_t n,
obj_t* obj,
obj_t* sub_obj
);
#undef GENPROT
#define GENPROT( opname ) \
\

162
frame/base/bli_setri.c Normal file
View File

@@ -0,0 +1,162 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// -- setr ---------------------------------------------------------------------
// Set the real part of every element of matrix b to the real part of
// alpha. The imaginary part of alpha (if it is complex) is discarded.
void bli_setrm
     (
       obj_t* alpha,
       obj_t* b
     )
{
	obj_t alpha_r; // alpha, typecast to the real projection of b's datatype
	obj_t b_re;    // alias to the real part of b

	// Validate the operands if error checking is enabled.
	if ( bli_error_checking_is_enabled() )
	{
		bli_setm_check( alpha, b );
	}

	// Create a detached local scalar whose datatype is the real
	// projection of the datatype of b.
	bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ), &alpha_r );

	// Copy/typecast alpha into the local real scalar, which drops any
	// imaginary component.
	bli_copysc( alpha, &alpha_r );

	// Alias the real part of b and overwrite it with the real scalar.
	bli_obj_real_part( b, &b_re );
	bli_setm( &alpha_r, &b_re );
}
// Set the real part of every element of vector x to the real part of
// alpha. The imaginary part of alpha (if it is complex) is discarded.
void bli_setrv
     (
       obj_t* alpha,
       obj_t* x
     )
{
	obj_t alpha_r; // alpha, typecast to the real projection of x's datatype
	obj_t x_re;    // alias to the real part of x

	// Validate the operands if error checking is enabled.
	if ( bli_error_checking_is_enabled() )
	{
		bli_setv_check( alpha, x );
	}

	// Create a detached local scalar whose datatype is the real
	// projection of the datatype of x.
	bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ), &alpha_r );

	// Copy/typecast alpha into the local real scalar, which drops any
	// imaginary component.
	bli_copysc( alpha, &alpha_r );

	// Alias the real part of x and overwrite it with the real scalar.
	bli_obj_real_part( x, &x_re );
	bli_setv( &alpha_r, &x_re );
}
// -- seti ---------------------------------------------------------------------
// Set the imaginary part of every element of matrix b to the real part of
// alpha. The imaginary part of alpha (if it is complex) is discarded. If b
// is real, nothing is modified.
void bli_setim
     (
       obj_t* alpha,
       obj_t* b
     )
{
	obj_t alpha_r; // alpha, typecast to the real projection of b's datatype
	obj_t b_im;    // alias to the imaginary part of b

	// Validate the operands if error checking is enabled.
	if ( bli_error_checking_is_enabled() )
	{
		bli_setm_check( alpha, b );
	}

	// A real object has no imaginary part to set.
	if ( bli_obj_is_real( b ) )
	{
		return;
	}

	// Create a detached local scalar whose datatype is the real
	// projection of the datatype of b.
	bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ), &alpha_r );

	// Copy/typecast alpha into the local real scalar, which drops any
	// imaginary component.
	bli_copysc( alpha, &alpha_r );

	// Alias the imaginary part of b and overwrite it with the real scalar.
	bli_obj_imag_part( b, &b_im );
	bli_setm( &alpha_r, &b_im );
}
// Set the imaginary part of every element of vector x to the real part of
// alpha. The imaginary part of alpha (if it is complex) is discarded. If x
// is real, nothing is modified.
void bli_setiv
     (
       obj_t* alpha,
       obj_t* x
     )
{
	obj_t alpha_r; // alpha, typecast to the real projection of x's datatype
	obj_t x_im;    // alias to the imaginary part of x

	// Validate the operands if error checking is enabled.
	if ( bli_error_checking_is_enabled() )
	{
		bli_setv_check( alpha, x );
	}

	// A real object has no imaginary part to set.
	if ( bli_obj_is_real( x ) )
	{
		return;
	}

	// Create a detached local scalar whose datatype is the real
	// projection of the datatype of x.
	bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ), &alpha_r );

	// Copy/typecast alpha into the local real scalar, which drops any
	// imaginary component.
	bli_copysc( alpha, &alpha_r );

	// Alias the imaginary part of x and overwrite it with the real scalar.
	// NOTE(review): this calls bli_setm() on a vector alias, whereas
	// bli_setrv() calls bli_setv(). Presumably the vector is simply
	// treated as a matrix here -- confirm whether this is intentional.
	bli_obj_imag_part( x, &x_im );
	bli_setm( &alpha_r, &x_im );
}

62
frame/base/bli_setri.h Normal file
View File

@@ -0,0 +1,62 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// -- setr ---------------------------------------------------------------------
// Set the real part of matrix b to the real part of alpha.
void bli_setrm
(
obj_t* alpha,
obj_t* b
);
// Set the real part of vector x to the real part of alpha.
void bli_setrv
(
obj_t* alpha,
obj_t* x
);
// -- seti ---------------------------------------------------------------------
// Set the imaginary part of matrix b to the real part of alpha. Has no
// effect on b if b is real.
void bli_setim
(
obj_t* alpha,
obj_t* b
);
// Set the imaginary part of vector x to the real part of alpha. Has no
// effect on x if x is real.
void bli_setiv
(
obj_t* alpha,
obj_t* x
);

267
frame/base/cast/bli_castm.c Normal file
View File

@@ -0,0 +1,267 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// NOTE: This is one of the few functions in BLIS that is defined
// with heterogeneous type support. This is done so that we have
// an operation that can be used to typecast (copy-cast) a matrix
// of one datatype to a matrix of another datatype.
// Signature shared by every typed castm implementation. The matrix
// operands are passed as void pointers so that a single function pointer
// type can be used for all combinations of source/destination datatypes.
typedef void (*FUNCPTR_T)
(
trans_t transa,
dim_t m,
dim_t n,
void* restrict a, inc_t rs_a, inc_t cs_a,
void* restrict b, inc_t rs_b, inc_t cs_b
);
// Table of typed castm function pointers. bli_castm() indexes it as
// ftypes[dt_a][dt_b], i.e. by the datatype of the source and then the
// datatype of the destination.
static FUNCPTR_T GENARRAY2_ALL(ftypes,castm);
//
// Define object-based interface.
//
// Copy-cast matrix a into matrix b, where a and b may have different
// datatypes. The typed implementation is selected from the ftypes table
// using the datatypes of a and b; the dimensions passed to it are those
// of b, and the conjugation/transposition status is taken from a.
void bli_castm
     (
       obj_t* a,
       obj_t* b
     )
{
	// Datatypes of the source and destination select the implementation.
	const num_t   src_dt = bli_obj_dt( a );
	const num_t   dst_dt = bli_obj_dt( b );

	// Conjugation and/or transposition to apply to a.
	const trans_t trans  = bli_obj_conjtrans_status( a );

	// The dimensions of the operation are those of the destination.
	const dim_t   m_dim  = bli_obj_length( b );
	const dim_t   n_dim  = bli_obj_width( b );

	// Buffer address (at the object's current offset) and strides of a.
	void* const   src    = bli_obj_buffer_at_off( a );
	const inc_t   src_rs = bli_obj_row_stride( a );
	const inc_t   src_cs = bli_obj_col_stride( a );

	// Buffer address (at the object's current offset) and strides of b.
	void* const   dst    = bli_obj_buffer_at_off( b );
	const inc_t   dst_rs = bli_obj_row_stride( b );
	const inc_t   dst_cs = bli_obj_col_stride( b );

	// Validate the operands if error checking is enabled.
	if ( bli_error_checking_is_enabled() )
	{
		bli_castm_check( a, b );
	}

#if 0
	if ( bli_obj_dt( a ) == bli_obj_dt( b ) )
	{
		// If a and b share the same datatype, we can simply use copym.
		bli_copym( a, b );
		return;
	}
#endif

	// Look up the typed function for this datatype pair and invoke it
	// through its void pointer-based interface.
	ftypes[ src_dt ][ dst_dt ]
	(
	  trans,
	  m_dim,
	  n_dim,
	  src, src_rs, src_cs,
	  dst, dst_rs, dst_cs
	);
}
// -----------------------------------------------------------------------------
//
// Define BLAS-like interfaces with typed operands.
//
// Template for the typed castm functions. Each instantiation defines a
// function, named by PASTEMAC2(cha,chb,opname), that copies the elements
// of a (of type ctype_a) into b (of type ctype_b) using the two-type
// scalar copy macros: copyjs when transa carries a conjugation, copys
// otherwise. The loop bounds and strides are computed from transa, the
// dimensions, and the row/column strides by bli_set_dims_incs_2m(). Each
// case has a separate loop for when both inner increments are unit, which
// indexes the elements directly rather than advancing the pointers.
#undef GENTFUNC2
#define GENTFUNC2( ctype_a, ctype_b, cha, chb, opname ) \
\
void PASTEMAC2(cha,chb,opname) \
( \
trans_t transa, \
dim_t m, \
dim_t n, \
void* restrict a, inc_t rs_a, inc_t cs_a, \
void* restrict b, inc_t rs_b, inc_t cs_b \
) \
{ \
ctype_a* restrict a_cast = a; \
ctype_b* restrict b_cast = b; \
conj_t conja; \
dim_t n_iter; \
dim_t n_elem; \
inc_t lda, inca; \
inc_t ldb, incb; \
dim_t j, i; \
\
/* Set various loop parameters. */ \
bli_set_dims_incs_2m \
( \
transa, \
m, n, rs_a, cs_a, rs_b, cs_b, \
&n_elem, &n_iter, &inca, &lda, &incb, &ldb \
); \
\
/* Extract the conjugation component from the transa parameter. */ \
conja = bli_extract_conj( transa ); \
\
/* Conjugation requested: copy-cast using copyjs. */ \
if ( bli_is_conj( conja ) ) \
{ \
/* Both inner increments are unit: index elements directly. */ \
if ( inca == 1 && incb == 1 ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
{ \
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
\
for ( i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC2(cha,chb,copyjs)( a1[i], b1[i] ); \
} \
} \
} \
/* General increments: advance the pointers by inca/incb. */ \
else \
{ \
for ( j = 0; j < n_iter; ++j ) \
{ \
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
\
for ( i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC2(cha,chb,copyjs)( *a1, *b1 ); \
\
a1 += inca; \
b1 += incb; \
} \
} \
} \
} \
/* No conjugation: copy-cast using copys. */ \
else \
{ \
/* Both inner increments are unit: index elements directly. */ \
if ( inca == 1 && incb == 1 ) \
{ \
for ( j = 0; j < n_iter; ++j ) \
{ \
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
\
for ( i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC2(cha,chb,copys)( a1[i], b1[i] ); \
} \
} \
} \
/* General increments: advance the pointers by inca/incb. */ \
else \
{ \
for ( j = 0; j < n_iter; ++j ) \
{ \
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
\
for ( i = 0; i < n_elem; ++i ) \
{ \
PASTEMAC2(cha,chb,copys)( *a1, *b1 ); \
\
a1 += inca; \
b1 += incb; \
} \
} \
} \
} \
}
// Instantiate the template for the datatype pairs covered by the BASIC0
// and MIXDP0 insertion macros.
INSERT_GENTFUNC2_BASIC0( castm )
INSERT_GENTFUNC2_MIXDP0( castm )
// -----------------------------------------------------------------------------
//
// Define object-based _check() function.
//
// Validate the operands of bli_castm(). Each check yields an error code
// that is passed to bli_check_error_code().
//
// The following properties are checked, in order:
// - a and b are floating-point objects;
// - a and b have general structure;
// - a and b are matrix objects with conformal dimensions;
// - the buffers of a and b are non-NULL.
void bli_castm_check
     (
       obj_t* a,
       obj_t* b
     )
{
	err_t e_val;

	// Check object datatypes.

	e_val = bli_check_floating_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_floating_object( b );
	bli_check_error_code( e_val );

	// Check structure.
	// NOTE: We enforce general structure for now in order to simplify the
	// implementation.

	// The result of each structure check must be captured in e_val;
	// otherwise the subsequent bli_check_error_code() would re-test the
	// stale value of the previous check and the structure check would
	// never take effect.
	e_val = bli_check_general_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_general_object( b );
	bli_check_error_code( e_val );

	// Check object dimensions.

	e_val = bli_check_matrix_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_matrix_object( b );
	bli_check_error_code( e_val );

	e_val = bli_check_conformal_dims( a, b );
	bli_check_error_code( e_val );

	// Check object buffers (for non-NULLness).

	e_val = bli_check_object_buffer( a );
	bli_check_error_code( e_val );

	e_val = bli_check_object_buffer( b );
	bli_check_error_code( e_val );
}

View File

@@ -0,0 +1,73 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// Prototype object-based interface.
//
// Copy-cast matrix a into matrix b; a and b may have different datatypes.
void bli_castm
(
obj_t* a,
obj_t* b
);
//
// Prototype BLAS-like interfaces with heterogeneous-typed operands.
//
// Template for the prototypes of the typed castm functions, one per
// (cha, chb) datatype character pair. The matrix operands are passed as
// void pointers.
#undef GENTPROT2
#define GENTPROT2( ctype_a, ctype_b, cha, chb, opname ) \
\
void PASTEMAC2(cha,chb,opname) \
( \
trans_t transa, \
dim_t m, \
dim_t n, \
void* a, inc_t rs_a, inc_t cs_a, \
void* b, inc_t rs_b, inc_t cs_b \
);
// Emit the prototypes for the datatype pairs covered by the BASIC0 and
// MIXDP0 insertion macros.
INSERT_GENTPROT2_BASIC0( castm )
INSERT_GENTPROT2_MIXDP0( castm )
//
// Prototype object-based _check() function.
//
// Validate the operands a and b of bli_castm().
void bli_castm_check
(
obj_t* a,
obj_t* b
);

211
frame/base/cast/bli_castv.c Normal file
View File

@@ -0,0 +1,211 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// NOTE: This is one of the few functions in BLIS that is defined
// with heterogeneous type support. This is done so that we have
// an operation that can be used to typecast (copy-cast) a vector
// of one datatype to a vector of another datatype.
// Signature shared by all typed castv kernels: a conjugation parameter for
// x, the vector length n, and the untyped x and y buffers, each followed by
// its increment.
typedef void (*FUNCPTR_T)
(
conj_t conjx,
dim_t n,
void* restrict x, inc_t inc_x,
void* restrict y, inc_t inc_y
);
// Two-dimensional table of typed castv kernels. bli_castv() indexes it as
// ftypes[dt_x][dt_y], i.e. by the datatype of x first and of y second.
static FUNCPTR_T GENARRAY2_ALL(ftypes,castv);
//
// Define object-based interface.
//
// Object-based castv: copies vector x into vector y, converting each element
// from the datatype of x to the datatype of y. The work is delegated to the
// typed kernel stored in ftypes for that datatype pair.
void bli_castv
     (
       obj_t* x,
       obj_t* y
     )
{
	// The datatype pair selects the typed kernel.
	const num_t  src_dt  = bli_obj_dt( x );
	const num_t  dst_dt  = bli_obj_dt( y );

	// Conjugation status and length are taken from the source vector.
	const conj_t src_conj = bli_obj_conj_status( x );
	const dim_t  length   = bli_obj_vector_dim( x );

	// Buffer addresses (with the object offsets applied) and increments.
	void* const  src     = bli_obj_buffer_at_off( x );
	const inc_t  src_inc = bli_obj_vector_inc( x );
	void* const  dst     = bli_obj_buffer_at_off( y );
	const inc_t  dst_inc = bli_obj_vector_inc( y );

	// Check parameters.
	if ( bli_error_checking_is_enabled() )
	{
		bli_castv_check( x, y );
	}

	// Operands that share a datatype are not special-cased: the table lookup
	// is used for every datatype pair.
	ftypes[ src_dt ][ dst_dt ]
	(
	  src_conj,
	  length,
	  src, src_inc,
	  dst, dst_inc
	);
}
// -----------------------------------------------------------------------------
//
// Define BLAS-like interfaces with typed operands.
//
// Template for the typed castv kernels. For one (chx,chy) type pair it
// defines a function that copies n elements from x (viewed as ctype_x) to y
// (viewed as ctype_y) through the mixed-type scalar macros: copyjs when
// conjx requests conjugation, copys otherwise. When both increments are 1
// the elements are addressed by index; otherwise the two pointers are
// advanced by incx and incy after every element.
#undef GENTFUNC2
#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \
\
void PASTEMAC2(chx,chy,opname) \
( \
conj_t conjx, \
dim_t n, \
void* restrict x, inc_t incx, \
void* restrict y, inc_t incy \
) \
{ \
ctype_x* restrict x1 = x; \
ctype_y* restrict y1 = y; \
dim_t i; \
\
/* Conjugation requested: use the copyjs scalar macro. */ \
if ( bli_is_conj( conjx ) ) \
{ \
if ( incx == 1 && incy == 1 ) \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC2(chx,chy,copyjs)( x1[i], y1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC2(chx,chy,copyjs)( *x1, *y1 ); \
\
x1 += incx; \
y1 += incy; \
} \
} \
} \
/* No conjugation: use the copys scalar macro. */ \
else \
{ \
if ( incx == 1 && incy == 1 ) \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC2(chx,chy,copys)( x1[i], y1[i] ); \
} \
} \
else \
{ \
for ( i = 0; i < n; ++i ) \
{ \
PASTEMAC2(chx,chy,copys)( *x1, *y1 ); \
\
x1 += incx; \
y1 += incy; \
} \
} \
} \
}
// Instantiate the kernels. MIXDP0 expands to the mixed domain/precision type
// pairs; BASIC0 is defined elsewhere and presumably covers the same-type
// pairs.
INSERT_GENTFUNC2_BASIC0( castv )
INSERT_GENTFUNC2_MIXDP0( castv )
// -----------------------------------------------------------------------------
//
// Define object-based _check() function.
//
// Validates the operands of castv. Every test produces an error code that is
// passed to bli_check_error_code() before the next test runs.
void bli_castv_check
     (
       obj_t* x,
       obj_t* y
     )
{
	// Datatype checks on both operands.
	err_t status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	// Dimension checks: x and y must each be a vector, and their lengths
	// must be equal.
	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	status = bli_check_equal_vector_lengths( x, y );
	bli_check_error_code( status );

	// Buffer checks (non-NULL) on both operands.
	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );
}

View File

@@ -0,0 +1,72 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
//
// Prototype object-based interface.
// bli_castv() takes the source vector x and the destination vector y.
//
void bli_castv
(
obj_t* x,
obj_t* y
);
//
// Prototype BLAS-like interfaces with heterogeneous-typed operands.
// One prototype is declared per (chx,chy) character pair; each takes a
// conjugation parameter for x, the length n, and the untyped x and y buffers
// with their increments.
//
#undef GENTPROT2
#define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \
\
void PASTEMAC2(chx,chy,opname) \
( \
conj_t conjx, \
dim_t n, \
void* x, inc_t incx, \
void* y, inc_t incy \
);
// MIXDP0 expands to the mixed domain/precision type pairs; BASIC0 is defined
// elsewhere and presumably covers the same-type pairs.
INSERT_GENTPROT2_BASIC0( castv )
INSERT_GENTPROT2_MIXDP0( castv )
//
// Prototype object-based _check() function.
// bli_castv_check() validates the two operands of castv: their datatypes,
// their vector dimensions (lengths must match), and their buffers.
//
void bli_castv_check
(
obj_t* x,
obj_t* y
);

View File

@@ -0,0 +1,118 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Validates the operands of castm. Every test produces an error code that is
// passed to bli_check_error_code() before the next test runs.
//
// a: source matrix object.
// b: destination matrix object.
void bli_castm_check
     (
       obj_t* a,
       obj_t* b
     )
{
	err_t e_val;

	// Check object datatypes.

	e_val = bli_check_floating_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_floating_object( b );
	bli_check_error_code( e_val );

	// Check structure.
	// NOTE: We enforce general structure for now in order to simplify the
	// implementation.

	// The result of each structure test must be captured in e_val;
	// otherwise the bli_check_error_code() call that follows would merely
	// re-examine the code left over from the previous (passing) test and
	// the structure requirement would never be enforced.
	e_val = bli_check_general_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_general_object( b );
	bli_check_error_code( e_val );

	// Check object dimensions.

	e_val = bli_check_matrix_object( a );
	bli_check_error_code( e_val );

	e_val = bli_check_matrix_object( b );
	bli_check_error_code( e_val );

	e_val = bli_check_conformal_dims( a, b );
	bli_check_error_code( e_val );

	// Check object buffers (for non-NULLness).

	e_val = bli_check_object_buffer( a );
	bli_check_error_code( e_val );

	e_val = bli_check_object_buffer( b );
	bli_check_error_code( e_val );
}
// Validates the operands of castv. Every test produces an error code that is
// passed to bli_check_error_code() before the next test runs.
void bli_castv_check
     (
       obj_t* x,
       obj_t* y
     )
{
	// Datatype checks on both operands.
	err_t status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	// Dimension checks: x and y must each be a vector, and their lengths
	// must be equal.
	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	status = bli_check_equal_vector_lengths( x, y );
	bli_check_error_code( status );

	// Buffer checks (non-NULL) on both operands.
	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );
}

View File

@@ -0,0 +1,45 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Validates the operands a and b of castm (datatypes, dimensions, buffers).
void bli_castm_check
(
obj_t* a,
obj_t* b
);
// Validates the operands x and y of castv (datatypes, vector dimensions,
// buffers).
void bli_castv_check
(
obj_t* x,
obj_t* y
);

127
frame/base/proj/bli_projm.c Normal file
View File

@@ -0,0 +1,127 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Copies matrix a into matrix b when the two may differ in domain:
//  - same domain:        plain copym;
//  - real a, complex b:  b is zeroed, then a is copied into the real part of b;
//  - complex a, real b:  the real part of a is copied into b.
void bli_projm
     (
       obj_t* a,
       obj_t* b
     )
{
	// Check parameters.
	if ( bli_error_checking_is_enabled() )
	{
		bli_projm_check( a, b );
	}

	// Matching domains (both real or both complex) need no projection.
	if ( ( bli_obj_is_real( a )    && bli_obj_is_real( b )    ) ||
	     ( bli_obj_is_complex( a ) && bli_obj_is_complex( b ) ) )
	{
		bli_copym( a, b );
		return;
	}

	// From here on exactly one operand is real and the other is complex.

	if ( !bli_obj_is_real( a ) )
	{
		// a is complex and b is real: copy only the real part of a.
		obj_t a_real;

		bli_obj_real_part( a, &a_real );
		bli_copym( &a_real, b );
		return;
	}

	// a is real and b is complex: alias the real part of b, clear all of b
	// (imaginary components included), then copy a into the aliased real
	// part.
	obj_t b_real;

	bli_obj_real_part( b, &b_real );
	bli_setm( &BLIS_ZERO, b );
	bli_copym( a, &b_real );
}
// -----------------------------------------------------------------------------
// Validates the operands of projm. Every test produces an error code that is
// passed to bli_check_error_code() before the next test runs.
void bli_projm_check
     (
       obj_t* a,
       obj_t* b
     )
{
	// Datatype checks: each operand on its own, then the precisions of the
	// two operands against each other.
	err_t status = bli_check_floating_object( a );
	bli_check_error_code( status );

	status = bli_check_floating_object( b );
	bli_check_error_code( status );

	status = bli_check_consistent_object_precisions( a, b );
	bli_check_error_code( status );

	// Dimension checks: a and b must each be a matrix, and their dimensions
	// must be conformal.
	status = bli_check_matrix_object( a );
	bli_check_error_code( status );

	status = bli_check_matrix_object( b );
	bli_check_error_code( status );

	status = bli_check_conformal_dims( a, b );
	bli_check_error_code( status );

	// Buffer checks (non-NULL) on both operands.
	status = bli_check_object_buffer( a );
	bli_check_error_code( status );

	status = bli_check_object_buffer( b );
	bli_check_error_code( status );
}

View File

@@ -0,0 +1,46 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Copies matrix a into matrix b, projecting between the real and complex
// domains when the domains of a and b differ.
void bli_projm
(
obj_t* a,
obj_t* b
);
// Validates the operands a and b of projm (datatypes, precisions,
// dimensions, buffers).
void bli_projm_check
(
obj_t* a,
obj_t* b
);

127
frame/base/proj/bli_projv.c Normal file
View File

@@ -0,0 +1,127 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Copies vector x into vector y when the two may differ in domain:
//  - same domain:        plain copyv;
//  - real x, complex y:  y is zeroed, then x is copied into the real part of y;
//  - complex x, real y:  the real part of x is copied into y.
void bli_projv
     (
       obj_t* x,
       obj_t* y
     )
{
	// Check parameters.
	if ( bli_error_checking_is_enabled() )
	{
		bli_projv_check( x, y );
	}

	// Matching domains (both real or both complex) need no projection.
	if ( ( bli_obj_is_real( x )    && bli_obj_is_real( y )    ) ||
	     ( bli_obj_is_complex( x ) && bli_obj_is_complex( y ) ) )
	{
		bli_copyv( x, y );
		return;
	}

	// From here on exactly one operand is real and the other is complex.

	if ( !bli_obj_is_real( x ) )
	{
		// x is complex and y is real: copy only the real part of x.
		obj_t x_real;

		bli_obj_real_part( x, &x_real );
		bli_copyv( &x_real, y );
		return;
	}

	// x is real and y is complex: alias the real part of y, clear all of y
	// (imaginary components included), then copy x into the aliased real
	// part.
	obj_t y_real;

	bli_obj_real_part( y, &y_real );
	bli_setv( &BLIS_ZERO, y );
	bli_copyv( x, &y_real );
}
// -----------------------------------------------------------------------------
// Validates the operands of projv. Every test produces an error code that is
// passed to bli_check_error_code() before the next test runs.
void bli_projv_check
     (
       obj_t* x,
       obj_t* y
     )
{
	// Datatype checks: each operand on its own, then the precisions of the
	// two operands against each other.
	err_t status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	status = bli_check_consistent_object_precisions( x, y );
	bli_check_error_code( status );

	// Dimension checks: x and y must each be a vector, and their lengths
	// must be equal.
	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	status = bli_check_equal_vector_lengths( x, y );
	bli_check_error_code( status );

	// Buffer checks (non-NULL) on both operands.
	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );
}

View File

@@ -0,0 +1,46 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Copies vector x into vector y, projecting between the real and complex
// domains when the domains of x and y differ.
void bli_projv
(
obj_t* x,
obj_t* y
);
// Validates the operands x and y of projv (datatypes, precisions, vector
// dimensions, buffers).
void bli_projv_check
(
obj_t* x,
obj_t* y
);

View File

@@ -0,0 +1,114 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Validates the operands of projm. Every test produces an error code that is
// passed to bli_check_error_code() before the next test runs.
void bli_projm_check
     (
       obj_t* a,
       obj_t* b
     )
{
	// Datatype checks: each operand on its own, then the precisions of the
	// two operands against each other.
	err_t status = bli_check_floating_object( a );
	bli_check_error_code( status );

	status = bli_check_floating_object( b );
	bli_check_error_code( status );

	status = bli_check_consistent_object_precisions( a, b );
	bli_check_error_code( status );

	// Dimension checks: a and b must each be a matrix, and their dimensions
	// must be conformal.
	status = bli_check_matrix_object( a );
	bli_check_error_code( status );

	status = bli_check_matrix_object( b );
	bli_check_error_code( status );

	status = bli_check_conformal_dims( a, b );
	bli_check_error_code( status );

	// Buffer checks (non-NULL) on both operands.
	status = bli_check_object_buffer( a );
	bli_check_error_code( status );

	status = bli_check_object_buffer( b );
	bli_check_error_code( status );
}
// Validates the operands of projv. Every test produces an error code that is
// passed to bli_check_error_code() before the next test runs.
void bli_projv_check
     (
       obj_t* x,
       obj_t* y
     )
{
	// Datatype checks: each operand on its own, then the precisions of the
	// two operands against each other.
	err_t status = bli_check_floating_object( x );
	bli_check_error_code( status );

	status = bli_check_floating_object( y );
	bli_check_error_code( status );

	status = bli_check_consistent_object_precisions( x, y );
	bli_check_error_code( status );

	// Dimension checks: x and y must each be a vector, and their lengths
	// must be equal.
	status = bli_check_vector_object( x );
	bli_check_error_code( status );

	status = bli_check_vector_object( y );
	bli_check_error_code( status );

	status = bli_check_equal_vector_lengths( x, y );
	bli_check_error_code( status );

	// Buffer checks (non-NULL) on both operands.
	status = bli_check_object_buffer( x );
	bli_check_error_code( status );

	status = bli_check_object_buffer( y );
	bli_check_error_code( status );
}

View File

@@ -0,0 +1,45 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Validates the operands a and b of projm (datatypes, precisions,
// dimensions, buffers).
void bli_projm_check
(
obj_t* a,
obj_t* b
);
// Validates the operands x and y of projv (datatypes, precisions, vector
// dimensions, buffers).
void bli_projv_check
(
obj_t* x,
obj_t* y
);

View File

@@ -402,6 +402,51 @@ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \
// -- Mixed domain/precision (all) two-operand macro --
// -- (no auxiliary arguments) --
// Invokes GENTFUNC2 once for each of the 12 ordered pairs of distinct types
// drawn from { float, double, scomplex, dcomplex }, passing the C types, the
// matching type characters (s, d, c, z), and tfuncname.
#define INSERT_GENTFUNC2_MIXDP0( tfuncname ) \
\
GENTFUNC2( float, double, s, d, tfuncname ) \
GENTFUNC2( float, scomplex, s, c, tfuncname ) \
GENTFUNC2( float, dcomplex, s, z, tfuncname ) \
\
GENTFUNC2( double, float, d, s, tfuncname ) \
GENTFUNC2( double, scomplex, d, c, tfuncname ) \
GENTFUNC2( double, dcomplex, d, z, tfuncname ) \
\
GENTFUNC2( scomplex, float, c, s, tfuncname ) \
GENTFUNC2( scomplex, double, c, d, tfuncname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \
\
GENTFUNC2( dcomplex, float, z, s, tfuncname ) \
GENTFUNC2( dcomplex, double, z, d, tfuncname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname )
// -- (one auxiliary argument) --
// Same 12 mixed type pairs as the no-argument variant, but every GENTFUNC2
// invocation additionally receives varname.
#define INSERT_GENTFUNC2_MIX_DP( tfuncname, varname ) \
\
GENTFUNC2( float, double, s, d, tfuncname, varname ) \
GENTFUNC2( float, scomplex, s, c, tfuncname, varname ) \
GENTFUNC2( float, dcomplex, s, z, tfuncname, varname ) \
\
GENTFUNC2( double, float, d, s, tfuncname, varname ) \
GENTFUNC2( double, scomplex, d, c, tfuncname, varname ) \
GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \
\
GENTFUNC2( scomplex, float, c, s, tfuncname, varname ) \
GENTFUNC2( scomplex, double, c, d, tfuncname, varname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTFUNC2( dcomplex, float, z, s, tfuncname, varname ) \
GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname )
// -- Basic two-operand with real projection of first operand --
// -- (no auxiliary arguments) --

View File

@@ -395,6 +395,50 @@ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) \
// -- Mixed domain/precision (all) two-operand macro --
// -- (no auxiliary arguments) --
// Invokes GENTPROT2 once for each of the 12 ordered pairs of distinct types
// drawn from { float, double, scomplex, dcomplex }, passing the C types, the
// matching type characters (s, d, c, z), and funcname.
#define INSERT_GENTPROT2_MIXDP0( funcname ) \
\
GENTPROT2( float, double, s, d, funcname ) \
GENTPROT2( float, scomplex, s, c, funcname ) \
GENTPROT2( float, dcomplex, s, z, funcname ) \
\
GENTPROT2( double, float, d, s, funcname ) \
GENTPROT2( double, scomplex, d, c, funcname ) \
GENTPROT2( double, dcomplex, d, z, funcname ) \
\
GENTPROT2( scomplex, float, c, s, funcname ) \
GENTPROT2( scomplex, double, c, d, funcname ) \
GENTPROT2( scomplex, dcomplex, c, z, funcname ) \
\
GENTPROT2( dcomplex, float, z, s, funcname ) \
GENTPROT2( dcomplex, double, z, d, funcname ) \
GENTPROT2( dcomplex, scomplex, z, c, funcname )
// -- (one auxiliary argument) --
// Same 12 mixed type pairs as the no-argument variant, but every GENTPROT2
// invocation additionally receives varname.
#define INSERT_GENTPROT2_MIX_DP( tfuncname, varname ) \
\
GENTPROT2( float, double, s, d, tfuncname, varname ) \
GENTPROT2( float, scomplex, s, c, tfuncname, varname ) \
GENTPROT2( float, dcomplex, s, z, tfuncname, varname ) \
\
GENTPROT2( double, float, d, s, tfuncname, varname ) \
GENTPROT2( double, scomplex, d, c, tfuncname, varname ) \
GENTPROT2( double, dcomplex, d, z, tfuncname, varname ) \
\
GENTPROT2( scomplex, float, c, s, tfuncname, varname ) \
GENTPROT2( scomplex, double, c, d, tfuncname, varname ) \
GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTPROT2( dcomplex, float, z, s, tfuncname, varname ) \
GENTPROT2( dcomplex, double, z, d, tfuncname, varname ) \
GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname )
// -- Basic two-operand with real projection of first operand --
// -- (no auxiliary arguments) --

View File

@@ -76,11 +76,36 @@ static bool_t bli_obj_is_const( obj_t* obj )
return ( bli_obj_dt( obj ) == BLIS_BITVAL_CONST_TYPE );
}
static objbits_t bli_obj_domain( obj_t* obj )
static dom_t bli_obj_domain( obj_t* obj )
{
return ( obj->info & BLIS_DOMAIN_BIT );
}
// Returns the precision of obj: the info field masked with
// BLIS_PRECISION_BIT (the value is not shifted).
static prec_t bli_obj_prec( obj_t* obj )
{
return ( obj->info & BLIS_PRECISION_BIT );
}
// Returns whether the precision of obj equals BLIS_BITVAL_SINGLE_PREC.
static bool_t bli_obj_is_single_prec( obj_t* obj )
{
return ( bli_obj_prec( obj ) == BLIS_BITVAL_SINGLE_PREC );
}
// Returns whether the precision of obj equals BLIS_BITVAL_DOUBLE_PREC.
static bool_t bli_obj_is_double_prec( obj_t* obj )
{
return ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC );
}
// Returns the datatype of obj projected to single precision: the precision
// bit is cleared and every other bit of the datatype is left as is.
static num_t bli_obj_dt_proj_to_single_prec( obj_t* obj )
{
	// The precision of an object is a single masked bit that is compared
	// against BLIS_BITVAL_SINGLE_PREC and BLIS_BITVAL_DOUBLE_PREC, and
	// bli_obj_dt_proj_to_double_prec() sets it by OR-ing in the latter. The
	// single-precision value is therefore the zero encoding, and masking
	// with its complement would return the datatype unchanged. Clearing the
	// bit that BLIS_BITVAL_DOUBLE_PREC sets is what performs the projection.
	return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_DOUBLE_PREC );
}
// Returns the datatype of obj with BLIS_BITVAL_DOUBLE_PREC OR-ed in, i.e.
// the datatype projected to double precision.
static num_t bli_obj_dt_proj_to_double_prec( obj_t* obj )
{
return ( bli_obj_dt( obj ) | BLIS_BITVAL_DOUBLE_PREC );
}
static bool_t bli_obj_is_real( obj_t* obj )
{
return ( bli_obj_domain( obj ) == BLIS_BITVAL_REAL );
@@ -91,16 +116,6 @@ static bool_t bli_obj_is_complex( obj_t* obj )
return ( bli_obj_domain( obj ) == BLIS_BITVAL_COMPLEX );
}
// Returns the precision of obj: the info field masked with
// BLIS_PRECISION_BIT (the value is not shifted).
static objbits_t bli_obj_prec( obj_t* obj )
{
return ( obj->info & BLIS_PRECISION_BIT );
}
// Returns whether the precision of obj equals BLIS_BITVAL_DOUBLE_PREC.
static bool_t bli_obj_is_double_prec( obj_t* obj )
{
return ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC );
}
static num_t bli_obj_dt_proj_to_real( obj_t* obj )
{
return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_COMPLEX );
@@ -108,7 +123,7 @@ static num_t bli_obj_dt_proj_to_real( obj_t* obj )
static num_t bli_obj_dt_proj_to_complex( obj_t* obj )
{
return ( bli_obj_dt( obj ) & BLIS_BITVAL_COMPLEX );
return ( bli_obj_dt( obj ) | BLIS_BITVAL_COMPLEX );
}
static num_t bli_obj_target_dt( obj_t* obj )
@@ -116,9 +131,29 @@ static num_t bli_obj_target_dt( obj_t* obj )
return ( ( obj->info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT );
}
// Returns the domain of the target datatype of obj: the info field masked
// with BLIS_TARGET_DOMAIN_BIT and shifted right by BLIS_TARGET_DT_SHIFT.
static dom_t bli_obj_target_domain( obj_t* obj )
{
return ( ( obj->info & BLIS_TARGET_DOMAIN_BIT ) >> BLIS_TARGET_DT_SHIFT );
}
// Returns the precision of the target datatype of obj: the info field
// masked with BLIS_TARGET_PREC_BIT and shifted right by
// BLIS_TARGET_DT_SHIFT.
static prec_t bli_obj_target_prec( obj_t* obj )
{
return ( ( obj->info & BLIS_TARGET_PREC_BIT ) >> BLIS_TARGET_DT_SHIFT );
}
static num_t bli_obj_exec_dt( obj_t* obj )
{
return ( ( obj->info & BLIS_EXECUTION_DT_BITS ) >> BLIS_EXECUTION_DT_SHIFT );
return ( ( obj->info & BLIS_EXEC_DT_BITS ) >> BLIS_EXEC_DT_SHIFT );
}
// Returns the domain of the execution datatype of obj: the info field
// masked with BLIS_EXEC_DOMAIN_BIT and shifted right by BLIS_EXEC_DT_SHIFT.
static dom_t bli_obj_exec_domain( obj_t* obj )
{
return ( ( obj->info & BLIS_EXEC_DOMAIN_BIT ) >> BLIS_EXEC_DT_SHIFT );
}
// Returns the precision of the execution datatype of obj: the info field
// masked with BLIS_EXEC_PREC_BIT and shifted right by BLIS_EXEC_DT_SHIFT.
static prec_t bli_obj_exec_prec( obj_t* obj )
{
return ( ( obj->info & BLIS_EXEC_PREC_BIT ) >> BLIS_EXEC_DT_SHIFT );
}
static trans_t bli_obj_conjtrans_status( obj_t* obj )
@@ -326,9 +361,29 @@ static void bli_obj_set_target_dt( num_t dt, obj_t* obj )
obj->info = ( obj->info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
// Stores the domain dt in the target datatype field of obj, leaving every
// other bit of the info field untouched.
static void bli_obj_set_target_domain( dom_t dt, obj_t* obj )
{
	// bli_obj_target_domain() recovers the value by shifting right by
	// BLIS_TARGET_DT_SHIFT, so the same shift is applied here to keep the
	// setter the exact inverse of the getter.
	obj->info = ( obj->info & ~BLIS_TARGET_DOMAIN_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
// Stores the precision dt in the target datatype field of obj, leaving
// every other bit of the info field untouched.
static void bli_obj_set_target_prec( prec_t dt, obj_t* obj )
{
	// A prec_t value already carries the precision bit at its position
	// within a datatype (bli_obj_prec() returns it unshifted), and
	// bli_obj_target_prec() recovers it by shifting right by
	// BLIS_TARGET_DT_SHIFT. Shifting by the datatype-field offset, rather
	// than by the offset of the precision bit itself, places the bit under
	// BLIS_TARGET_PREC_BIT and makes the setter the inverse of the getter.
	obj->info = ( obj->info & ~BLIS_TARGET_PREC_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
static void bli_obj_set_exec_dt( num_t dt, obj_t* obj )
{
obj->info = ( obj->info & ~BLIS_EXECUTION_DT_BITS ) | ( dt << BLIS_EXECUTION_DT_SHIFT );
obj->info = ( obj->info & ~BLIS_EXEC_DT_BITS ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
// Stores the domain dt in the execution datatype field of obj, leaving
// every other bit of the info field untouched.
static void bli_obj_set_exec_domain( dom_t dt, obj_t* obj )
{
	// bli_obj_exec_domain() recovers the value by shifting right by
	// BLIS_EXEC_DT_SHIFT, so the same shift is applied here to keep the
	// setter the exact inverse of the getter.
	obj->info = ( obj->info & ~BLIS_EXEC_DOMAIN_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
// Stores the precision dt in the execution datatype field of obj, leaving
// every other bit of the info field untouched.
static void bli_obj_set_exec_prec( prec_t dt, obj_t* obj )
{
	// A prec_t value already carries the precision bit at its position
	// within a datatype (bli_obj_prec() returns it unshifted), and
	// bli_obj_exec_prec() recovers it by shifting right by
	// BLIS_EXEC_DT_SHIFT. Shifting by the datatype-field offset, rather
	// than by the offset of the precision bit itself, places the bit under
	// BLIS_EXEC_PREC_BIT and makes the setter the inverse of the getter.
	obj->info = ( obj->info & ~BLIS_EXEC_PREC_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
static void bli_obj_set_pack_schema( pack_t schema, obj_t* obj )
@@ -909,39 +964,7 @@ static void bli_obj_toggle_uplo_if_trans( trans_t trans, obj_t* obj )
}
}
// Make b a full alias (shallow copy) of a.
// a is the source object and b is the destination; both arguments are
// forwarded unchanged to bli_obj_init_full_shallow_copy_of().
static void bli_obj_alias_to( obj_t* a, obj_t* b )
{
bli_obj_init_full_shallow_copy_of( a, b );
}
// Check whether two objects are aliases of one another.
// Two objects are considered aliases when bli_obj_buffer() yields the same
// address for both; no other object properties are compared.
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
{
    bool_t same_buffer = ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );

    return same_buffer;
}
// Make b an alias of a, then apply trans to b.
// (Note: trans may include a conj component.)
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
{
    // Start from a full shallow copy of a (what bli_obj_alias_to() does).
    bli_obj_init_full_shallow_copy_of( a, b );

    // Only the alias b receives the trans value.
    bli_obj_apply_trans( trans, b );
}
// Make b an alias of a, then apply the conj value conja to b.
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
{
    // Start from a full shallow copy of a (what bli_obj_alias_to() does).
    bli_obj_init_full_shallow_copy_of( a, b );

    // Only the alias b receives the conj value.
    bli_obj_apply_conj( conja, b );
}
// Initialize object with default properties (info field)
// Initialize object with default properties (info field).
static void bli_obj_set_defaults( obj_t* obj )
{
@@ -1021,6 +1044,91 @@ static void* bli_obj_buffer_for_1x1( num_t dt, obj_t* obj )
);
}
// Make b a full alias (shallow copy) of a.
// a is the source object and b is the destination; both arguments are
// forwarded unchanged to bli_obj_init_full_shallow_copy_of().
static void bli_obj_alias_to( obj_t* a, obj_t* b )
{
bli_obj_init_full_shallow_copy_of( a, b );
}
// Check whether two objects are aliases of one another.
// Two objects are considered aliases when bli_obj_buffer() yields the same
// address for both; no other object properties are compared.
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
{
    bool_t same_buffer = ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );

    return same_buffer;
}
// Make b an alias of a, then apply trans to b.
// (Note: trans may include a conj component.)
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
{
    // Start from a full shallow copy of a (what bli_obj_alias_to() does).
    bli_obj_init_full_shallow_copy_of( a, b );

    // Only the alias b receives the trans value.
    bli_obj_apply_trans( trans, b );
}
// Make b an alias of a, then apply the conj value conja to b.
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
{
    // Start from a full shallow copy of a (what bli_obj_alias_to() does).
    bli_obj_init_full_shallow_copy_of( a, b );

    // Only the alias b receives the conj value.
    bli_obj_apply_conj( conja, b );
}
// Alias only the real part.
// r is always made a full alias of c. If c is complex, r is additionally
// adjusted to use the real projection of c's datatype, half of c's element
// size, and twice c's row and column strides. The buffer address is the one
// inherited from the alias.
static void bli_obj_real_part( obj_t* c, obj_t* r )
{
    bli_obj_alias_to( c, r );

    // Nothing more to do unless c is complex.
    if ( !bli_obj_is_complex( c ) ) return;

    // Switch r to the real projection of c's datatype.
    num_t real_dt = bli_obj_dt_proj_to_real( c );
    bli_obj_set_dt( real_dt, r );

    // A real element occupies half of a complex element.
    siz_t elem_size_c = bli_obj_elem_size( c );
    bli_obj_set_elem_size( elem_size_c/2, r );

    // Strides are doubled since they are now counted in real elements.
    inc_t row_stride_c = bli_obj_row_stride( c );
    inc_t col_stride_c = bli_obj_col_stride( c );
    bli_obj_set_strides( 2*row_stride_c, 2*col_stride_c, r );

    // The buffer is intentionally left unchanged.
}
// Alias only the imaginary part.
// If c is not complex, i is left untouched. Otherwise i becomes an alias of
// c that uses the real projection of c's datatype, half of c's element size,
// twice c's row and column strides, and a buffer advanced from
// bli_obj_buffer_at_off( c ) by c's imaginary stride (scaled by half of c's
// element size, in bytes).
static void bli_obj_imag_part( obj_t* c, obj_t* i )
{
    // Only complex objects have an imaginary part to alias.
    if ( !bli_obj_is_complex( c ) ) return;

    bli_obj_alias_to( c, i );

    // Switch i to the real projection of c's datatype.
    num_t real_dt = bli_obj_dt_proj_to_real( c );
    bli_obj_set_dt( real_dt, i );

    // A real element occupies half of a complex element.
    siz_t elem_size_c = bli_obj_elem_size( c );
    bli_obj_set_elem_size( elem_size_c/2, i );

    // Strides are doubled since they are now counted in real elements.
    inc_t row_stride_c = bli_obj_row_stride( c );
    inc_t col_stride_c = bli_obj_col_stride( c );
    bli_obj_set_strides( 2*row_stride_c, 2*col_stride_c, i );

    // Advance the buffer (byte-wise) to the imaginary components.
    inc_t imag_stride_c = bli_obj_imag_stride( c );
    char* base          = bli_obj_buffer_at_off( c );
    char* imag_buffer   = base + imag_stride_c * elem_size_c/2;
    bli_obj_set_buffer( imag_buffer, i );
}
// Given a 1x1 object, acquire an address to the buffer depending on whether
// the object is a BLIS_CONSTANT, and also set a datatype associated with the
// chosen buffer (possibly using an auxiliary datatype if the object is

View File

@@ -112,6 +112,16 @@ static bool_t bli_is_double_prec( num_t dt )
bli_is_dcomplex( dt ) );
}
// Extract the domain component of a datatype by masking dt with
// BLIS_DOMAIN_BIT. The result is not shifted.
static dom_t bli_dt_domain( num_t dt )
{
    dom_t dom = dt & BLIS_DOMAIN_BIT;

    return dom;
}
// Extract the precision component of a datatype by masking dt with
// BLIS_PRECISION_BIT. The result is not shifted, so a set precision bit
// stays at its position within the datatype value.
static prec_t bli_dt_prec( num_t dt )
{
    prec_t prec = dt & BLIS_PRECISION_BIT;

    return prec;
}
static num_t bli_dt_proj_to_real( num_t dt )
{
return ( dt & ~BLIS_BITVAL_COMPLEX );
@@ -119,7 +129,17 @@ static num_t bli_dt_proj_to_real( num_t dt )
// Project a datatype to the complex domain.
// The complex bit is OR-ed into dt so that all other bits of the datatype
// (e.g. its precision) are preserved. Masking with '&' instead would discard
// everything except the complex bit.
static num_t bli_dt_proj_to_complex( num_t dt )
{
    return ( dt | BLIS_BITVAL_COMPLEX );
}
// Project a datatype to single precision.
// Precision is encoded as a single bit (0 == single, 1 == double), so the
// projection must clear the double-precision bit, mirroring how
// bli_dt_proj_to_real() clears the complex bit. All other bits of dt are
// preserved.
// NOTE(review): this assumes BLIS_BITVAL_SINGLE_PREC is the all-zero encoding
// (in which case masking with its complement leaves dt unchanged) -- confirm
// against the BLIS_BITVAL_* definitions.
static num_t bli_dt_proj_to_single_prec( num_t dt )
{
    return ( dt & ~BLIS_BITVAL_DOUBLE_PREC );
}
// Project a datatype to double precision.
// The double-precision bit is OR-ed into dt; all other bits are preserved.
static num_t bli_dt_proj_to_double_prec( num_t dt )
{
    num_t dt_double = dt | BLIS_BITVAL_DOUBLE_PREC;

    return dt_double;
}
@@ -990,6 +1010,41 @@ void bli_set_dims_incs_uplo_1m_noswap
}
}
// Set dimensions and increments for TWO matrix arguments.
//
// Inputs:  transa describes the trans value applied to A; m and n are the
//          dimensions; (rs_a, cs_a) and (rs_b, cs_b) are the row and column
//          strides of A and B.
// Outputs: n_elem and n_iter receive the dimensions to traverse, and
//          (inca, lda) and (incb, ldb) receive the matching increment and
//          leading dimension for A and B.
static
void bli_set_dims_incs_2m
(
trans_t transa,
dim_t m, dim_t n, inc_t rs_a, inc_t cs_a,
inc_t rs_b, inc_t cs_b,
dim_t* n_elem, dim_t* n_iter, inc_t* inca, inc_t* lda,
inc_t* incb, inc_t* ldb
)
{
    // Begin with n iterations over m elements each, using the strides
    // exactly as they were given.
    *n_iter = n;
    *n_elem = m;
    *inca   = rs_a;
    *lda    = cs_a;
    *incb   = rs_b;
    *ldb    = cs_b;

    // A transposition of A is absorbed by exchanging A's increments.
    if ( bli_does_trans( transa ) )
    {
        bli_swap_incs( inca, lda );
    }

    // Exchange the roles of the two dimensions (and of each operand's
    // increments) only when B and A are both row-tilted. B is tested first
    // and A is tested only if B qualifies.
    if ( bli_is_row_tilted( *n_elem, *n_iter, *incb, *ldb ) )
    {
        if ( bli_is_row_tilted( *n_elem, *n_iter, *inca, *lda ) )
        {
            bli_swap_dims( n_iter, n_elem );
            bli_swap_incs( inca, lda );
            bli_swap_incs( incb, ldb );
        }
    }
}
// Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix
// arguments.
@@ -1033,7 +1088,7 @@ void bli_set_dims_incs_uplo_2m
if ( bli_is_stored_subpart( diagoffa_use_, transa, uploa, m, n ) )
uploa = BLIS_DENSE;
n_iter_max_ = n;
n_iter_max_ = n;
*n_elem_max = m;
*inca = rs_a;
*lda = cs_a;

View File

@@ -210,11 +210,11 @@ typedef dcomplex f77_dcomplex;
12 ~ 10 Target numerical datatype
- 10: domain (0 == real, 1 == complex)
- 11: precision (0 == single, 1 == double)
- 12: unused
- 12: used to encode integer, constant types
15 ~ 13 Execution numerical datatype
- 13: domain (0 == real, 1 == complex)
- 14: precision (0 == single, 1 == double)
- 15: unused
- 15: used to encode integer, constant types
22 ~ 16 Packed type/status
- 0 0000 00: not packed
- 1 0000 00: packed (unspecified; by rows, columns, or vector)
@@ -271,7 +271,11 @@ typedef dcomplex f77_dcomplex;
#define BLIS_UNIT_DIAG_SHIFT 8
#define BLIS_INVERT_DIAG_SHIFT 9
#define BLIS_TARGET_DT_SHIFT 10
#define BLIS_EXECUTION_DT_SHIFT 13
#define BLIS_TARGET_DOMAIN_SHIFT 10
#define BLIS_TARGET_PREC_SHIFT 11
#define BLIS_EXEC_DT_SHIFT 13
#define BLIS_EXEC_DOMAIN_SHIFT 13
#define BLIS_EXEC_PREC_SHIFT 14
#define BLIS_PACK_SCHEMA_SHIFT 16
#define BLIS_PACK_RC_SHIFT 16
#define BLIS_PACK_PANEL_SHIFT 17
@@ -299,7 +303,11 @@ typedef dcomplex f77_dcomplex;
#define BLIS_UNIT_DIAG_BIT ( 0x1 << BLIS_UNIT_DIAG_SHIFT )
#define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT )
#define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT )
#define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT )
#define BLIS_TARGET_DOMAIN_BIT ( 0x1 << BLIS_TARGET_DOMAIN_SHIFT )
#define BLIS_TARGET_PREC_BIT ( 0x1 << BLIS_TARGET_PREC_SHIFT )
#define BLIS_EXEC_DT_BITS ( 0x7 << BLIS_EXEC_DT_SHIFT )
#define BLIS_EXEC_DOMAIN_BIT ( 0x1 << BLIS_EXEC_DOMAIN_SHIFT )
#define BLIS_EXEC_PREC_BIT ( 0x1 << BLIS_EXEC_PREC_SHIFT )
#define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT )
#define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT )
#define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT )
@@ -1128,8 +1136,6 @@ typedef struct cntx_s
pack_t schema_b_panel;
pack_t schema_c_panel;
bool_t anti_pref;
dim_t thrloop[ BLIS_NUM_LOOPS ];
membrk_t* membrk;
@@ -1177,6 +1183,7 @@ typedef enum
BLIS_INCONSISTENT_DATATYPES = ( -36),
BLIS_EXPECTED_REAL_PROJ_OF = ( -37),
BLIS_EXPECTED_REAL_VALUED_OBJECT = ( -38),
BLIS_INCONSISTENT_PRECISIONS = ( -39),
// Dimension-specific errors
BLIS_NONCONFORMAL_DIMENSIONS = ( -40),

View File

@@ -122,6 +122,12 @@ extern "C" {
#include "bli_cpuid.h"
#include "bli_string.h"
#include "bli_setgetij.h"
#include "bli_setri.h"
#include "bli_castm.h"
#include "bli_castv.h"
#include "bli_projm.h"
#include "bli_projv.h"
// -- Level-0 operations --

View File

@@ -60,8 +60,7 @@ void PASTEMAC(opname,imeth) \
/* Obtain a valid (native) context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
/* Invoke the operation's front end with the appropriate control
tree. */ \
/* Invoke the operation's front end. */ \
PASTEMAC(opname,_front) \
( \
alpha, a, b, beta, c, cntx, NULL \
@@ -98,8 +97,7 @@ void PASTEMAC(opname,imeth) \
/* Obtain a valid (native) context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
/* Invoke the operation's front end with the appropriate control
tree. */ \
/* Invoke the operation's front end. */ \
PASTEMAC(opname,_front) \
( \
side, alpha, a, b, beta, c, cntx, NULL \
@@ -130,8 +128,7 @@ void PASTEMAC(opname,imeth) \
/* Obtain a valid (native) context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
/* Invoke the operation's front end with the appropriate control
tree. */ \
/* Invoke the operation's front end. */ \
PASTEMAC(opname,_front) \
( \
alpha, a, beta, c, cntx, NULL \
@@ -161,8 +158,7 @@ void PASTEMAC(opname,imeth) \
/* Obtain a valid (native) context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
/* Invoke the operation's front end with the appropriate control
tree. */ \
/* Invoke the operation's front end. */ \
PASTEMAC(opname,_front) \
( \
side, alpha, a, b, cntx, NULL \
@@ -191,8 +187,7 @@ void PASTEMAC(opname,imeth) \
/* Obtain a valid (native) context from the gks if necessary. */ \
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
\
/* Invoke the operation's front end with the appropriate control
tree. */ \
/* Invoke the operation's front end. */ \
PASTEMAC(opname,_front) \
( \
side, alpha, a, b, cntx, NULL \

View File

@@ -231,11 +231,18 @@ void bli_l3_thread_decorator
{
dim_t id = omp_get_thread_num();
obj_t a_t, b_t, c_t;
cntl_t* cntl_use;
thrinfo_t* thread;
// Alias thread-local copies of A, B, and C. These will be the objects
// we pass into the thread functions.
bli_obj_alias_to( a, &a_t );
bli_obj_alias_to( b, &b_t );
bli_obj_alias_to( c, &c_t );
// Create a default control tree for the operation, if needed.
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use );
bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use );
// Create the root node of the current thread's thrinfo_t structure.
bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread );
@@ -243,17 +250,17 @@ void bli_l3_thread_decorator
func
(
alpha,
a,
b,
&a_t,
&b_t,
beta,
c,
&c_t,
cntx,
cntl_use,
thread
);
// Free the control tree, if one was created locally.
bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread );
bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread );
#ifdef PRINT_THRINFO
threads[id] = thread;

View File

@@ -161,11 +161,18 @@ void* bli_l3_thread_entry( void* data_void )
dim_t id = data->id;
thrcomm_t* gl_comm = data->gl_comm;
obj_t a_t, b_t, c_t;
cntl_t* cntl_use;
thrinfo_t* thread;
// Alias thread-local copies of A, B, and C. These will be the objects
// we pass into the thread function.
bli_obj_alias_to( a, &a_t );
bli_obj_alias_to( b, &b_t );
bli_obj_alias_to( c, &c_t );
// Create a default control tree for the operation, if needed.
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use );
bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use );
// Create the root node of the current thread's thrinfo_t structure.
bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread );
@@ -173,17 +180,17 @@ void* bli_l3_thread_entry( void* data_void )
func
(
alpha,
a,
b,
&a_t,
&b_t,
beta,
c,
&c_t,
cntx,
cntl_use,
thread
);
// Free the control tree, if one was created locally.
bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread );
bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread );
// Free the current thread's thrinfo_t structure.
bli_l3_thrinfo_free( thread );

View File

@@ -94,6 +94,12 @@ void bli_l3_thread_decorator
cntl_t* cntl_use;
thrinfo_t* thread;
// NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't
// need to alias objects for A, B, and C since they were already aliased
// in bli_*_front(). (We only needed thread-local copies so each could
// safely reset their internal (beta) scalars on c after the first
// iteration of the pc (kc) loop.)
// Create a default control tree for the operation, if needed.
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use );