mirror of
https://github.com/amd/blis.git
synced 2026-05-11 01:30:00 +00:00
Merge remote-tracking branch 'upstream/dev' into asm-macros
This commit is contained in:
@@ -49,7 +49,7 @@ GENFRONT( copysc )
|
||||
|
||||
|
||||
//
|
||||
// Define BLAS-like interfaces with heterogeneous-typed operands.
|
||||
// Prototype BLAS-like interfaces with heterogeneous-typed operands.
|
||||
//
|
||||
|
||||
#undef GENTPROT2
|
||||
|
||||
@@ -203,6 +203,11 @@ void bli_l1v_xy_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
@@ -243,6 +248,11 @@ void bli_l1v_axy_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
@@ -289,6 +299,11 @@ void bli_l1v_xby_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( beta );
|
||||
@@ -339,6 +354,11 @@ void bli_l1v_axby_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
@@ -402,6 +422,11 @@ void bli_l1v_dot_check
|
||||
e_val = bli_check_nonconstant_object( rho );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
|
||||
@@ -121,6 +121,11 @@ void bli_l1d_xy_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_matrix_object( x );
|
||||
@@ -161,6 +166,11 @@ void bli_l1d_axy_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
|
||||
@@ -66,6 +66,14 @@ void bli_axpy2v_check
|
||||
e_val = bli_check_floating_object( z );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, z );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alphax );
|
||||
@@ -132,6 +140,14 @@ void bli_axpyf_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
@@ -203,6 +219,17 @@ void bli_dotaxpyv_check
|
||||
e_val = bli_check_floating_object( z );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, xt );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, z );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
@@ -299,6 +326,23 @@ void bli_dotxaxpyf_check
|
||||
e_val = bli_check_floating_object( z );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, at );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, w );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, z );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
@@ -407,6 +451,14 @@ void bli_dotxf_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
|
||||
@@ -106,6 +106,11 @@ void bli_l1m_xy_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_matrix_object( x );
|
||||
@@ -146,6 +151,11 @@ void bli_l1m_axy_check
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_scalar_object( alpha );
|
||||
|
||||
@@ -46,37 +46,45 @@ struct packm_params_s
|
||||
};
|
||||
typedef struct packm_params_s packm_params_t;
|
||||
|
||||
#define bli_cntl_packm_params_var_func( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->var_func )
|
||||
// Accessor: returns the var_func field of the packm_params_t reached through
// cntl->params. The params pointer is converted to packm_params_t* without an
// explicit cast (presumably params is declared void* -- TODO confirm).
static packm_voft bli_cntl_packm_params_var_func( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->var_func;
|
||||
}
|
||||
|
||||
#define bli_cntl_packm_params_bmid_m( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->bmid_m )
|
||||
// Accessor: returns the bmid_m field of the packm_params_t reached through
// cntl->params. The params pointer is converted to packm_params_t* without an
// explicit cast (presumably params is declared void* -- TODO confirm).
static bszid_t bli_cntl_packm_params_bmid_m( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->bmid_m;
|
||||
}
|
||||
|
||||
#define bli_cntl_packm_params_bmid_n( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->bmid_n )
|
||||
// Accessor: returns the bmid_n field of the packm_params_t reached through
// cntl->params. The params pointer is converted to packm_params_t* without an
// explicit cast (presumably params is declared void* -- TODO confirm).
static bszid_t bli_cntl_packm_params_bmid_n( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->bmid_n;
|
||||
}
|
||||
|
||||
#define bli_cntl_packm_params_does_invert_diag( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->does_invert_diag )
|
||||
// Accessor: returns the does_invert_diag flag of the packm_params_t reached
// through cntl->params. The params pointer is converted to packm_params_t*
// without an explicit cast (presumably params is declared void* -- TODO
// confirm).
static bool_t bli_cntl_packm_params_does_invert_diag( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->does_invert_diag;
|
||||
}
|
||||
|
||||
#define bli_cntl_packm_params_rev_iter_if_upper( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_upper )
|
||||
// Accessor: returns the rev_iter_if_upper flag of the packm_params_t reached
// through cntl->params. The params pointer is converted to packm_params_t*
// without an explicit cast (presumably params is declared void* -- TODO
// confirm).
static bool_t bli_cntl_packm_params_rev_iter_if_upper( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->rev_iter_if_upper;
|
||||
}
|
||||
|
||||
#define bli_cntl_packm_params_rev_iter_if_lower( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->rev_iter_if_lower )
|
||||
// Accessor: returns the rev_iter_if_lower flag of the packm_params_t reached
// through cntl->params. The params pointer is converted to packm_params_t*
// without an explicit cast (presumably params is declared void* -- TODO
// confirm).
static bool_t bli_cntl_packm_params_rev_iter_if_lower( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->rev_iter_if_lower;
|
||||
}
|
||||
|
||||
#define bli_cntl_packm_params_pack_schema( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->pack_schema )
|
||||
// Accessor: returns the pack_schema field of the packm_params_t reached
// through cntl->params. The params pointer is converted to packm_params_t*
// without an explicit cast (presumably params is declared void* -- TODO
// confirm).
static pack_t bli_cntl_packm_params_pack_schema( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->pack_schema;
|
||||
}
|
||||
|
||||
#define bli_cntl_packm_params_pack_buf_type( cntl ) \
|
||||
\
|
||||
( ( (packm_params_t*)(cntl)->params )->pack_buf_type )
|
||||
// Accessor: returns the pack_buf_type field of the packm_params_t reached
// through cntl->params. The params pointer is converted to packm_params_t*
// without an explicit cast (presumably params is declared void* -- TODO
// confirm).
static packbuf_t bli_cntl_packm_params_pack_buf_type( cntl_t* cntl )
|
||||
{
|
||||
packm_params_t* ppp = cntl->params; return ppp->pack_buf_type;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -56,8 +56,8 @@ siz_t bli_packm_init
|
||||
bool_t does_invert_diag;
|
||||
bool_t rev_iter_if_upper;
|
||||
bool_t rev_iter_if_lower;
|
||||
//pack_t pack_schema;
|
||||
packbuf_t pack_buf_type;
|
||||
pack_t schema;
|
||||
//packbuf_t pack_buf_type;
|
||||
siz_t size_needed;
|
||||
|
||||
// Check parameters.
|
||||
@@ -70,8 +70,8 @@ siz_t bli_packm_init
|
||||
does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl );
|
||||
rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl );
|
||||
rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl );
|
||||
//pack_schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||
pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
|
||||
schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||
//pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl );
|
||||
|
||||
#if 0
|
||||
// Let us now check to see if the object has already been packed. First
|
||||
@@ -112,30 +112,51 @@ siz_t bli_packm_init
|
||||
return 0;
|
||||
}
|
||||
|
||||
// We now ignore the pack_schema field in the control tree and
|
||||
// extract the schema from the context, depending on whether we are
|
||||
// preparing to pack a block of A or panel of B. For A and B, we must
|
||||
// obtain the schema from the context since the induced methods reuse
|
||||
// the same control trees used by native execution, and those induced
|
||||
// methods specify the schema used by the current execution phase
|
||||
// within the context (whereas the control tree does not change).
|
||||
#if 0
|
||||
pack_t schema;
|
||||
|
||||
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
|
||||
if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
schema = bli_cntx_schema_a_block( cntx );
|
||||
// We now ignore the pack_schema field in the control tree and
|
||||
// extract the schema from the context, depending on whether we are
|
||||
// preparing to pack a block of A or panel of B. For A and B, we must
|
||||
// obtain the schema from the context since the induced methods reuse
|
||||
// the same control trees used by native execution, and those induced
|
||||
// methods specify the schema used by the current execution phase
|
||||
// within the context (whereas the control tree does not change).
|
||||
|
||||
if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK )
|
||||
{
|
||||
schema = bli_cntx_schema_a_block( cntx );
|
||||
}
|
||||
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
|
||||
{
|
||||
schema = bli_cntx_schema_b_panel( cntx );
|
||||
}
|
||||
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
|
||||
{
|
||||
schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||
}
|
||||
}
|
||||
else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL )
|
||||
else // ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
schema = bli_cntx_schema_b_panel( cntx );
|
||||
// For native execution, we obtain the schema from the control tree
|
||||
// node. (Notice that it doesn't matter if the pack_buf_type is for
|
||||
// A or B.)
|
||||
schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||
}
|
||||
else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
|
||||
// This is no longer needed now that we branch between native and
|
||||
// non-native cases above.
|
||||
#if 0
|
||||
if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL )
|
||||
{
|
||||
// If we get a request to pack C for some reason, it is likely
|
||||
// not part of an induced method, and so it would be safe (and
|
||||
// necessary) to read the pack schema from the control tree.
|
||||
schema = bli_cntl_packm_params_pack_schema( cntl );
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
// Prepare a few other variables based on properties of the control
|
||||
// tree.
|
||||
|
||||
@@ -53,6 +53,14 @@ void bli_gemv_check
|
||||
|
||||
e_val = bli_check_general_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -80,6 +88,14 @@ void bli_hemv_check
|
||||
|
||||
e_val = bli_check_hermitian_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -107,6 +123,14 @@ void bli_symv_check
|
||||
|
||||
e_val = bli_check_symmetric_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -132,6 +156,11 @@ void bli_trmv_check
|
||||
|
||||
e_val = bli_check_triangular_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -157,6 +186,11 @@ void bli_trsv_check
|
||||
|
||||
e_val = bli_check_triangular_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -178,6 +212,14 @@ void bli_ger_check
|
||||
|
||||
e_val = bli_check_general_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -203,6 +245,11 @@ void bli_her_check
|
||||
|
||||
e_val = bli_check_hermitian_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -229,6 +276,14 @@ void bli_her2_check
|
||||
|
||||
e_val = bli_check_hermitian_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -254,6 +309,11 @@ void bli_syr_check
|
||||
|
||||
e_val = bli_check_symmetric_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
@@ -280,6 +340,14 @@ void bli_syr2_check
|
||||
|
||||
e_val = bli_check_symmetric_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( a, y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -294,6 +294,14 @@ void bli_gemm_basic_check
|
||||
|
||||
e_val = bli_check_level3_dims( a, b, c );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, b );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_hemm_basic_check
|
||||
@@ -330,6 +338,14 @@ void bli_hemm_basic_check
|
||||
|
||||
e_val = bli_check_square_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, b );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_herk_basic_check
|
||||
@@ -365,6 +381,14 @@ void bli_herk_basic_check
|
||||
|
||||
e_val = bli_check_general_object( ah );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, ah );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_her2k_basic_check
|
||||
@@ -412,6 +436,20 @@ void bli_her2k_basic_check
|
||||
|
||||
e_val = bli_check_general_object( ah );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check for consistent datatypes.
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, ah );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_datatypes( c, bh );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_l3_basic_check
|
||||
|
||||
@@ -45,6 +45,21 @@ void bli_l3_cntl_create_if
|
||||
cntl_t** cntl_use
|
||||
)
|
||||
{
|
||||
// This is part of a hack to support mixed domain in bli_gemm_front().
|
||||
// Sometimes we need to specify a non-standard schema for A and B, and
|
||||
// we decided to transmit them via the schema field in the obj_t's
|
||||
// rather than pass them in as function parameters. Once the values
|
||||
// have been read, we immediately reset them back to their expected
|
||||
// values for unpacked objects. Notice that we do this even if the
|
||||
// caller passed in a custom control tree; that's because we still need
|
||||
// to reset the pack schema of a and b, which were modified by the
|
||||
// operation's _front() function.
|
||||
pack_t schema_a = bli_obj_pack_schema( a );
|
||||
pack_t schema_b = bli_obj_pack_schema( b );
|
||||
|
||||
bli_obj_set_pack_schema( BLIS_NOT_PACKED, a );
|
||||
bli_obj_set_pack_schema( BLIS_NOT_PACKED, b );
|
||||
|
||||
// If the control tree pointer is NULL, we construct a default
|
||||
// tree as a function of the operation family.
|
||||
if ( cntl_orig == NULL )
|
||||
@@ -53,7 +68,7 @@ void bli_l3_cntl_create_if
|
||||
family == BLIS_HERK ||
|
||||
family == BLIS_TRMM )
|
||||
{
|
||||
*cntl_use = bli_gemm_cntl_create( family );
|
||||
*cntl_use = bli_gemm_cntl_create( family, schema_a, schema_b );
|
||||
}
|
||||
else // if ( family == BLIS_TRSM )
|
||||
{
|
||||
@@ -62,7 +77,7 @@ void bli_l3_cntl_create_if
|
||||
if ( bli_obj_is_triangular( a ) ) side = BLIS_LEFT;
|
||||
else side = BLIS_RIGHT;
|
||||
|
||||
*cntl_use = bli_trsm_cntl_create( side );
|
||||
*cntl_use = bli_trsm_cntl_create( side, schema_a, schema_b );
|
||||
}
|
||||
}
|
||||
else
|
||||
|
||||
@@ -57,20 +57,25 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
/* Invoke the operation's "ind" function--its induced method front-end.
|
||||
This function will call native execution for real domain problems.
|
||||
For complex problems, it calls the highest priority induced method
|
||||
that is available (ie: implemented and enabled), and if none are
|
||||
enabled, it calls native execution. */ \
|
||||
PASTEMAC(opname,ind) \
|
||||
( \
|
||||
alpha, \
|
||||
a, \
|
||||
b, \
|
||||
beta, \
|
||||
c, \
|
||||
cntx \
|
||||
); \
|
||||
/* Only proceed with an induced method if all operands have the same
|
||||
(complex) datatype. If any datatypes differ, skip the induced method
|
||||
chooser function and proceed directly with native execution, which is
|
||||
where mixed datatype support will be implemented (if at all). */ \
|
||||
if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
|
||||
bli_obj_dt( b ) == bli_obj_dt( c ) && \
|
||||
bli_obj_is_complex( c ) ) \
|
||||
{ \
|
||||
/* Invoke the operation's "ind" function--its induced method front-end.
|
||||
For complex problems, it calls the highest priority induced method
|
||||
that is available (i.e., implemented and enabled), and if none are
|
||||
enabled, it calls native execution. (For real problems, it calls
|
||||
the operation's native execution interface.) */ \
|
||||
PASTEMAC(opname,ind)( alpha, a, b, beta, c, cntx ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PASTEMAC(opname,nat)( alpha, a, b, beta, c, cntx ); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENFRONT( gemm )
|
||||
@@ -96,16 +101,25 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
PASTEMAC(opname,ind) \
|
||||
( \
|
||||
side, \
|
||||
alpha, \
|
||||
a, \
|
||||
b, \
|
||||
beta, \
|
||||
c, \
|
||||
cntx \
|
||||
); \
|
||||
/* Only proceed with an induced method if all operands have the same
|
||||
(complex) datatype. If any datatypes differ, skip the induced method
|
||||
chooser function and proceed directly with native execution, which is
|
||||
where mixed datatype support will be implemented (if at all). */ \
|
||||
if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
|
||||
bli_obj_dt( b ) == bli_obj_dt( c ) && \
|
||||
bli_obj_is_complex( c ) ) \
|
||||
{ \
|
||||
/* Invoke the operation's "ind" function--its induced method front-end.
|
||||
For complex problems, it calls the highest priority induced method
|
||||
that is available (i.e., implemented and enabled), and if none are
|
||||
enabled, it calls native execution. (For real problems, it calls
|
||||
the operation's native execution interface.) */ \
|
||||
PASTEMAC(opname,ind)( side, alpha, a, b, beta, c, cntx ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PASTEMAC(opname,nat)( side, alpha, a, b, beta, c, cntx ); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENFRONT( hemm )
|
||||
@@ -129,14 +143,24 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
PASTEMAC(opname,ind) \
|
||||
( \
|
||||
alpha, \
|
||||
a, \
|
||||
beta, \
|
||||
c, \
|
||||
cntx \
|
||||
); \
|
||||
/* Only proceed with an induced method if all operands have the same
|
||||
(complex) datatype. If any datatypes differ, skip the induced method
|
||||
chooser function and proceed directly with native execution, which is
|
||||
where mixed datatype support will be implemented (if at all). */ \
|
||||
if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \
|
||||
bli_obj_is_complex( c ) ) \
|
||||
{ \
|
||||
/* Invoke the operation's "ind" function--its induced method front-end.
|
||||
For complex problems, it calls the highest priority induced method
|
||||
that is available (i.e., implemented and enabled), and if none are
|
||||
enabled, it calls native execution. (For real problems, it calls
|
||||
the operation's native execution interface.) */ \
|
||||
PASTEMAC(opname,ind)( alpha, a, beta, c, cntx ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PASTEMAC(opname,nat)( alpha, a, beta, c, cntx ); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENFRONT( herk )
|
||||
@@ -159,14 +183,24 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
PASTEMAC(opname,ind) \
|
||||
( \
|
||||
side, \
|
||||
alpha, \
|
||||
a, \
|
||||
b, \
|
||||
cntx \
|
||||
); \
|
||||
/* Only proceed with an induced method if all operands have the same
|
||||
(complex) datatype. If any datatypes differ, skip the induced method
|
||||
chooser function and proceed directly with native execution, which is
|
||||
where mixed datatype support will be implemented (if at all). */ \
|
||||
if ( bli_obj_dt( a ) == bli_obj_dt( b ) && \
|
||||
bli_obj_is_complex( b ) ) \
|
||||
{ \
|
||||
/* Invoke the operation's "ind" function--its induced method front-end.
|
||||
For complex problems, it calls the highest priority induced method
|
||||
that is available (i.e., implemented and enabled), and if none are
|
||||
enabled, it calls native execution. (For real problems, it calls
|
||||
the operation's native execution interface.) */ \
|
||||
PASTEMAC(opname,ind)( side, alpha, a, b, cntx ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
PASTEMAC(opname,nat)( side, alpha, a, b, cntx ); \
|
||||
} \
|
||||
}
|
||||
|
||||
GENFRONT( trmm )
|
||||
|
||||
@@ -38,24 +38,24 @@
|
||||
|
||||
// gemm
|
||||
|
||||
#define gemm_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way )
|
||||
#define gemm_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way )
|
||||
// Evaluates to a1 advanced by step scaled by thread->n_way. Each macro
// argument is parenthesized so that compound expressions (e.g. "k + 1" for
// step, or "&info" for thread) expand with the intended precedence. Each
// argument is evaluated exactly once.
#define bli_gemm_get_next_a_upanel( thread, a1, step ) ( (a1) + (step) * (thread)->n_way )
|
||||
// Evaluates to b1 advanced by step scaled by thread->n_way. Each macro
// argument is parenthesized so that compound expressions (e.g. "k + 1" for
// step, or "&info" for thread) expand with the intended precedence. Each
// argument is evaluated exactly once.
#define bli_gemm_get_next_b_upanel( thread, b1, step ) ( (b1) + (step) * (thread)->n_way )
|
||||
|
||||
// herk
|
||||
|
||||
#define herk_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way )
|
||||
#define herk_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way )
|
||||
// Evaluates to a1 advanced by step scaled by thread->n_way. Each macro
// argument is parenthesized so that compound expressions (e.g. "k + 1" for
// step, or "&info" for thread) expand with the intended precedence. Each
// argument is evaluated exactly once.
#define bli_herk_get_next_a_upanel( thread, a1, step ) ( (a1) + (step) * (thread)->n_way )
|
||||
// Evaluates to b1 advanced by step scaled by thread->n_way. Each macro
// argument is parenthesized so that compound expressions (e.g. "k + 1" for
// step, or "&info" for thread) expand with the intended precedence. Each
// argument is evaluated exactly once.
#define bli_herk_get_next_b_upanel( thread, b1, step ) ( (b1) + (step) * (thread)->n_way )
|
||||
|
||||
// trmm
|
||||
|
||||
#define trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
// Evaluates to nonzero when index and thread->work_id are congruent modulo
// thread->n_way. Arguments are parenthesized so that compound expressions
// (e.g. "i + 1") expand with the intended precedence. The thread argument is
// evaluated more than once, so it must be free of side effects.
#define bli_trmm_r_ir_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
|
||||
// Evaluates to nonzero when index and thread->work_id are congruent modulo
// thread->n_way. Arguments are parenthesized so that compound expressions
// (e.g. "j + 1") expand with the intended precedence. The thread argument is
// evaluated more than once, so it must be free of side effects.
#define bli_trmm_r_jr_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
|
||||
// Evaluates to nonzero when index and thread->work_id are congruent modulo
// thread->n_way. Arguments are parenthesized so that compound expressions
// (e.g. "i + 1") expand with the intended precedence. The thread argument is
// evaluated more than once, so it must be free of side effects.
#define bli_trmm_l_ir_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
|
||||
// Evaluates to nonzero when index and thread->work_id are congruent modulo
// thread->n_way. Arguments are parenthesized so that compound expressions
// (e.g. "j + 1") expand with the intended precedence. The thread argument is
// evaluated more than once, so it must be free of side effects.
#define bli_trmm_l_jr_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
|
||||
|
||||
// trsm
|
||||
|
||||
#define trsm_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
// Evaluates to nonzero when index and thread->work_id are congruent modulo
// thread->n_way. Arguments are parenthesized so that compound expressions
// (e.g. "i + 1") expand with the intended precedence. The thread argument is
// evaluated more than once, so it must be free of side effects.
#define bli_trsm_my_iter( index, thread ) ( (index) % (thread)->n_way == (thread)->work_id % (thread)->n_way )
|
||||
|
||||
//
|
||||
// thrinfo_t APIs specific to level-3 operations.
|
||||
|
||||
@@ -55,7 +55,7 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Query the context for the function address of the current
|
||||
datatype's micro-kernel. */ \
|
||||
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
|
||||
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
|
||||
\
|
||||
/* Invoke the typed function for the given datatype. */ \
|
||||
f( \
|
||||
@@ -91,7 +91,7 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Query the context for the function address of the current
|
||||
datatype's micro-kernel. */ \
|
||||
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
|
||||
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
|
||||
\
|
||||
/* Invoke the typed function for the given datatype. */ \
|
||||
f( \
|
||||
@@ -129,7 +129,7 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Query the context for the function address of the current
|
||||
datatype's micro-kernel. */ \
|
||||
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \
|
||||
PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \
|
||||
\
|
||||
/* Invoke the typed function for the given datatype. */ \
|
||||
f( \
|
||||
|
||||
@@ -36,17 +36,21 @@
|
||||
|
||||
cntl_t* bli_gemm_cntl_create
|
||||
(
|
||||
opid_t family
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
return bli_gemmbp_cntl_create( family );
|
||||
return bli_gemmbp_cntl_create( family, schema_a, schema_b );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_gemmbp_cntl_create
|
||||
(
|
||||
opid_t family
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
void* macro_kernel_p = bli_gemm_ker_var2;
|
||||
@@ -82,7 +86,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS,
|
||||
schema_a, // normally BLIS_PACKED_ROW_PANELS
|
||||
BLIS_BUFFER_FOR_A_BLOCK,
|
||||
gemm_cntl_bp_bu
|
||||
);
|
||||
@@ -106,7 +110,7 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
schema_b, // normally BLIS_PACKED_COL_PANELS
|
||||
BLIS_BUFFER_FOR_B_PANEL,
|
||||
gemm_cntl_op_bp
|
||||
);
|
||||
@@ -134,6 +138,10 @@ cntl_t* bli_gemmbp_cntl_create
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
// This control tree creation function is disabled because it is no longer used.
|
||||
// (It was originally created in the run up to publishing the 1m journal article,
|
||||
// but was disabled to reduce complexity.)
|
||||
#if 0
|
||||
cntl_t* bli_gemmpb_cntl_create
|
||||
(
|
||||
opid_t family
|
||||
@@ -223,6 +231,7 @@ cntl_t* bli_gemmpb_cntl_create
|
||||
|
||||
return gemm_cntl_vl_mm;
|
||||
}
|
||||
#endif
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -34,20 +34,26 @@
|
||||
|
||||
cntl_t* bli_gemm_cntl_create
|
||||
(
|
||||
opid_t family
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
cntl_t* bli_gemmbp_cntl_create
|
||||
(
|
||||
opid_t family
|
||||
opid_t family,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
#if 0
|
||||
cntl_t* bli_gemmpb_cntl_create
|
||||
(
|
||||
opid_t family
|
||||
opid_t family,
|
||||
);
|
||||
#endif
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ void bli_gemm_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_eff_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_obj_swap( &a_local, &b_local );
|
||||
|
||||
@@ -87,10 +87,34 @@ void bli_gemm_front
|
||||
}
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_GEMM, BLIS_LEFT, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_GEMM,
|
||||
BLIS_LEFT, // ignored for gemm/hemm/symm
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &b_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end via the thread handler.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -183,7 +183,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -192,7 +192,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -295,11 +295,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -291,11 +291,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. */ \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -300,11 +300,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -72,7 +72,7 @@ void bli_hemm_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_toggle_side( &side );
|
||||
bli_obj_toggle_conj( &a_local );
|
||||
@@ -88,10 +88,34 @@ void bli_hemm_front
|
||||
}
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_HEMM, BLIS_LEFT, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_HEMM,
|
||||
BLIS_LEFT, // ignored for gemm/hemm/symm
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &b_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -92,7 +92,7 @@ void bli_her2k_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_obj_swap( &a_local, &bh_local );
|
||||
bli_obj_swap( &b_local, &ah_local );
|
||||
@@ -106,10 +106,38 @@ void bli_her2k_front
|
||||
}
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_HER2K, BLIS_LEFT, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_HER2K,
|
||||
BLIS_LEFT, // ignored for her[2]k/syr[2]k
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bh_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &bh_local );
|
||||
bli_obj_set_pack_schema( schema_a, &b_local );
|
||||
bli_obj_set_pack_schema( schema_b, &ah_local );
|
||||
}
|
||||
|
||||
// Invoke herk twice, using beta only the first time.
|
||||
|
||||
|
||||
@@ -77,7 +77,7 @@ void bli_herk_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_obj_toggle_conj( &a_local );
|
||||
bli_obj_toggle_conj( &ah_local );
|
||||
@@ -86,10 +86,34 @@ void bli_herk_front
|
||||
}
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_HERK, BLIS_LEFT, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_HERK,
|
||||
BLIS_LEFT, // ignored for her[2]k/syr[2]k
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &ah_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -72,7 +72,7 @@ void bli_symm_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_toggle_side( &side );
|
||||
bli_obj_induce_trans( &b_local );
|
||||
@@ -87,10 +87,34 @@ void bli_symm_front
|
||||
}
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_SYMM, BLIS_LEFT, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_SYMM,
|
||||
BLIS_LEFT, // ignored for gemm/hemm/symm
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &b_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -81,16 +81,44 @@ void bli_syr2k_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_obj_induce_trans( &c_local );
|
||||
}
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_SYR2K, BLIS_LEFT, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_SYR2K,
|
||||
BLIS_LEFT, // ignored for her[2]k/syr[2]k
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bt_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &bt_local );
|
||||
bli_obj_set_pack_schema( schema_a, &b_local );
|
||||
bli_obj_set_pack_schema( schema_b, &at_local );
|
||||
}
|
||||
|
||||
// Invoke herk twice, using beta only the first time.
|
||||
|
||||
|
||||
@@ -74,16 +74,40 @@ void bli_syrk_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_obj_induce_trans( &c_local );
|
||||
}
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_SYRK, BLIS_LEFT, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_SYRK,
|
||||
BLIS_LEFT, // ignored for her[2]k/syr[2]k
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &at_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -105,7 +105,7 @@ void bli_trmm_front
|
||||
// NOTE: We disable the optimization for 1x1 matrices since the concept
|
||||
// of row- vs. column storage breaks down.
|
||||
if ( !bli_obj_is_1x1( &c_local ) )
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_toggle_side( &side );
|
||||
bli_obj_induce_trans( &a_local );
|
||||
@@ -130,10 +130,34 @@ void bli_trmm_front
|
||||
bli_obj_set_as_root( &c_local );
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_TRMM, side, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_TRMM,
|
||||
side,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &b_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -322,7 +322,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
@@ -364,7 +364,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
|
||||
ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
|
||||
\
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \
|
||||
\
|
||||
@@ -434,7 +434,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \
|
||||
{ \
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -329,7 +329,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
@@ -371,7 +371,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
|
||||
ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
|
||||
\
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \
|
||||
\
|
||||
@@ -441,7 +441,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \
|
||||
{ \
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
|
||||
ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \
|
||||
\
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1_i; \
|
||||
ctype* restrict a2; \
|
||||
@@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \
|
||||
{ \
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Query the context for the micro-kernel address and cast it to its
|
||||
function pointer type. */ \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
|
||||
ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \
|
||||
\
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1_i; \
|
||||
ctype* restrict a2; \
|
||||
@@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \
|
||||
{ \
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -104,7 +104,7 @@ void bli_trmm3_front
|
||||
// contiguous columns, or if C is stored by columns and the micro-kernel
|
||||
// prefers contiguous rows, transpose the entire operation to allow the
|
||||
// micro-kernel to access elements of C in its preferred manner.
|
||||
if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) )
|
||||
{
|
||||
bli_toggle_side( &side );
|
||||
bli_obj_induce_trans( &a_local );
|
||||
@@ -129,10 +129,34 @@ void bli_trmm3_front
|
||||
bli_obj_set_as_root( &c_local );
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_TRMM3, side, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_TRMM3,
|
||||
side,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &b_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -36,16 +36,21 @@
|
||||
|
||||
cntl_t* bli_trsm_cntl_create
|
||||
(
|
||||
side_t side
|
||||
side_t side,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
if ( bli_is_left( side ) ) return bli_trsm_l_cntl_create();
|
||||
else return bli_trsm_r_cntl_create();
|
||||
if ( bli_is_left( side ) )
|
||||
return bli_trsm_l_cntl_create( schema_a, schema_b );
|
||||
else
|
||||
return bli_trsm_r_cntl_create( schema_a, schema_b );
|
||||
}
|
||||
|
||||
cntl_t* bli_trsm_l_cntl_create
|
||||
(
|
||||
void
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
void* macro_kernel_p = bli_trsm_xx_ker_var2;
|
||||
@@ -79,7 +84,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
TRUE, // do NOT invert diagonal
|
||||
TRUE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS,
|
||||
schema_a, // normally BLIS_PACKED_ROW_PANELS
|
||||
BLIS_BUFFER_FOR_A_BLOCK,
|
||||
trsm_cntl_bp_bu
|
||||
);
|
||||
@@ -103,7 +108,7 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
schema_b, // normally BLIS_PACKED_COL_PANELS
|
||||
BLIS_BUFFER_FOR_B_PANEL,
|
||||
trsm_cntl_op_bp
|
||||
);
|
||||
@@ -131,7 +136,8 @@ cntl_t* bli_trsm_l_cntl_create
|
||||
|
||||
cntl_t* bli_trsm_r_cntl_create
|
||||
(
|
||||
void
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
)
|
||||
{
|
||||
void* macro_kernel_p = bli_trsm_xx_ker_var2;
|
||||
@@ -165,7 +171,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS,
|
||||
schema_a, // normally BLIS_PACKED_ROW_PANELS
|
||||
BLIS_BUFFER_FOR_A_BLOCK,
|
||||
trsm_cntl_bp_bu
|
||||
);
|
||||
@@ -189,7 +195,7 @@ cntl_t* bli_trsm_r_cntl_create
|
||||
TRUE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
TRUE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
schema_b, // normally BLIS_PACKED_COL_PANELS
|
||||
BLIS_BUFFER_FOR_B_PANEL,
|
||||
trsm_cntl_op_bp
|
||||
);
|
||||
|
||||
@@ -34,17 +34,21 @@
|
||||
|
||||
cntl_t* bli_trsm_cntl_create
|
||||
(
|
||||
side_t side
|
||||
side_t side,
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
cntl_t* bli_trsm_l_cntl_create
|
||||
(
|
||||
void
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
cntl_t* bli_trsm_r_cntl_create
|
||||
(
|
||||
void
|
||||
pack_t schema_a,
|
||||
pack_t schema_b
|
||||
);
|
||||
|
||||
void bli_trsm_cntl_free
|
||||
|
||||
@@ -121,10 +121,34 @@ void bli_trsm_front
|
||||
bli_obj_set_as_root( &c_local );
|
||||
|
||||
// Record the threading for each level within the context.
|
||||
bli_cntx_set_thrloop_from_env( BLIS_TRSM, side, cntx,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ) );
|
||||
bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
BLIS_TRSM,
|
||||
side,
|
||||
bli_obj_length( &c_local ),
|
||||
bli_obj_width( &c_local ),
|
||||
bli_obj_width( &a_local ),
|
||||
cntx
|
||||
);
|
||||
|
||||
// A sort of hack for communicating the desired pack schemas for A and B
|
||||
// to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and
|
||||
// bli_l3_cntl_create_if()). This allows us to access the schemas from
|
||||
// the control tree, which hopefully reduces some confusion, particularly
|
||||
// in bli_packm_init().
|
||||
if ( bli_cntx_method( cntx ) == BLIS_NAT )
|
||||
{
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local );
|
||||
bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local );
|
||||
}
|
||||
else // if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
{
|
||||
pack_t schema_a = bli_cntx_schema_a_block( cntx );
|
||||
pack_t schema_b = bli_cntx_schema_b_panel( cntx );
|
||||
|
||||
bli_obj_set_pack_schema( schema_a, &a_local );
|
||||
bli_obj_set_pack_schema( schema_b, &b_local );
|
||||
}
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_l3_thread_decorator
|
||||
|
||||
@@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* Cast the micro-kernel address to its function pointer type. */ \
|
||||
PASTECH(ch,gemmtrsm_ukr_ft) \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -340,7 +340,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if( trsm_my_iter( j, thread ) ) { \
|
||||
if( bli_trsm_my_iter( j, thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
|
||||
@@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* Cast the micro-kernel address to its function pointer type. */ \
|
||||
PASTECH(ch,gemmtrsm_ukr_ft) \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -348,7 +348,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if( trsm_my_iter( j, thread ) ) { \
|
||||
if( bli_trsm_my_iter( j, thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
|
||||
@@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \
|
||||
is transposed so that all kernel instances are of the "left"
|
||||
variety (since those are the only trsm ukernels that exist). */ \
|
||||
PASTECH(ch,gemmtrsm_ukr_ft) \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -422,7 +422,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a11; \
|
||||
ctype* restrict a12; \
|
||||
@@ -508,7 +508,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \
|
||||
is transposed so that all kernel instances are of the "left"
|
||||
variety (since those are the only trsm ukernels that exist). */ \
|
||||
PASTECH(ch,gemmtrsm_ukr_ft) \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
|
||||
gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \
|
||||
PASTECH(ch,gemm_ukr_ft) \
|
||||
gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
\
|
||||
/* Temporary C buffer for edge cases. Note that the strides of this
|
||||
temporary buffer are set so that they match the storage of the
|
||||
@@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
@@ -415,7 +415,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a10; \
|
||||
ctype* restrict a11; \
|
||||
@@ -501,7 +501,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -53,7 +53,7 @@ static void* bli_auxinfo_next_a( auxinfo_t* ai )
|
||||
}
|
||||
static void* bli_auxinfo_next_b( auxinfo_t* ai )
|
||||
{
|
||||
return ai->a_next;
|
||||
return ai->b_next;
|
||||
}
|
||||
|
||||
static inc_t bli_auxinfo_is_a( auxinfo_t* ai )
|
||||
|
||||
@@ -172,6 +172,18 @@ static void bli_blksz_scale_max
|
||||
bli_blksz_set_max( ( val * num ) / den, dt, b );
|
||||
}
|
||||
|
||||
static void bli_blksz_scale_def_max
|
||||
(
|
||||
dim_t num,
|
||||
dim_t den,
|
||||
num_t dt,
|
||||
blksz_t* b
|
||||
)
|
||||
{
|
||||
bli_blksz_scale_def( num, den, dt, b );
|
||||
bli_blksz_scale_max( num, den, dt, b );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
blksz_t* bli_blksz_create_ed
|
||||
|
||||
@@ -342,6 +342,40 @@ err_t bli_check_real_valued_object( obj_t* a )
|
||||
return e_val;
|
||||
}
|
||||
|
||||
// Check that dt_b has the same precision as a real-valued dt_a.
//
// - dt_a == BLIS_FLOAT:  dt_b must be BLIS_FLOAT or BLIS_SCOMPLEX.
// - dt_a == BLIS_DOUBLE: dt_b must be BLIS_DOUBLE or BLIS_DCOMPLEX.
// - any other dt_a:      no check is performed.
//
// Returns BLIS_INCONSISTENT_PRECISIONS when a check above fails, and
// BLIS_SUCCESS otherwise.
//
// NOTE(review): a complex dt_a (BLIS_SCOMPLEX/BLIS_DCOMPLEX) is never
// rejected here, regardless of dt_b -- confirm whether callers rely on
// that or whether the check was meant to be symmetric.
err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b )
{
	if ( dt_a == BLIS_FLOAT )
	{
		// Single precision: accept only the single-precision types.
		if ( dt_b == BLIS_FLOAT || dt_b == BLIS_SCOMPLEX )
			return BLIS_SUCCESS;

		return BLIS_INCONSISTENT_PRECISIONS;
	}

	if ( dt_a == BLIS_DOUBLE )
	{
		// Double precision: accept only the double-precision types.
		if ( dt_b == BLIS_DOUBLE || dt_b == BLIS_DCOMPLEX )
			return BLIS_SUCCESS;

		return BLIS_INCONSISTENT_PRECISIONS;
	}

	// dt_a is neither BLIS_FLOAT nor BLIS_DOUBLE: nothing is checked.
	return BLIS_SUCCESS;
}
|
||||
|
||||
// Object-based front-end to bli_check_consistent_precisions(): query the
// datatype of each object via bli_obj_dt() and return whatever the
// datatype-level check returns for that pair.
err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b )
{
	// Query a first, then b, before handing both to the datatype check.
	const num_t dt_of_a = bli_obj_dt( a );
	const num_t dt_of_b = bli_obj_dt( b );

	return bli_check_consistent_precisions( dt_of_a, dt_of_b );
}
|
||||
|
||||
// -- Dimension-related checks -------------------------------------------------
|
||||
|
||||
err_t bli_check_conformal_dims( obj_t* a, obj_t* b )
|
||||
|
||||
@@ -62,6 +62,8 @@ err_t bli_check_consistent_object_datatypes( obj_t* a, obj_t* b );
|
||||
err_t bli_check_datatype_real_proj_of( num_t dt_c, num_t dt_r );
|
||||
err_t bli_check_object_real_proj_of( obj_t* c, obj_t* r );
|
||||
err_t bli_check_real_valued_object( obj_t* a );
|
||||
err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b );
|
||||
err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b );
|
||||
|
||||
err_t bli_check_conformal_dims( obj_t* a, obj_t* b );
|
||||
err_t bli_check_level3_dims( obj_t* a, obj_t* b, obj_t* c );
|
||||
|
||||
@@ -544,8 +544,10 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
|
||||
// -- End variable argument section --
|
||||
|
||||
// Query the context for the addresses of:
|
||||
// - the l3 virtual ukernel func_t array
|
||||
// - the l3 native ukernel func_t array
|
||||
// - the l3 native ukernel preferences array
|
||||
func_t* cntx_l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx );
|
||||
func_t* cntx_l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx );
|
||||
mbool_t* cntx_l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx );
|
||||
|
||||
@@ -565,11 +567,18 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... )
|
||||
|
||||
// Index into the func_t and mbool_t for the current kernel id
|
||||
// being processed.
|
||||
func_t* vukrs = &cntx_l3_vir_ukrs[ ukr_id ];
|
||||
func_t* ukrs = &cntx_l3_nat_ukrs[ ukr_id ];
|
||||
mbool_t* prefs = &cntx_l3_nat_ukrs_prefs[ ukr_id ];
|
||||
|
||||
// Store the ukernel function pointer and preference values into
|
||||
// the context.
|
||||
// the context. Notice that we redundantly store the native
|
||||
// ukernel address in both the native and virtual ukernel slots
|
||||
// in the context. This is standard practice when creating a
|
||||
// native context. (Induced method contexts will overwrite the
|
||||
// virtual function pointer with the address of the appropriate
|
||||
// virtual ukernel.)
|
||||
bli_func_set_dt( ukr_fp, ukr_dt, vukrs );
|
||||
bli_func_set_dt( ukr_fp, ukr_dt, ukrs );
|
||||
bli_mbool_set_dt( ukr_pref, ukr_dt, prefs );
|
||||
}
|
||||
@@ -869,10 +878,10 @@ void bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
opid_t l3_op,
|
||||
side_t side,
|
||||
cntx_t* cntx,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
dim_t k
|
||||
dim_t k,
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
dim_t jc, pc, ic, jr, ir;
|
||||
@@ -934,8 +943,8 @@ void bli_cntx_set_thrloop_from_env
|
||||
|
||||
if ( l3_op == BLIS_TRMM )
|
||||
{
|
||||
// We reconfigure the paralelism from trmm_r due to a dependency in
|
||||
// the jc loop. (NOTE: This dependency does not exist for trmm3 )
|
||||
// We reconfigure the parallelism from trmm_r due to a dependency in
|
||||
// the jc loop. (NOTE: This dependency does not exist for trmm3.)
|
||||
if ( bli_is_right( side ) )
|
||||
{
|
||||
bli_cntx_set_thrloop
|
||||
@@ -988,7 +997,7 @@ void bli_cntx_set_thrloop_from_env
|
||||
);
|
||||
}
|
||||
}
|
||||
else // if ( l3_op == BLIS_TRSM )
|
||||
else // any other level-3 operation besides trmm/trsm
|
||||
{
|
||||
bli_cntx_set_thrloop
|
||||
(
|
||||
|
||||
@@ -60,8 +60,6 @@ typedef struct cntx_s
|
||||
pack_t schema_b;
|
||||
pack_t schema_c;
|
||||
|
||||
bool_t anti_pref;
|
||||
|
||||
dim_t* thrloop;
|
||||
|
||||
membrk_t* membrk;
|
||||
@@ -126,10 +124,6 @@ static pack_t bli_cntx_schema_c_panel( cntx_t* cntx )
|
||||
{
|
||||
return cntx->schema_c_panel;
|
||||
}
|
||||
static bool_t bli_cntx_anti_pref( cntx_t* cntx )
|
||||
{
|
||||
return cntx->anti_pref;
|
||||
}
|
||||
static dim_t* bli_cntx_thrloop( cntx_t* cntx )
|
||||
{
|
||||
return cntx->thrloop;
|
||||
@@ -166,10 +160,6 @@ static void bli_cntx_set_schema_ab_blockpanel( pack_t sa, pack_t sb, cntx_t* cnt
|
||||
bli_cntx_set_schema_a_block( sa, cntx );
|
||||
bli_cntx_set_schema_b_panel( sb, cntx );
|
||||
}
|
||||
static void bli_cntx_set_anti_pref( bool_t anti_pref, cntx_t* cntx )
|
||||
{
|
||||
cntx->anti_pref = anti_pref;
|
||||
}
|
||||
static void bli_cntx_set_membrk( membrk_t* membrk, cntx_t* cntx )
|
||||
{
|
||||
cntx->membrk = membrk;
|
||||
@@ -234,27 +224,6 @@ static dim_t bli_cntx_get_bmult_dt( num_t dt, bszid_t bs_id, cntx_t* cntx )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
static func_t* bli_cntx_get_l3_ukrs( l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
func_t* funcs;
|
||||
|
||||
if ( bli_cntx_method( (cntx) ) != BLIS_NAT )
|
||||
funcs = bli_cntx_l3_vir_ukrs_buf( cntx );
|
||||
else
|
||||
funcs = bli_cntx_l3_nat_ukrs_buf( cntx );
|
||||
|
||||
func_t* func = &funcs[ ukr_id ];
|
||||
|
||||
return func;
|
||||
}
|
||||
|
||||
static void* bli_cntx_get_l3_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
func_t* func = bli_cntx_get_l3_ukrs( ukr_id, cntx );
|
||||
|
||||
return bli_func_get_dt( dt, func );
|
||||
}
|
||||
|
||||
static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
func_t* funcs = bli_cntx_l3_vir_ukrs_buf( cntx );
|
||||
@@ -487,55 +456,43 @@ static bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_i
|
||||
return !bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx );
|
||||
}
|
||||
|
||||
static bool_t bli_cntx_l3_nat_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
bool_t r_val = bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx );
|
||||
|
||||
// If the anti-preference is set, negate the result.
|
||||
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
|
||||
|
||||
return r_val;
|
||||
}
|
||||
|
||||
static bool_t bli_cntx_l3_nat_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
bool_t r_val = bli_cntx_l3_nat_ukr_dislikes_storage_of( obj, ukr_id, cntx );
|
||||
|
||||
// If the anti-preference is set, negate the result.
|
||||
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
|
||||
|
||||
return r_val;
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
static bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
static bool_t bli_cntx_l3_vir_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
// For induced methods, return the ukernel storage preferences of the
|
||||
// corresponding real micro-kernel.
|
||||
// NOTE: This projection to real domain becomes unnecessary if you
|
||||
// set the exec_dt for 1m to the real projection of the storage
|
||||
// datatype.
|
||||
if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
dt = bli_dt_proj_to_real( dt );
|
||||
|
||||
return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
|
||||
}
|
||||
|
||||
static bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
static bool_t bli_cntx_l3_vir_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
// For induced methods, return the ukernel storage preferences of the
|
||||
// corresponding real micro-kernel.
|
||||
// NOTE: This projection to real domain becomes unnecessary if you
|
||||
// set the exec_dt for 1m to the real projection of the storage
|
||||
// datatype.
|
||||
if ( bli_cntx_method( cntx ) != BLIS_NAT )
|
||||
dt = bli_dt_proj_to_real( dt );
|
||||
|
||||
return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
|
||||
}
|
||||
|
||||
static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
static bool_t bli_cntx_l3_vir_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
const num_t dt = bli_obj_dt( obj );
|
||||
// Note that we use the execution datatype, which may differ from the
|
||||
// storage datatype of C (though this would happen in very few situations).
|
||||
const num_t dt = bli_obj_exec_dt( obj );
|
||||
const bool_t ukr_prefers_rows
|
||||
= bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx );
|
||||
= bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, ukr_id, cntx );
|
||||
const bool_t ukr_prefers_cols
|
||||
= bli_cntx_l3_ukr_prefers_cols_dt( dt, ukr_id, cntx );
|
||||
= bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, ukr_id, cntx );
|
||||
bool_t r_val = FALSE;
|
||||
|
||||
if ( bli_obj_is_row_stored( obj ) && ukr_prefers_rows ) r_val = TRUE;
|
||||
@@ -544,29 +501,9 @@ static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cn
|
||||
return r_val;
|
||||
}
|
||||
|
||||
static bool_t bli_cntx_l3_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
static bool_t bli_cntx_l3_vir_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
return !bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx );
|
||||
}
|
||||
|
||||
static bool_t bli_cntx_l3_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
bool_t r_val = bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx );
|
||||
|
||||
// If the anti-preference is set, negate the result.
|
||||
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
|
||||
|
||||
return r_val;
|
||||
}
|
||||
|
||||
static bool_t bli_cntx_l3_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx )
|
||||
{
|
||||
bool_t r_val = bli_cntx_l3_ukr_dislikes_storage_of( obj, ukr_id, cntx );
|
||||
|
||||
// If the anti-preference is set, negate the result.
|
||||
if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val;
|
||||
|
||||
return r_val;
|
||||
return !bli_cntx_l3_vir_ukr_prefers_storage_of( obj, ukr_id, cntx );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
@@ -674,12 +611,15 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... );
|
||||
void bli_cntx_set_l1v_kers( dim_t n_kers, ... );
|
||||
void bli_cntx_set_packm_kers( dim_t n_kers, ... );
|
||||
|
||||
void bli_cntx_set_thrloop_from_env( opid_t l3_op,
|
||||
side_t side,
|
||||
cntx_t* cntx,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
dim_t k );
|
||||
void bli_cntx_set_thrloop_from_env
|
||||
(
|
||||
opid_t l3_op,
|
||||
side_t side,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
dim_t k,
|
||||
cntx_t* cntx
|
||||
);
|
||||
|
||||
void bli_cntx_print( cntx_t* cntx );
|
||||
|
||||
|
||||
@@ -90,6 +90,8 @@ void bli_error_init_msgs( void )
|
||||
"Expected second datatype to be real projection of first." );
|
||||
sprintf( bli_error_string_for_code(BLIS_EXPECTED_REAL_VALUED_OBJECT),
|
||||
"Expected real-valued object (ie: if complex, imaginary component equals zero)." );
|
||||
sprintf( bli_error_string_for_code(BLIS_INCONSISTENT_PRECISIONS),
|
||||
"Expected consistent precisions (both single or both double)." );
|
||||
|
||||
sprintf( bli_error_string_for_code(BLIS_NONCONFORMAL_DIMENSIONS),
|
||||
"Encountered non-conformal dimensions between objects." );
|
||||
|
||||
@@ -584,7 +584,7 @@ char* bli_gks_l3_ukr_impl_string( l3ukr_t ukr, ind_t method, num_t dt )
|
||||
// then query the ukernel function pointer for the given datatype from
|
||||
// that context.
|
||||
cntx_t* cntx = bli_gks_query_ind_cntx( method, dt );
|
||||
void* fp = bli_cntx_get_l3_ukr_dt( dt, ukr, cntx );
|
||||
void* fp = bli_cntx_get_l3_vir_ukr_dt( dt, ukr, cntx );
|
||||
|
||||
// Check whether the ukernel function pointer is NULL for the given
|
||||
// datatype. If it is NULL, return the string for not applicable.
|
||||
|
||||
@@ -210,6 +210,19 @@ void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag )
|
||||
}
|
||||
}
|
||||
|
||||
// Map a datatype character to the corresponding num_t value, written
// through blis_dt:
//   's' -> BLIS_FLOAT      'd' -> BLIS_DOUBLE
//   'c' -> BLIS_SCOMPLEX   'z' -> BLIS_DCOMPLEX
//   'i' -> BLIS_INT
// Any other character is reported by passing BLIS_INVALID_DATATYPE to
// bli_check_error_code(); in that case *blis_dt is not written.
void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt )
{
	switch ( dt )
	{
		case 's': *blis_dt = BLIS_FLOAT;    break;
		case 'd': *blis_dt = BLIS_DOUBLE;   break;
		case 'c': *blis_dt = BLIS_SCOMPLEX; break;
		case 'z': *blis_dt = BLIS_DCOMPLEX; break;
		case 'i': *blis_dt = BLIS_INT;      break;

		default:
		{
			// Unrecognized datatype character.
			bli_check_error_code( BLIS_INVALID_DATATYPE );
		}
		break;
	}
}
|
||||
|
||||
|
||||
// --- BLIS to BLIS char mappings ----------------------------------------------
|
||||
|
||||
@@ -265,3 +278,16 @@ void bli_param_map_blis_to_char_diag( diag_t blis_diag, char* diag )
|
||||
}
|
||||
}
|
||||
|
||||
// Map a num_t value to its datatype character, written through dt:
//   BLIS_FLOAT    -> 's'   BLIS_DOUBLE   -> 'd'
//   BLIS_SCOMPLEX -> 'c'   BLIS_DCOMPLEX -> 'z'
//   BLIS_INT      -> 'i'
// Any other value is reported by passing BLIS_INVALID_DATATYPE to
// bli_check_error_code(); in that case *dt is not written.
void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt )
{
	// Each recognized datatype stores its character and returns at once;
	// the tests are made in the order float, double, scomplex, dcomplex,
	// int.
	if ( blis_dt == BLIS_FLOAT )    { *dt = 's'; return; }
	if ( blis_dt == BLIS_DOUBLE )   { *dt = 'd'; return; }
	if ( blis_dt == BLIS_SCOMPLEX ) { *dt = 'c'; return; }
	if ( blis_dt == BLIS_DCOMPLEX ) { *dt = 'z'; return; }
	if ( blis_dt == BLIS_INT )      { *dt = 'i'; return; }

	// Nothing matched: unrecognized datatype value.
	bli_check_error_code( BLIS_INVALID_DATATYPE );
}
|
||||
|
||||
|
||||
@@ -57,6 +57,7 @@ void bli_param_map_char_to_blis_uplo( char uplo, uplo_t* blis_uplo );
|
||||
void bli_param_map_char_to_blis_trans( char trans, trans_t* blis_trans );
|
||||
void bli_param_map_char_to_blis_conj( char conj, conj_t* blis_conj );
|
||||
void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag );
|
||||
void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt );
|
||||
|
||||
|
||||
// --- BLIS to BLIS char mappings ----------------------------------------------
|
||||
@@ -66,4 +67,5 @@ void bli_param_map_blis_to_char_uplo( uplo_t blis_uplo, char* uplo );
|
||||
void bli_param_map_blis_to_char_trans( trans_t blis_trans, char* trans );
|
||||
void bli_param_map_blis_to_char_conj( conj_t blis_conj, char* conj );
|
||||
void bli_param_map_blis_to_char_diag( diag_t blis_diag, char* diag );
|
||||
void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt );
|
||||
|
||||
|
||||
@@ -38,6 +38,49 @@
|
||||
// -- Matrix partitioning ------------------------------------------------------
|
||||
|
||||
|
||||
void bli_acquire_mpart
|
||||
(
|
||||
dim_t i,
|
||||
dim_t j,
|
||||
dim_t bm,
|
||||
dim_t bn,
|
||||
obj_t* parent,
|
||||
obj_t* child
|
||||
)
|
||||
{
|
||||
// Query the dimensions of the parent object.
|
||||
const dim_t m_par = bli_obj_length( parent );
|
||||
const dim_t n_par = bli_obj_width( parent );
|
||||
|
||||
// If either i or j is already beyond what exists of the parent matrix,
|
||||
// slide them back to the outer dimensions. (What will happen in this
|
||||
// scenario is that bm and bn and/or will be reduced to zero so that the
|
||||
// child matrix does not refer to anything beyond the bounds of the
|
||||
// parent. (Note: This is a safety measure and generally should never
|
||||
// be needed if the caller is passing in sane arguments.)
|
||||
if ( i > m_par ) i = m_par;
|
||||
if ( j > n_par ) j = n_par;
|
||||
|
||||
// If either bm or bn spills out over the edge of the parent matrix,
|
||||
// reduce them so that the child matrix fits within the bounds of the
|
||||
// parent. (Note: This is a safety measure and generally should never
|
||||
// be needed if the caller is passing in sane arguments, though this
|
||||
// code is somewhat more likely to be needed than the code above.)
|
||||
if ( bm > m_par - i ) bm = m_par - i;
|
||||
if ( bn > n_par - j ) bn = n_par - j;
|
||||
|
||||
// Alias the parent object's contents into the child object.
|
||||
bli_obj_alias_to( parent, child );
|
||||
|
||||
// Set the offsets and dimensions of the child object. Note that we
|
||||
// increment, rather than overwrite, the offsets of the child object
|
||||
// in case the parent object already had non-zero offsets (usually
|
||||
// because the parent was itself a child a larger grandparent object).
|
||||
bli_obj_inc_offs( i, j, child );
|
||||
bli_obj_set_dims( bm, bn, child );
|
||||
}
|
||||
|
||||
|
||||
void bli_acquire_mpart_mdim
|
||||
(
|
||||
dir_t direct,
|
||||
|
||||
@@ -36,6 +36,16 @@
|
||||
|
||||
// -- Matrix partitioning ------------------------------------------------------
|
||||
|
||||
void bli_acquire_mpart
|
||||
(
|
||||
dim_t i,
|
||||
dim_t j,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
obj_t* obj,
|
||||
obj_t* sub_obj
|
||||
);
|
||||
|
||||
#undef GENPROT
|
||||
#define GENPROT( opname ) \
|
||||
\
|
||||
|
||||
162
frame/base/bli_setri.c
Normal file
162
frame/base/bli_setri.c
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
// -- setr ---------------------------------------------------------------------
|
||||
|
||||
void bli_setrm
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
obj_t alpha_real;
|
||||
obj_t br;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_setm_check( alpha, b );
|
||||
|
||||
// Initialize a local scalar, alpha_real, using the real projection
|
||||
// of the datatype of b.
|
||||
bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ),
|
||||
&alpha_real );
|
||||
|
||||
// Copy/typecast alpha to alpha_real. This discards the imaginary
|
||||
// part of alpha (if it is complex).
|
||||
bli_copysc( alpha, &alpha_real );
|
||||
|
||||
// Acquire an alias to the real part of b.
|
||||
bli_obj_real_part( b, &br );
|
||||
|
||||
// Use setm to set the real part of b to alpha_real.
|
||||
bli_setm( &alpha_real, &br );
|
||||
}
|
||||
|
||||
void bli_setrv
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* x
|
||||
)
|
||||
{
|
||||
obj_t alpha_real;
|
||||
obj_t xr;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_setv_check( alpha, x );
|
||||
|
||||
// Initialize a local scalar, alpha_real, using the real projection
|
||||
// of the datatype of x.
|
||||
bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ),
|
||||
&alpha_real );
|
||||
|
||||
// Copy/typecast alpha to alpha_real. This discards the imaginary
|
||||
// part of alpha (if it is complex).
|
||||
bli_copysc( alpha, &alpha_real );
|
||||
|
||||
// Acquire an alias to the real part of x.
|
||||
bli_obj_real_part( x, &xr );
|
||||
|
||||
// Use setv to set the real part of x to alpha_real.
|
||||
bli_setv( &alpha_real, &xr );
|
||||
}
|
||||
|
||||
// -- seti ---------------------------------------------------------------------
|
||||
|
||||
void bli_setim
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
obj_t alpha_real;
|
||||
obj_t bi;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_setm_check( alpha, b );
|
||||
|
||||
// If the object is real, return early.
|
||||
if ( bli_obj_is_real( b ) ) return;
|
||||
|
||||
// Initialize a local scalar, alpha_real, using the real projection
|
||||
// of the datatype of b.
|
||||
bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ),
|
||||
&alpha_real );
|
||||
|
||||
// Copy/typecast alpha to alpha_real. This discards the imaginary
|
||||
// part of alpha (if it is complex).
|
||||
bli_copysc( alpha, &alpha_real );
|
||||
|
||||
// Acquire an alias to the imaginary part of b.
|
||||
bli_obj_imag_part( b, &bi );
|
||||
|
||||
// Use setm to set the imaginary part of b to alpha_real.
|
||||
bli_setm( &alpha_real, &bi );
|
||||
}
|
||||
|
||||
void bli_setiv
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* x
|
||||
)
|
||||
{
|
||||
obj_t alpha_real;
|
||||
obj_t xi;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_setv_check( alpha, x );
|
||||
|
||||
// If the object is real, return early.
|
||||
if ( bli_obj_is_real( x ) ) return;
|
||||
|
||||
// Initialize a local scalar, alpha_real, using the real projection
|
||||
// of the datatype of x.
|
||||
bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ),
|
||||
&alpha_real );
|
||||
|
||||
// Copy/typecast alpha to alpha_real. This discards the imaginary
|
||||
// part of alpha (if it is complex).
|
||||
bli_copysc( alpha, &alpha_real );
|
||||
|
||||
// Acquire an alias to the imaginary part of x.
|
||||
bli_obj_imag_part( x, &xi );
|
||||
|
||||
// Use setm to set the imaginary part of x to alpha_real.
|
||||
bli_setm( &alpha_real, &xi );
|
||||
}
|
||||
|
||||
62
frame/base/bli_setri.h
Normal file
62
frame/base/bli_setri.h
Normal file
@@ -0,0 +1,62 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// -- setr ---------------------------------------------------------------------
|
||||
|
||||
void bli_setrm
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
void bli_setrv
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* x
|
||||
);
|
||||
|
||||
// -- seti ---------------------------------------------------------------------
|
||||
|
||||
void bli_setim
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
void bli_setiv
|
||||
(
|
||||
obj_t* alpha,
|
||||
obj_t* x
|
||||
);
|
||||
|
||||
267
frame/base/cast/bli_castm.c
Normal file
267
frame/base/cast/bli_castm.c
Normal file
@@ -0,0 +1,267 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
// NOTE: This is one of the few functions in BLIS that is defined
|
||||
// with heterogeneous type support. This is done so that we have
|
||||
// an operation that can be used to typecast (copy-cast) a matrix
|
||||
// of one datatype to a matrix of another datatype.
|
||||
|
||||
typedef void (*FUNCPTR_T)
|
||||
(
|
||||
trans_t transa,
|
||||
dim_t m,
|
||||
dim_t n,
|
||||
void* restrict a, inc_t rs_a, inc_t cs_a,
|
||||
void* restrict b, inc_t rs_b, inc_t cs_b
|
||||
);
|
||||
|
||||
static FUNCPTR_T GENARRAY2_ALL(ftypes,castm);
|
||||
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
|
||||
void bli_castm
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
num_t dt_a = bli_obj_dt( a );
|
||||
num_t dt_b = bli_obj_dt( b );
|
||||
|
||||
trans_t transa = bli_obj_conjtrans_status( a );
|
||||
|
||||
dim_t m = bli_obj_length( b );
|
||||
dim_t n = bli_obj_width( b );
|
||||
|
||||
void* buf_a = bli_obj_buffer_at_off( a );
|
||||
inc_t rs_a = bli_obj_row_stride( a );
|
||||
inc_t cs_a = bli_obj_col_stride( a );
|
||||
|
||||
void* buf_b = bli_obj_buffer_at_off( b );
|
||||
inc_t rs_b = bli_obj_row_stride( b );
|
||||
inc_t cs_b = bli_obj_col_stride( b );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_castm_check( a, b );
|
||||
|
||||
#if 0
|
||||
if ( bli_obj_dt( a ) == bli_obj_dt( b ) )
|
||||
{
|
||||
// If a and b share the same datatype, we can simply use copym.
|
||||
bli_copym( a, b );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
f = ftypes[dt_a][dt_b];
|
||||
|
||||
// Invoke the void pointer-based function.
|
||||
f
|
||||
(
|
||||
transa,
|
||||
m,
|
||||
n,
|
||||
buf_a, rs_a, cs_a,
|
||||
buf_b, rs_b, cs_b
|
||||
);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
//
|
||||
// Define BLAS-like interfaces with typed operands.
|
||||
//
|
||||
|
||||
#undef GENTFUNC2
|
||||
#define GENTFUNC2( ctype_a, ctype_b, cha, chb, opname ) \
|
||||
\
|
||||
void PASTEMAC2(cha,chb,opname) \
|
||||
( \
|
||||
trans_t transa, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* restrict a, inc_t rs_a, inc_t cs_a, \
|
||||
void* restrict b, inc_t rs_b, inc_t cs_b \
|
||||
) \
|
||||
{ \
|
||||
ctype_a* restrict a_cast = a; \
|
||||
ctype_b* restrict b_cast = b; \
|
||||
conj_t conja; \
|
||||
dim_t n_iter; \
|
||||
dim_t n_elem; \
|
||||
inc_t lda, inca; \
|
||||
inc_t ldb, incb; \
|
||||
dim_t j, i; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
bli_set_dims_incs_2m \
|
||||
( \
|
||||
transa, \
|
||||
m, n, rs_a, cs_a, rs_b, cs_b, \
|
||||
&n_elem, &n_iter, &inca, &lda, &incb, &ldb \
|
||||
); \
|
||||
\
|
||||
/* Extract the conjugation component from the transa parameter. */ \
|
||||
conja = bli_extract_conj( transa ); \
|
||||
\
|
||||
if ( bli_is_conj( conja ) ) \
|
||||
{ \
|
||||
if ( inca == 1 && incb == 1 ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
|
||||
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(cha,chb,copyjs)( a1[i], b1[i] ); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
|
||||
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(cha,chb,copyjs)( *a1, *b1 ); \
|
||||
\
|
||||
a1 += inca; \
|
||||
b1 += incb; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( inca == 1 && incb == 1 ) \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
|
||||
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(cha,chb,copys)( a1[i], b1[i] ); \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \
|
||||
ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \
|
||||
\
|
||||
for ( i = 0; i < n_elem; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(cha,chb,copys)( *a1, *b1 ); \
|
||||
\
|
||||
a1 += inca; \
|
||||
b1 += incb; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC2_BASIC0( castm )
|
||||
INSERT_GENTFUNC2_MIXDP0( castm )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
//
|
||||
// Define object-based _check() function.
|
||||
//
|
||||
|
||||
void bli_castm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check structure.
|
||||
// NOTE: We enforce general structure for now in order to simplify the
|
||||
// implementation.
|
||||
|
||||
bli_check_general_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
bli_check_general_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_matrix_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_matrix_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_conformal_dims( a, b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( b );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
73
frame/base/cast/bli_castm.h
Normal file
73
frame/base/cast/bli_castm.h
Normal file
@@ -0,0 +1,73 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
void bli_castm
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
//
|
||||
// Prototype BLAS-like interfaces with heterogeneous-typed operands.
|
||||
//
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_a, ctype_b, cha, chb, opname ) \
|
||||
\
|
||||
void PASTEMAC2(cha,chb,opname) \
|
||||
( \
|
||||
trans_t transa, \
|
||||
dim_t m, \
|
||||
dim_t n, \
|
||||
void* a, inc_t rs_a, inc_t cs_a, \
|
||||
void* b, inc_t rs_b, inc_t cs_b \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC0( castm )
|
||||
INSERT_GENTPROT2_MIXDP0( castm )
|
||||
|
||||
//
|
||||
// Prototype object-based _check() function.
|
||||
//
|
||||
|
||||
void bli_castm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
211
frame/base/cast/bli_castv.c
Normal file
211
frame/base/cast/bli_castv.c
Normal file
@@ -0,0 +1,211 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
// NOTE: This is one of the few functions in BLIS that is defined
|
||||
// with heterogeneous type support. This is done so that we have
|
||||
// an operation that can be used to typecast (copy-cast) a vector
|
||||
// of one datatype to a vector of another datatype.
|
||||
|
||||
typedef void (*FUNCPTR_T)
|
||||
(
|
||||
conj_t conjx,
|
||||
dim_t n,
|
||||
void* restrict x, inc_t inc_x,
|
||||
void* restrict y, inc_t inc_y
|
||||
);
|
||||
|
||||
static FUNCPTR_T GENARRAY2_ALL(ftypes,castv);
|
||||
|
||||
//
|
||||
// Define object-based interface.
|
||||
//
|
||||
|
||||
void bli_castv
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
num_t dt_x = bli_obj_dt( x );
|
||||
num_t dt_y = bli_obj_dt( y );
|
||||
|
||||
conj_t conjx = bli_obj_conj_status( x );
|
||||
|
||||
dim_t n = bli_obj_vector_dim( x );
|
||||
|
||||
void* buf_x = bli_obj_buffer_at_off( x );
|
||||
inc_t inc_x = bli_obj_vector_inc( x );
|
||||
|
||||
void* buf_y = bli_obj_buffer_at_off( y );
|
||||
inc_t inc_y = bli_obj_vector_inc( y );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_castv_check( x, y );
|
||||
|
||||
#if 0
|
||||
if ( bli_obj_dt( x ) == bli_obj_dt( y ) )
|
||||
{
|
||||
// If x and y share the same datatype, we can simply use copyv.
|
||||
bli_copyv( x, y );
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
f = ftypes[dt_x][dt_y];
|
||||
|
||||
// Invoke the void pointer-based function.
|
||||
f
|
||||
(
|
||||
conjx,
|
||||
n,
|
||||
buf_x, inc_x,
|
||||
buf_y, inc_y
|
||||
);
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
//
|
||||
// Define BLAS-like interfaces with typed operands.
|
||||
//
|
||||
|
||||
#undef GENTFUNC2
|
||||
#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \
|
||||
\
|
||||
void PASTEMAC2(chx,chy,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
void* restrict x, inc_t incx, \
|
||||
void* restrict y, inc_t incy \
|
||||
) \
|
||||
{ \
|
||||
ctype_x* restrict x1 = x; \
|
||||
ctype_y* restrict y1 = y; \
|
||||
dim_t i; \
|
||||
\
|
||||
if ( bli_is_conj( conjx ) ) \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(chx,chy,copyjs)( x1[i], y1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(chx,chy,copyjs)( *x1, *y1 ); \
|
||||
\
|
||||
x1 += incx; \
|
||||
y1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
if ( incx == 1 && incy == 1 ) \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(chx,chy,copys)( x1[i], y1[i] ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( i = 0; i < n; ++i ) \
|
||||
{ \
|
||||
PASTEMAC2(chx,chy,copys)( *x1, *y1 ); \
|
||||
\
|
||||
x1 += incx; \
|
||||
y1 += incy; \
|
||||
} \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC2_BASIC0( castv )
|
||||
INSERT_GENTFUNC2_MIXDP0( castv )
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
//
|
||||
// Define object-based _check() function.
|
||||
//
|
||||
|
||||
void bli_castv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_equal_vector_lengths( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
72
frame/base/cast/bli_castv.h
Normal file
72
frame/base/cast/bli_castv.h
Normal file
@@ -0,0 +1,72 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
//
|
||||
// Prototype object-based interface.
|
||||
//
|
||||
|
||||
void bli_castv
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
);
|
||||
|
||||
//
|
||||
// Prototype BLAS-like interfaces with heterogeneous-typed operands.
|
||||
//
|
||||
|
||||
#undef GENTPROT2
|
||||
#define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \
|
||||
\
|
||||
void PASTEMAC2(chx,chy,opname) \
|
||||
( \
|
||||
conj_t conjx, \
|
||||
dim_t n, \
|
||||
void* x, inc_t incx, \
|
||||
void* y, inc_t incy \
|
||||
);
|
||||
|
||||
INSERT_GENTPROT2_BASIC0( castv )
|
||||
INSERT_GENTPROT2_MIXDP0( castv )
|
||||
|
||||
//
|
||||
// Prototype object-based _check() function.
|
||||
//
|
||||
|
||||
void bli_castv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
);
|
||||
|
||||
118
frame/base/cast/old/bli_cast_check.c
Normal file
118
frame/base/cast/old/bli_cast_check.c
Normal file
@@ -0,0 +1,118 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_castm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check structure.
|
||||
// NOTE: We enforce general structure for now in order to simplify the
|
||||
// implementation.
|
||||
|
||||
bli_check_general_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
bli_check_general_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_matrix_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_matrix_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_conformal_dims( a, b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( b );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_castv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_equal_vector_lengths( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
45
frame/base/cast/old/bli_cast_check.h
Normal file
45
frame/base/cast/old/bli_cast_check.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
void bli_castm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
void bli_castv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
);
|
||||
127
frame/base/proj/bli_projm.c
Normal file
127
frame/base/proj/bli_projm.c
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_projm
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_projm_check( a, b );
|
||||
|
||||
if ( ( bli_obj_is_real( a ) && bli_obj_is_real( b ) ) ||
|
||||
( bli_obj_is_complex( a ) && bli_obj_is_complex( b ) ) )
|
||||
{
|
||||
// If a and b are both real or both complex, we can simply use
|
||||
// copym.
|
||||
bli_copym( a, b );
|
||||
}
|
||||
else
|
||||
{
|
||||
// This branch handles the case where one operand is real and
|
||||
// the other is complex.
|
||||
|
||||
if ( bli_obj_is_real( a ) /* && bli_obj_is_complex( b ) */ )
|
||||
{
|
||||
// If a is real and b is complex, we must obtain the real part
|
||||
// of b so that we can copy a into the real part (after
|
||||
// initializing all of b, including imaginary components, to
|
||||
// zero).
|
||||
|
||||
obj_t br;
|
||||
|
||||
bli_obj_real_part( b, &br );
|
||||
|
||||
bli_setm( &BLIS_ZERO, b );
|
||||
bli_copym( a, &br );
|
||||
}
|
||||
else // bli_obj_is_complex( a ) && bli_obj_is_real( b )
|
||||
{
|
||||
// If a is complex and b is real, we can simply copy the
|
||||
// real part of a into b.
|
||||
|
||||
obj_t ar;
|
||||
|
||||
bli_obj_real_part( a, &ar );
|
||||
|
||||
bli_copym( &ar, b );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_projm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_precisions( a, b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_matrix_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_matrix_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_conformal_dims( a, b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( b );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
46
frame/base/proj/bli_projm.h
Normal file
46
frame/base/proj/bli_projm.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
void bli_projm
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
void bli_projm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
127
frame/base/proj/bli_projv.c
Normal file
127
frame/base/proj/bli_projv.c
Normal file
@@ -0,0 +1,127 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_projv
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
bli_projv_check( x, y );
|
||||
|
||||
if ( ( bli_obj_is_real( x ) && bli_obj_is_real( y ) ) ||
|
||||
( bli_obj_is_complex( x ) && bli_obj_is_complex( y ) ) )
|
||||
{
|
||||
// If x and y are both real or both complex, we can simply use
|
||||
// copyv.
|
||||
bli_copyv( x, y );
|
||||
}
|
||||
else
|
||||
{
|
||||
// This branch handles the case where one operand is real and
|
||||
// the other is complex.
|
||||
|
||||
if ( bli_obj_is_real( x ) /* && bli_obj_is_complex( y ) */ )
|
||||
{
|
||||
// If x is real and y is complex, we must obtain the real part
|
||||
// of y so that we can copy x into the real part (after
|
||||
// initializing all of y, including imaginary components, to
|
||||
// zero).
|
||||
|
||||
obj_t yr;
|
||||
|
||||
bli_obj_real_part( y, &yr );
|
||||
|
||||
bli_setv( &BLIS_ZERO, y );
|
||||
bli_copyv( x, &yr );
|
||||
}
|
||||
else // bli_obj_is_complex( x ) && bli_obj_is_real( y )
|
||||
{
|
||||
// If x is complex and y is real, we can simply copy the
|
||||
// real part of x into y.
|
||||
|
||||
obj_t xr;
|
||||
|
||||
bli_obj_real_part( x, &xr );
|
||||
|
||||
bli_copyv( &xr, y );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
|
||||
void bli_projv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_precisions( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_equal_vector_lengths( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
46
frame/base/proj/bli_projv.h
Normal file
46
frame/base/proj/bli_projv.h
Normal file
@@ -0,0 +1,46 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
void bli_projv
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
);
|
||||
|
||||
void bli_projv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
);
|
||||
|
||||
114
frame/base/proj/old/bli_proj_check.c
Normal file
114
frame/base/proj/old/bli_proj_check.c
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_projm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_precisions( a, b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_matrix_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_matrix_object( b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_conformal_dims( a, b );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( b );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
void bli_projv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
)
|
||||
{
|
||||
err_t e_val;
|
||||
|
||||
// Check object datatypes.
|
||||
|
||||
e_val = bli_check_floating_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_floating_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_consistent_object_precisions( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object dimensions.
|
||||
|
||||
e_val = bli_check_vector_object( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_equal_vector_lengths( x, y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
e_val = bli_check_object_buffer( x );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_buffer( y );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
45
frame/base/proj/old/bli_proj_check.h
Normal file
45
frame/base/proj/old/bli_proj_check.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas at Austin nor the names
|
||||
of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
void bli_projm_check
|
||||
(
|
||||
obj_t* a,
|
||||
obj_t* b
|
||||
);
|
||||
|
||||
void bli_projv_check
|
||||
(
|
||||
obj_t* x,
|
||||
obj_t* y
|
||||
);
|
||||
@@ -402,6 +402,51 @@ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \
|
||||
|
||||
|
||||
|
||||
// -- Mixed domain/precision (all) two-operand macro --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
// Invokes GENTFUNC2 once for each of the twelve ordered pairs of distinct
// types drawn from { float, double, scomplex, dcomplex }, passing the two
// ctypes, their one-character type codes, and tfuncname.
#define INSERT_GENTFUNC2_MIXDP0( tfuncname ) \
\
GENTFUNC2( float,    double,   s, d, tfuncname ) \
GENTFUNC2( float,    scomplex, s, c, tfuncname ) \
GENTFUNC2( float,    dcomplex, s, z, tfuncname ) \
\
GENTFUNC2( double,   float,    d, s, tfuncname ) \
GENTFUNC2( double,   scomplex, d, c, tfuncname ) \
GENTFUNC2( double,   dcomplex, d, z, tfuncname ) \
\
GENTFUNC2( scomplex, float,    c, s, tfuncname ) \
GENTFUNC2( scomplex, double,   c, d, tfuncname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \
\
GENTFUNC2( dcomplex, float,    z, s, tfuncname ) \
GENTFUNC2( dcomplex, double,   z, d, tfuncname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname )
|
||||
|
||||
|
||||
// -- (one auxiliary argument) --
|
||||
|
||||
// Invokes GENTFUNC2 once for each of the twelve ordered pairs of distinct
// types drawn from { float, double, scomplex, dcomplex }, passing the two
// ctypes, their one-character type codes, tfuncname, and the auxiliary
// argument varname.
#define INSERT_GENTFUNC2_MIX_DP( tfuncname, varname ) \
\
GENTFUNC2( float,    double,   s, d, tfuncname, varname ) \
GENTFUNC2( float,    scomplex, s, c, tfuncname, varname ) \
GENTFUNC2( float,    dcomplex, s, z, tfuncname, varname ) \
\
GENTFUNC2( double,   float,    d, s, tfuncname, varname ) \
GENTFUNC2( double,   scomplex, d, c, tfuncname, varname ) \
GENTFUNC2( double,   dcomplex, d, z, tfuncname, varname ) \
\
GENTFUNC2( scomplex, float,    c, s, tfuncname, varname ) \
GENTFUNC2( scomplex, double,   c, d, tfuncname, varname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTFUNC2( dcomplex, float,    z, s, tfuncname, varname ) \
GENTFUNC2( dcomplex, double,   z, d, tfuncname, varname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname )
|
||||
|
||||
|
||||
|
||||
// -- Basic two-operand with real projection of first operand --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
@@ -395,6 +395,50 @@ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) \
|
||||
|
||||
|
||||
|
||||
// -- Mixed domain/precision (all) two-operand macro --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
// Invokes GENTPROT2 once for each of the twelve ordered pairs of distinct
// types drawn from { float, double, scomplex, dcomplex }, passing the two
// ctypes, their one-character type codes, and tfuncname.
#define INSERT_GENTPROT2_MIXDP0( tfuncname ) \
\
GENTPROT2( float,    double,   s, d, tfuncname ) \
GENTPROT2( float,    scomplex, s, c, tfuncname ) \
GENTPROT2( float,    dcomplex, s, z, tfuncname ) \
\
GENTPROT2( double,   float,    d, s, tfuncname ) \
GENTPROT2( double,   scomplex, d, c, tfuncname ) \
GENTPROT2( double,   dcomplex, d, z, tfuncname ) \
\
GENTPROT2( scomplex, float,    c, s, tfuncname ) \
GENTPROT2( scomplex, double,   c, d, tfuncname ) \
GENTPROT2( scomplex, dcomplex, c, z, tfuncname ) \
\
GENTPROT2( dcomplex, float,    z, s, tfuncname ) \
GENTPROT2( dcomplex, double,   z, d, tfuncname ) \
GENTPROT2( dcomplex, scomplex, z, c, tfuncname )
|
||||
|
||||
// -- (one auxiliary argument) --
|
||||
|
||||
// Invokes GENTPROT2 once for each of the twelve ordered pairs of distinct
// types drawn from { float, double, scomplex, dcomplex }, passing the two
// ctypes, their one-character type codes, tfuncname, and the auxiliary
// argument varname.
#define INSERT_GENTPROT2_MIX_DP( tfuncname, varname ) \
\
GENTPROT2( float,    double,   s, d, tfuncname, varname ) \
GENTPROT2( float,    scomplex, s, c, tfuncname, varname ) \
GENTPROT2( float,    dcomplex, s, z, tfuncname, varname ) \
\
GENTPROT2( double,   float,    d, s, tfuncname, varname ) \
GENTPROT2( double,   scomplex, d, c, tfuncname, varname ) \
GENTPROT2( double,   dcomplex, d, z, tfuncname, varname ) \
\
GENTPROT2( scomplex, float,    c, s, tfuncname, varname ) \
GENTPROT2( scomplex, double,   c, d, tfuncname, varname ) \
GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTPROT2( dcomplex, float,    z, s, tfuncname, varname ) \
GENTPROT2( dcomplex, double,   z, d, tfuncname, varname ) \
GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname )
|
||||
|
||||
|
||||
|
||||
// -- Basic two-operand with real projection of first operand --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
@@ -76,11 +76,36 @@ static bool_t bli_obj_is_const( obj_t* obj )
|
||||
return ( bli_obj_dt( obj ) == BLIS_BITVAL_CONST_TYPE );
|
||||
}
|
||||
|
||||
static objbits_t bli_obj_domain( obj_t* obj )
|
||||
static dom_t bli_obj_domain( obj_t* obj )
|
||||
{
|
||||
return ( obj->info & BLIS_DOMAIN_BIT );
|
||||
}
|
||||
|
||||
// Return the precision of obj's datatype: the precision bit of the info
// field, left in place (unshifted), typed as prec_t.
static prec_t bli_obj_prec( obj_t* obj )
{
	const prec_t prec = obj->info & BLIS_PRECISION_BIT;

	return prec;
}
|
||||
|
||||
// Return nonzero if the precision of obj equals BLIS_BITVAL_SINGLE_PREC.
static bool_t bli_obj_is_single_prec( obj_t* obj )
{
	const prec_t prec = bli_obj_prec( obj );

	return ( prec == BLIS_BITVAL_SINGLE_PREC );
}
|
||||
|
||||
// Return nonzero if the precision of obj equals BLIS_BITVAL_DOUBLE_PREC.
static bool_t bli_obj_is_double_prec( obj_t* obj )
{
	const prec_t prec = bli_obj_prec( obj );

	return ( prec == BLIS_BITVAL_DOUBLE_PREC );
}
|
||||
|
||||
// Return obj's datatype projected to single precision (domain unchanged).
//
// The precision bit is cleared and then replaced by the single-precision
// bit value. Masking with ~BLIS_BITVAL_SINGLE_PREC cannot do this: a
// datatype whose precision bit holds the double-precision value would keep
// it (if the single-precision bit value is zero the mask is all ones and
// the operation is a no-op).
static num_t bli_obj_dt_proj_to_single_prec( obj_t* obj )
{
	return ( ( bli_obj_dt( obj ) & ~BLIS_PRECISION_BIT ) | BLIS_BITVAL_SINGLE_PREC );
}
|
||||
|
||||
// Return obj's datatype with the double-precision bit value OR'ed in.
static num_t bli_obj_dt_proj_to_double_prec( obj_t* obj )
{
	const num_t dt_double = bli_obj_dt( obj ) | BLIS_BITVAL_DOUBLE_PREC;

	return dt_double;
}
|
||||
|
||||
static bool_t bli_obj_is_real( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_domain( obj ) == BLIS_BITVAL_REAL );
|
||||
@@ -91,16 +116,6 @@ static bool_t bli_obj_is_complex( obj_t* obj )
|
||||
return ( bli_obj_domain( obj ) == BLIS_BITVAL_COMPLEX );
|
||||
}
|
||||
|
||||
static objbits_t bli_obj_prec( obj_t* obj )
|
||||
{
|
||||
return ( obj->info & BLIS_PRECISION_BIT );
|
||||
}
|
||||
|
||||
static bool_t bli_obj_is_double_prec( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC );
|
||||
}
|
||||
|
||||
static num_t bli_obj_dt_proj_to_real( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_COMPLEX );
|
||||
@@ -108,7 +123,7 @@ static num_t bli_obj_dt_proj_to_real( obj_t* obj )
|
||||
|
||||
// Return obj's datatype projected to the complex domain.
//
// The complex bit value must be OR'ed in; AND'ing with it would discard
// the remaining datatype bits and could never turn a real datatype into a
// complex one.
static num_t bli_obj_dt_proj_to_complex( obj_t* obj )
{
	return ( bli_obj_dt( obj ) | BLIS_BITVAL_COMPLEX );
}
|
||||
|
||||
static num_t bli_obj_target_dt( obj_t* obj )
|
||||
@@ -116,9 +131,29 @@ static num_t bli_obj_target_dt( obj_t* obj )
|
||||
return ( ( obj->info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT );
|
||||
}
|
||||
|
||||
// Return the domain of obj's target datatype: the target domain bit of the
// info field, shifted down by BLIS_TARGET_DT_SHIFT.
static dom_t bli_obj_target_domain( obj_t* obj )
{
	const dom_t dom = ( obj->info & BLIS_TARGET_DOMAIN_BIT ) >> BLIS_TARGET_DT_SHIFT;

	return dom;
}
|
||||
|
||||
// Return the precision of obj's target datatype: the target precision bit
// of the info field, shifted down by BLIS_TARGET_DT_SHIFT.
static prec_t bli_obj_target_prec( obj_t* obj )
{
	const prec_t prec = ( obj->info & BLIS_TARGET_PREC_BIT ) >> BLIS_TARGET_DT_SHIFT;

	return prec;
}
|
||||
|
||||
// Return obj's execution datatype: the execution datatype bits of the info
// field, shifted down by BLIS_EXEC_DT_SHIFT. Uses the BLIS_EXEC_* names,
// matching the other execution-datatype accessors in this file.
static num_t bli_obj_exec_dt( obj_t* obj )
{
	return ( ( obj->info & BLIS_EXEC_DT_BITS ) >> BLIS_EXEC_DT_SHIFT );
}
|
||||
|
||||
// Return the domain of obj's execution datatype: the execution domain bit
// of the info field, shifted down by BLIS_EXEC_DT_SHIFT.
static dom_t bli_obj_exec_domain( obj_t* obj )
{
	const dom_t dom = ( obj->info & BLIS_EXEC_DOMAIN_BIT ) >> BLIS_EXEC_DT_SHIFT;

	return dom;
}
|
||||
|
||||
// Return the precision of obj's execution datatype: the execution
// precision bit of the info field, shifted down by BLIS_EXEC_DT_SHIFT.
static prec_t bli_obj_exec_prec( obj_t* obj )
{
	const prec_t prec = ( obj->info & BLIS_EXEC_PREC_BIT ) >> BLIS_EXEC_DT_SHIFT;

	return prec;
}
|
||||
|
||||
static trans_t bli_obj_conjtrans_status( obj_t* obj )
|
||||
@@ -326,9 +361,29 @@ static void bli_obj_set_target_dt( num_t dt, obj_t* obj )
|
||||
obj->info = ( obj->info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT );
|
||||
}
|
||||
|
||||
// Set the domain of obj's target datatype to dt.
//
// The value is shifted by BLIS_TARGET_DT_SHIFT so that this setter is the
// exact inverse of bli_obj_target_domain(), which shifts the stored bit
// down by that same amount.
static void bli_obj_set_target_domain( dom_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_TARGET_DOMAIN_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
|
||||
|
||||
// Set the precision of obj's target datatype to dt.
//
// prec_t values carry the precision bit at its position within a datatype
// (see bli_obj_prec(), which returns it unshifted), and
// bli_obj_target_prec() recovers the value by shifting down by
// BLIS_TARGET_DT_SHIFT. The setter must therefore shift up by that same
// amount; shifting by the precision bit's own offset would move the value
// past the target precision bit.
static void bli_obj_set_target_prec( prec_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_TARGET_PREC_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
|
||||
|
||||
// Set obj's execution datatype to dt: clear the execution datatype bits of
// the info field and store dt shifted up by BLIS_EXEC_DT_SHIFT. Uses the
// BLIS_EXEC_* names, matching the other execution-datatype accessors in
// this file.
static void bli_obj_set_exec_dt( num_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_EXEC_DT_BITS ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
|
||||
|
||||
// Set the domain of obj's execution datatype to dt.
//
// The value is shifted by BLIS_EXEC_DT_SHIFT so that this setter is the
// exact inverse of bli_obj_exec_domain(), which shifts the stored bit down
// by that same amount.
static void bli_obj_set_exec_domain( dom_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_EXEC_DOMAIN_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
|
||||
|
||||
// Set the precision of obj's execution datatype to dt.
//
// prec_t values carry the precision bit at its position within a datatype
// (see bli_obj_prec(), which returns it unshifted), and
// bli_obj_exec_prec() recovers the value by shifting down by
// BLIS_EXEC_DT_SHIFT. The setter must therefore shift up by that same
// amount; shifting by the precision bit's own offset would move the value
// past the execution precision bit.
static void bli_obj_set_exec_prec( prec_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_EXEC_PREC_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
|
||||
|
||||
static void bli_obj_set_pack_schema( pack_t schema, obj_t* obj )
|
||||
@@ -909,39 +964,7 @@ static void bli_obj_toggle_uplo_if_trans( trans_t trans, obj_t* obj )
|
||||
}
|
||||
}
|
||||
|
||||
// Make a full alias (shallow copy)
|
||||
|
||||
static void bli_obj_alias_to( obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_init_full_shallow_copy_of( a, b );
|
||||
}
|
||||
|
||||
// Check if two objects are aliases of one another
|
||||
|
||||
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
|
||||
{
|
||||
return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );
|
||||
}
|
||||
|
||||
|
||||
// Create an alias with a trans value applied.
|
||||
// (Note: trans may include a conj component.)
|
||||
|
||||
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_trans( trans, b );
|
||||
}
|
||||
|
||||
// Create an alias with a conj value applied.
|
||||
|
||||
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_conj( conja, b );
|
||||
}
|
||||
|
||||
// Initialize object with default properties (info field)
|
||||
// Initialize object with default properties (info field).
|
||||
|
||||
static void bli_obj_set_defaults( obj_t* obj )
|
||||
{
|
||||
@@ -1021,6 +1044,91 @@ static void* bli_obj_buffer_for_1x1( num_t dt, obj_t* obj )
|
||||
);
|
||||
}
|
||||
|
||||
// Make a full alias (shallow copy).
|
||||
|
||||
static void bli_obj_alias_to( obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_init_full_shallow_copy_of( a, b );
|
||||
}
|
||||
|
||||
// Check if two objects are aliases of one another.
|
||||
|
||||
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
|
||||
{
|
||||
return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );
|
||||
}
|
||||
|
||||
|
||||
// Create an alias with a trans value applied.
|
||||
// (Note: trans may include a conj component.)
|
||||
|
||||
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_trans( trans, b );
|
||||
}
|
||||
|
||||
// Create an alias with a conj value applied.
|
||||
|
||||
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_conj( conja, b );
|
||||
}
|
||||
|
||||
// Alias only the real part.
|
||||
|
||||
static void bli_obj_real_part( obj_t* c, obj_t* r )
|
||||
{
|
||||
bli_obj_alias_to( c, r );
|
||||
|
||||
if ( bli_obj_is_complex( c ) )
|
||||
{
|
||||
// Change the datatype.
|
||||
num_t dt_r = bli_obj_dt_proj_to_real( c );
|
||||
bli_obj_set_dt( dt_r, r );
|
||||
|
||||
// Update the element size.
|
||||
siz_t es_c = bli_obj_elem_size( c );
|
||||
bli_obj_set_elem_size( es_c/2, r );
|
||||
|
||||
// Update the strides.
|
||||
inc_t rs_c = bli_obj_row_stride( c );
|
||||
inc_t cs_c = bli_obj_col_stride( c );
|
||||
bli_obj_set_strides( 2*rs_c, 2*cs_c, r );
|
||||
|
||||
// Buffer is left unchanged.
|
||||
}
|
||||
}
|
||||
|
||||
// Alias only the imaginary part.
|
||||
|
||||
static void bli_obj_imag_part( obj_t* c, obj_t* i )
|
||||
{
|
||||
if ( bli_obj_is_complex( c ) )
|
||||
{
|
||||
bli_obj_alias_to( c, i );
|
||||
|
||||
// Change the datatype.
|
||||
num_t dt_r = bli_obj_dt_proj_to_real( c );
|
||||
bli_obj_set_dt( dt_r, i );
|
||||
|
||||
// Update the element size.
|
||||
siz_t es_c = bli_obj_elem_size( c );
|
||||
bli_obj_set_elem_size( es_c/2, i );
|
||||
|
||||
// Update the strides.
|
||||
inc_t rs_c = bli_obj_row_stride( c );
|
||||
inc_t cs_c = bli_obj_col_stride( c );
|
||||
bli_obj_set_strides( 2*rs_c, 2*cs_c, i );
|
||||
|
||||
// Update the buffer.
|
||||
inc_t is_c = bli_obj_imag_stride( c );
|
||||
char* p = bli_obj_buffer_at_off( c );
|
||||
bli_obj_set_buffer( p + is_c * es_c/2, i );
|
||||
}
|
||||
}
|
||||
|
||||
// Given a 1x1 object, acquire an address to the buffer depending on whether
|
||||
// the object is a BLIS_CONSTANT, and also set a datatype associated with the
|
||||
// chosen buffer (possibly using an auxiliary datatype if the object is
|
||||
|
||||
@@ -112,6 +112,16 @@ static bool_t bli_is_double_prec( num_t dt )
|
||||
bli_is_dcomplex( dt ) );
|
||||
}
|
||||
|
||||
static dom_t bli_dt_domain( num_t dt )
|
||||
{
|
||||
return ( dt & BLIS_DOMAIN_BIT );
|
||||
}
|
||||
|
||||
static prec_t bli_dt_prec( num_t dt )
|
||||
{
|
||||
return ( dt & BLIS_PRECISION_BIT );
|
||||
}
|
||||
|
||||
static num_t bli_dt_proj_to_real( num_t dt )
|
||||
{
|
||||
return ( dt & ~BLIS_BITVAL_COMPLEX );
|
||||
@@ -119,7 +129,17 @@ static num_t bli_dt_proj_to_real( num_t dt )
|
||||
|
||||
static num_t bli_dt_proj_to_complex( num_t dt )
|
||||
{
|
||||
return ( dt & BLIS_BITVAL_COMPLEX );
|
||||
return ( dt | BLIS_BITVAL_COMPLEX );
|
||||
}
|
||||
|
||||
static num_t bli_dt_proj_to_single_prec( num_t dt )
|
||||
{
|
||||
return ( dt & ~BLIS_BITVAL_SINGLE_PREC );
|
||||
}
|
||||
|
||||
static num_t bli_dt_proj_to_double_prec( num_t dt )
|
||||
{
|
||||
return ( dt | BLIS_BITVAL_DOUBLE_PREC );
|
||||
}
|
||||
|
||||
|
||||
@@ -990,6 +1010,41 @@ void bli_set_dims_incs_uplo_1m_noswap
|
||||
}
|
||||
}
|
||||
|
||||
// Set dimensions and increments for TWO matrix arguments.
|
||||
|
||||
static
|
||||
void bli_set_dims_incs_2m
|
||||
(
|
||||
trans_t transa,
|
||||
dim_t m, dim_t n, inc_t rs_a, inc_t cs_a,
|
||||
inc_t rs_b, inc_t cs_b,
|
||||
dim_t* n_elem, dim_t* n_iter, inc_t* inca, inc_t* lda,
|
||||
inc_t* incb, inc_t* ldb
|
||||
)
|
||||
{
|
||||
{
|
||||
*n_iter = n;
|
||||
*n_elem = m;
|
||||
*inca = rs_a;
|
||||
*lda = cs_a;
|
||||
*incb = rs_b;
|
||||
*ldb = cs_b;
|
||||
|
||||
if ( bli_does_trans( transa ) )
|
||||
{
|
||||
bli_swap_incs( inca, lda );
|
||||
}
|
||||
|
||||
if ( bli_is_row_tilted( *n_elem, *n_iter, *incb, *ldb ) &&
|
||||
bli_is_row_tilted( *n_elem, *n_iter, *inca, *lda ) )
|
||||
{
|
||||
bli_swap_dims( n_iter, n_elem );
|
||||
bli_swap_incs( inca, lda );
|
||||
bli_swap_incs( incb, ldb );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix
|
||||
// arguments.
|
||||
|
||||
@@ -1033,7 +1088,7 @@ void bli_set_dims_incs_uplo_2m
|
||||
if ( bli_is_stored_subpart( diagoffa_use_, transa, uploa, m, n ) )
|
||||
uploa = BLIS_DENSE;
|
||||
|
||||
n_iter_max_ = n;
|
||||
n_iter_max_ = n;
|
||||
*n_elem_max = m;
|
||||
*inca = rs_a;
|
||||
*lda = cs_a;
|
||||
|
||||
@@ -210,11 +210,11 @@ typedef dcomplex f77_dcomplex;
|
||||
12 ~ 10 Target numerical datatype
|
||||
- 10: domain (0 == real, 1 == complex)
|
||||
- 11: precision (0 == single, 1 == double)
|
||||
- 12: unused
|
||||
- 12: used to encode integer, constant types
|
||||
15 ~ 13 Execution numerical datatype
|
||||
- 13: domain (0 == real, 1 == complex)
|
||||
- 14: precision (0 == single, 1 == double)
|
||||
- 15: unused
|
||||
- 15: used to encode integer, constant types
|
||||
22 ~ 16 Packed type/status
|
||||
- 0 0000 00: not packed
|
||||
- 1 0000 00: packed (unspecified; by rows, columns, or vector)
|
||||
@@ -271,7 +271,11 @@ typedef dcomplex f77_dcomplex;
|
||||
#define BLIS_UNIT_DIAG_SHIFT 8
|
||||
#define BLIS_INVERT_DIAG_SHIFT 9
|
||||
#define BLIS_TARGET_DT_SHIFT 10
|
||||
#define BLIS_EXECUTION_DT_SHIFT 13
|
||||
#define BLIS_TARGET_DOMAIN_SHIFT 10
|
||||
#define BLIS_TARGET_PREC_SHIFT 11
|
||||
#define BLIS_EXEC_DT_SHIFT 13
|
||||
#define BLIS_EXEC_DOMAIN_SHIFT 13
|
||||
#define BLIS_EXEC_PREC_SHIFT 14
|
||||
#define BLIS_PACK_SCHEMA_SHIFT 16
|
||||
#define BLIS_PACK_RC_SHIFT 16
|
||||
#define BLIS_PACK_PANEL_SHIFT 17
|
||||
@@ -299,7 +303,11 @@ typedef dcomplex f77_dcomplex;
|
||||
#define BLIS_UNIT_DIAG_BIT ( 0x1 << BLIS_UNIT_DIAG_SHIFT )
|
||||
#define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT )
|
||||
#define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT )
|
||||
#define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT )
|
||||
#define BLIS_TARGET_DOMAIN_BIT ( 0x1 << BLIS_TARGET_DOMAIN_SHIFT )
|
||||
#define BLIS_TARGET_PREC_BIT ( 0x1 << BLIS_TARGET_PREC_SHIFT )
|
||||
#define BLIS_EXEC_DT_BITS ( 0x7 << BLIS_EXEC_DT_SHIFT )
|
||||
#define BLIS_EXEC_DOMAIN_BIT ( 0x1 << BLIS_EXEC_DOMAIN_SHIFT )
|
||||
#define BLIS_EXEC_PREC_BIT ( 0x1 << BLIS_EXEC_PREC_SHIFT )
|
||||
#define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT )
|
||||
#define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT )
|
||||
#define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT )
|
||||
@@ -1128,8 +1136,6 @@ typedef struct cntx_s
|
||||
pack_t schema_b_panel;
|
||||
pack_t schema_c_panel;
|
||||
|
||||
bool_t anti_pref;
|
||||
|
||||
dim_t thrloop[ BLIS_NUM_LOOPS ];
|
||||
|
||||
membrk_t* membrk;
|
||||
@@ -1177,6 +1183,7 @@ typedef enum
|
||||
BLIS_INCONSISTENT_DATATYPES = ( -36),
|
||||
BLIS_EXPECTED_REAL_PROJ_OF = ( -37),
|
||||
BLIS_EXPECTED_REAL_VALUED_OBJECT = ( -38),
|
||||
BLIS_INCONSISTENT_PRECISIONS = ( -39),
|
||||
|
||||
// Dimension-specific errors
|
||||
BLIS_NONCONFORMAL_DIMENSIONS = ( -40),
|
||||
|
||||
@@ -122,6 +122,12 @@ extern "C" {
|
||||
#include "bli_cpuid.h"
|
||||
#include "bli_string.h"
|
||||
#include "bli_setgetij.h"
|
||||
#include "bli_setri.h"
|
||||
|
||||
#include "bli_castm.h"
|
||||
#include "bli_castv.h"
|
||||
#include "bli_projm.h"
|
||||
#include "bli_projv.h"
|
||||
|
||||
|
||||
// -- Level-0 operations --
|
||||
|
||||
@@ -60,8 +60,7 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Invoke the operation's front end with the appropriate control
|
||||
tree. */ \
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
( \
|
||||
alpha, a, b, beta, c, cntx, NULL \
|
||||
@@ -98,8 +97,7 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Invoke the operation's front end with the appropriate control
|
||||
tree. */ \
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
( \
|
||||
side, alpha, a, b, beta, c, cntx, NULL \
|
||||
@@ -130,8 +128,7 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Invoke the operation's front end with the appropriate control
|
||||
tree. */ \
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
( \
|
||||
alpha, a, beta, c, cntx, NULL \
|
||||
@@ -161,8 +158,7 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Invoke the operation's front end with the appropriate control
|
||||
tree. */ \
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
( \
|
||||
side, alpha, a, b, cntx, NULL \
|
||||
@@ -191,8 +187,7 @@ void PASTEMAC(opname,imeth) \
|
||||
/* Obtain a valid (native) context from the gks if necessary. */ \
|
||||
if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \
|
||||
\
|
||||
/* Invoke the operation's front end with the appropriate control
|
||||
tree. */ \
|
||||
/* Invoke the operation's front end. */ \
|
||||
PASTEMAC(opname,_front) \
|
||||
( \
|
||||
side, alpha, a, b, cntx, NULL \
|
||||
|
||||
@@ -231,11 +231,18 @@ void bli_l3_thread_decorator
|
||||
{
|
||||
dim_t id = omp_get_thread_num();
|
||||
|
||||
obj_t a_t, b_t, c_t;
|
||||
cntl_t* cntl_use;
|
||||
thrinfo_t* thread;
|
||||
|
||||
// Alias thread-local copies of A, B, and C. These will be the objects
|
||||
// we pass into the thread functions.
|
||||
bli_obj_alias_to( a, &a_t );
|
||||
bli_obj_alias_to( b, &b_t );
|
||||
bli_obj_alias_to( c, &c_t );
|
||||
|
||||
// Create a default control tree for the operation, if needed.
|
||||
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use );
|
||||
bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use );
|
||||
|
||||
// Create the root node of the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread );
|
||||
@@ -243,17 +250,17 @@ void bli_l3_thread_decorator
|
||||
func
|
||||
(
|
||||
alpha,
|
||||
a,
|
||||
b,
|
||||
&a_t,
|
||||
&b_t,
|
||||
beta,
|
||||
c,
|
||||
&c_t,
|
||||
cntx,
|
||||
cntl_use,
|
||||
thread
|
||||
);
|
||||
|
||||
// Free the control tree, if one was created locally.
|
||||
bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread );
|
||||
bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread );
|
||||
|
||||
#ifdef PRINT_THRINFO
|
||||
threads[id] = thread;
|
||||
|
||||
@@ -161,11 +161,18 @@ void* bli_l3_thread_entry( void* data_void )
|
||||
dim_t id = data->id;
|
||||
thrcomm_t* gl_comm = data->gl_comm;
|
||||
|
||||
obj_t a_t, b_t, c_t;
|
||||
cntl_t* cntl_use;
|
||||
thrinfo_t* thread;
|
||||
|
||||
// Alias thread-local copies of A, B, and C. These will be the objects
|
||||
// we pass into the thread function.
|
||||
bli_obj_alias_to( a, &a_t );
|
||||
bli_obj_alias_to( b, &b_t );
|
||||
bli_obj_alias_to( c, &c_t );
|
||||
|
||||
// Create a default control tree for the operation, if needed.
|
||||
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use );
|
||||
bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use );
|
||||
|
||||
// Create the root node of the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread );
|
||||
@@ -173,17 +180,17 @@ void* bli_l3_thread_entry( void* data_void )
|
||||
func
|
||||
(
|
||||
alpha,
|
||||
a,
|
||||
b,
|
||||
&a_t,
|
||||
&b_t,
|
||||
beta,
|
||||
c,
|
||||
&c_t,
|
||||
cntx,
|
||||
cntl_use,
|
||||
thread
|
||||
);
|
||||
|
||||
// Free the control tree, if one was created locally.
|
||||
bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread );
|
||||
bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread );
|
||||
|
||||
// Free the current thread's thrinfo_t structure.
|
||||
bli_l3_thrinfo_free( thread );
|
||||
|
||||
@@ -94,6 +94,12 @@ void bli_l3_thread_decorator
|
||||
cntl_t* cntl_use;
|
||||
thrinfo_t* thread;
|
||||
|
||||
// NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't
|
||||
// need to alias objects for A, B, and C since they were already aliased
|
||||
// in bli_*_front(). (We only needed thread-local copies so each could
|
||||
// safely reset their internal (beta) scalars on c after the first
|
||||
// iteration of the pc (kc) loop.)
|
||||
|
||||
// Create a default control tree for the operation, if needed.
|
||||
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use );
|
||||
|
||||
|
||||
Reference in New Issue
Block a user