mirror of
https://github.com/amd/blis.git
synced 2026-05-11 01:30:00 +00:00
Merge remote-tracking branch 'upstream/dev' into asm-macros
This commit is contained in:
@@ -402,6 +402,51 @@ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \
|
||||
|
||||
|
||||
|
||||
// -- Mixed domain/precision (all) two-operand macro --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
#define INSERT_GENTFUNC2_MIXDP0( tfuncname ) \
|
||||
\
|
||||
GENTFUNC2( float, double, s, d, tfuncname ) \
|
||||
GENTFUNC2( float, scomplex, s, c, tfuncname ) \
|
||||
GENTFUNC2( float, dcomplex, s, z, tfuncname ) \
|
||||
\
|
||||
GENTFUNC2( double, float, d, s, tfuncname ) \
|
||||
GENTFUNC2( double, scomplex, d, c, tfuncname ) \
|
||||
GENTFUNC2( double, dcomplex, d, z, tfuncname ) \
|
||||
\
|
||||
GENTFUNC2( scomplex, float, c, s, tfuncname ) \
|
||||
GENTFUNC2( scomplex, double, c, d, tfuncname ) \
|
||||
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \
|
||||
\
|
||||
GENTFUNC2( dcomplex, float, z, s, tfuncname ) \
|
||||
GENTFUNC2( dcomplex, double, z, d, tfuncname ) \
|
||||
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname )
|
||||
|
||||
|
||||
// -- (one auxiliary argument) --
|
||||
|
||||
#define INSERT_GENTFUNC2_MIX_DP( tfuncname, varname ) \
|
||||
\
|
||||
GENTFUNC2( float, double, s, d, tfuncname, varname ) \
|
||||
GENTFUNC2( float, scomplex, s, c, tfuncname, varname ) \
|
||||
GENTFUNC2( float, dcomplex, s, z, tfuncname, varname ) \
|
||||
\
|
||||
GENTFUNC2( double, float, d, s, tfuncname, varname ) \
|
||||
GENTFUNC2( double, scomplex, d, c, tfuncname, varname ) \
|
||||
GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \
|
||||
\
|
||||
GENTFUNC2( scomplex, float, c, s, tfuncname, varname ) \
|
||||
GENTFUNC2( scomplex, double, c, d, tfuncname, varname ) \
|
||||
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \
|
||||
\
|
||||
GENTFUNC2( dcomplex, float, z, s, tfuncname, varname ) \
|
||||
GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) \
|
||||
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname )
|
||||
|
||||
|
||||
|
||||
// -- Basic two-operand with real projection of first operand --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
@@ -395,6 +395,50 @@ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) \
|
||||
|
||||
|
||||
|
||||
// -- Mixed domain/precision (all) two-operand macro --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
#define INSERT_GENTPROT2_MIXDP0( funcname ) \
|
||||
\
|
||||
GENTPROT2( float, double, s, d, funcname ) \
|
||||
GENTPROT2( float, scomplex, s, c, funcname ) \
|
||||
GENTPROT2( float, dcomplex, s, z, funcname ) \
|
||||
\
|
||||
GENTPROT2( double, float, d, s, funcname ) \
|
||||
GENTPROT2( double, scomplex, d, c, funcname ) \
|
||||
GENTPROT2( double, dcomplex, d, z, funcname ) \
|
||||
\
|
||||
GENTPROT2( scomplex, float, c, s, funcname ) \
|
||||
GENTPROT2( scomplex, double, c, d, funcname ) \
|
||||
GENTPROT2( scomplex, dcomplex, c, z, funcname ) \
|
||||
\
|
||||
GENTPROT2( dcomplex, float, z, s, funcname ) \
|
||||
GENTPROT2( dcomplex, double, z, d, funcname ) \
|
||||
GENTPROT2( dcomplex, scomplex, z, c, funcname )
|
||||
|
||||
// -- (one auxiliary argument) --
|
||||
|
||||
#define INSERT_GENTPROT2_MIX_DP( tfuncname, varname ) \
|
||||
\
|
||||
GENTPROT2( float, double, s, d, tfuncname, varname ) \
|
||||
GENTPROT2( float, scomplex, s, c, tfuncname, varname ) \
|
||||
GENTPROT2( float, dcomplex, s, z, tfuncname, varname ) \
|
||||
\
|
||||
GENTPROT2( double, float, d, s, tfuncname, varname ) \
|
||||
GENTPROT2( double, scomplex, d, c, tfuncname, varname ) \
|
||||
GENTPROT2( double, dcomplex, d, z, tfuncname, varname ) \
|
||||
\
|
||||
GENTPROT2( scomplex, float, c, s, tfuncname, varname ) \
|
||||
GENTPROT2( scomplex, double, c, d, tfuncname, varname ) \
|
||||
GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \
|
||||
\
|
||||
GENTPROT2( dcomplex, float, z, s, tfuncname, varname ) \
|
||||
GENTPROT2( dcomplex, double, z, d, tfuncname, varname ) \
|
||||
GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname )
|
||||
|
||||
|
||||
|
||||
// -- Basic two-operand with real projection of first operand --
|
||||
|
||||
// -- (no auxiliary arguments) --
|
||||
|
||||
@@ -76,11 +76,36 @@ static bool_t bli_obj_is_const( obj_t* obj )
|
||||
return ( bli_obj_dt( obj ) == BLIS_BITVAL_CONST_TYPE );
|
||||
}
|
||||
|
||||
static objbits_t bli_obj_domain( obj_t* obj )
|
||||
static dom_t bli_obj_domain( obj_t* obj )
|
||||
{
|
||||
return ( obj->info & BLIS_DOMAIN_BIT );
|
||||
}
|
||||
|
||||
static prec_t bli_obj_prec( obj_t* obj )
|
||||
{
|
||||
return ( obj->info & BLIS_PRECISION_BIT );
|
||||
}
|
||||
|
||||
static bool_t bli_obj_is_single_prec( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_prec( obj ) == BLIS_BITVAL_SINGLE_PREC );
|
||||
}
|
||||
|
||||
static bool_t bli_obj_is_double_prec( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC );
|
||||
}
|
||||
|
||||
// Return the object's datatype projected to single precision. The domain
// (real/complex) component of the datatype is left as-is.
static num_t bli_obj_dt_proj_to_single_prec( obj_t* obj )
{
	// Projecting to single precision means clearing the precision bit,
	// i.e. masking out the double-precision bit value. Masking with
	// ~BLIS_BITVAL_SINGLE_PREC does not clear anything when single
	// precision is encoded as a zero bit value (precision: 0 == single,
	// 1 == double), which would leave double-precision datatypes
	// unchanged.
	return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_DOUBLE_PREC );
}
|
||||
|
||||
static num_t bli_obj_dt_proj_to_double_prec( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_dt( obj ) | BLIS_BITVAL_DOUBLE_PREC );
|
||||
}
|
||||
|
||||
static bool_t bli_obj_is_real( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_domain( obj ) == BLIS_BITVAL_REAL );
|
||||
@@ -91,16 +116,6 @@ static bool_t bli_obj_is_complex( obj_t* obj )
|
||||
return ( bli_obj_domain( obj ) == BLIS_BITVAL_COMPLEX );
|
||||
}
|
||||
|
||||
static objbits_t bli_obj_prec( obj_t* obj )
|
||||
{
|
||||
return ( obj->info & BLIS_PRECISION_BIT );
|
||||
}
|
||||
|
||||
static bool_t bli_obj_is_double_prec( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC );
|
||||
}
|
||||
|
||||
static num_t bli_obj_dt_proj_to_real( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_COMPLEX );
|
||||
@@ -108,7 +123,7 @@ static num_t bli_obj_dt_proj_to_real( obj_t* obj )
|
||||
|
||||
static num_t bli_obj_dt_proj_to_complex( obj_t* obj )
|
||||
{
|
||||
return ( bli_obj_dt( obj ) & BLIS_BITVAL_COMPLEX );
|
||||
return ( bli_obj_dt( obj ) | BLIS_BITVAL_COMPLEX );
|
||||
}
|
||||
|
||||
static num_t bli_obj_target_dt( obj_t* obj )
|
||||
@@ -116,9 +131,29 @@ static num_t bli_obj_target_dt( obj_t* obj )
|
||||
return ( ( obj->info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT );
|
||||
}
|
||||
|
||||
static dom_t bli_obj_target_domain( obj_t* obj )
|
||||
{
|
||||
return ( ( obj->info & BLIS_TARGET_DOMAIN_BIT ) >> BLIS_TARGET_DT_SHIFT );
|
||||
}
|
||||
|
||||
static prec_t bli_obj_target_prec( obj_t* obj )
|
||||
{
|
||||
return ( ( obj->info & BLIS_TARGET_PREC_BIT ) >> BLIS_TARGET_DT_SHIFT );
|
||||
}
|
||||
|
||||
static num_t bli_obj_exec_dt( obj_t* obj )
|
||||
{
|
||||
return ( ( obj->info & BLIS_EXECUTION_DT_BITS ) >> BLIS_EXECUTION_DT_SHIFT );
|
||||
return ( ( obj->info & BLIS_EXEC_DT_BITS ) >> BLIS_EXEC_DT_SHIFT );
|
||||
}
|
||||
|
||||
static dom_t bli_obj_exec_domain( obj_t* obj )
|
||||
{
|
||||
return ( ( obj->info & BLIS_EXEC_DOMAIN_BIT ) >> BLIS_EXEC_DT_SHIFT );
|
||||
}
|
||||
|
||||
static prec_t bli_obj_exec_prec( obj_t* obj )
|
||||
{
|
||||
return ( ( obj->info & BLIS_EXEC_PREC_BIT ) >> BLIS_EXEC_DT_SHIFT );
|
||||
}
|
||||
|
||||
static trans_t bli_obj_conjtrans_status( obj_t* obj )
|
||||
@@ -326,9 +361,29 @@ static void bli_obj_set_target_dt( num_t dt, obj_t* obj )
|
||||
obj->info = ( obj->info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT );
|
||||
}
|
||||
|
||||
static void bli_obj_set_target_domain( dom_t dt, obj_t* obj )
|
||||
{
|
||||
obj->info = ( obj->info & ~BLIS_TARGET_DOMAIN_BIT ) | ( dt << BLIS_TARGET_DOMAIN_SHIFT );
|
||||
}
|
||||
|
||||
// Set the precision component of the object's target datatype, leaving all
// other bits of the info field unchanged.
static void bli_obj_set_target_prec( prec_t dt, obj_t* obj )
{
	// bli_obj_target_prec() extracts the precision by shifting right by
	// BLIS_TARGET_DT_SHIFT, so the value is expressed relative to the start
	// of the target datatype field. Shift by the same amount here so that
	// a value obtained from the getter round-trips into
	// BLIS_TARGET_PREC_BIT rather than into the next-higher bit.
	obj->info = ( obj->info & ~BLIS_TARGET_PREC_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
|
||||
|
||||
static void bli_obj_set_exec_dt( num_t dt, obj_t* obj )
|
||||
{
|
||||
obj->info = ( obj->info & ~BLIS_EXECUTION_DT_BITS ) | ( dt << BLIS_EXECUTION_DT_SHIFT );
|
||||
obj->info = ( obj->info & ~BLIS_EXEC_DT_BITS ) | ( dt << BLIS_EXEC_DT_SHIFT );
|
||||
}
|
||||
|
||||
static void bli_obj_set_exec_domain( dom_t dt, obj_t* obj )
|
||||
{
|
||||
obj->info = ( obj->info & ~BLIS_EXEC_DOMAIN_BIT ) | ( dt << BLIS_EXEC_DOMAIN_SHIFT );
|
||||
}
|
||||
|
||||
// Set the precision component of the object's execution datatype, leaving
// all other bits of the info field unchanged.
static void bli_obj_set_exec_prec( prec_t dt, obj_t* obj )
{
	// bli_obj_exec_prec() extracts the precision by shifting right by
	// BLIS_EXEC_DT_SHIFT, so the value is expressed relative to the start
	// of the execution datatype field. Shift by the same amount here so
	// that a value obtained from the getter round-trips into
	// BLIS_EXEC_PREC_BIT rather than into the next-higher bit.
	obj->info = ( obj->info & ~BLIS_EXEC_PREC_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
|
||||
|
||||
static void bli_obj_set_pack_schema( pack_t schema, obj_t* obj )
|
||||
@@ -909,39 +964,7 @@ static void bli_obj_toggle_uplo_if_trans( trans_t trans, obj_t* obj )
|
||||
}
|
||||
}
|
||||
|
||||
// Make a full alias (shallow copy)
|
||||
|
||||
static void bli_obj_alias_to( obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_init_full_shallow_copy_of( a, b );
|
||||
}
|
||||
|
||||
// Check if two objects are aliases of one another
|
||||
|
||||
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
|
||||
{
|
||||
return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );
|
||||
}
|
||||
|
||||
|
||||
// Create an alias with a trans value applied.
|
||||
// (Note: trans may include a conj component.)
|
||||
|
||||
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_trans( trans, b );
|
||||
}
|
||||
|
||||
// Create an alias with a conj value applied.
|
||||
|
||||
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_conj( conja, b );
|
||||
}
|
||||
|
||||
// Initialize object with default properties (info field)
|
||||
// Initialize object with default properties (info field).
|
||||
|
||||
static void bli_obj_set_defaults( obj_t* obj )
|
||||
{
|
||||
@@ -1021,6 +1044,91 @@ static void* bli_obj_buffer_for_1x1( num_t dt, obj_t* obj )
|
||||
);
|
||||
}
|
||||
|
||||
// Make a full alias (shallow copy).
|
||||
|
||||
static void bli_obj_alias_to( obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_init_full_shallow_copy_of( a, b );
|
||||
}
|
||||
|
||||
// Check if two objects are aliases of one another.
|
||||
|
||||
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
|
||||
{
|
||||
return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );
|
||||
}
|
||||
|
||||
|
||||
// Create an alias with a trans value applied.
|
||||
// (Note: trans may include a conj component.)
|
||||
|
||||
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_trans( trans, b );
|
||||
}
|
||||
|
||||
// Create an alias with a conj value applied.
|
||||
|
||||
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
|
||||
{
|
||||
bli_obj_alias_to( a, b );
|
||||
bli_obj_apply_conj( conja, b );
|
||||
}
|
||||
|
||||
// Alias only the real part.
|
||||
|
||||
// Make r an alias of c that views only the real components of c. If c is
// not complex, r is simply a full alias of c.
static void bli_obj_real_part( obj_t* c, obj_t* r )
{
	// Start from a full shallow copy; the adjustments below only apply to
	// complex objects.
	bli_obj_alias_to( c, r );

	if ( !bli_obj_is_complex( c ) ) return;

	// The alias takes the real projection of c's datatype.
	bli_obj_set_dt( bli_obj_dt_proj_to_real( c ), r );

	// Halve the element size relative to c.
	bli_obj_set_elem_size( bli_obj_elem_size( c ) / 2, r );

	// Double both strides relative to c.
	inc_t row_inc_c = bli_obj_row_stride( c );
	inc_t col_inc_c = bli_obj_col_stride( c );
	bli_obj_set_strides( 2 * row_inc_c, 2 * col_inc_c, r );

	// The buffer address is intentionally not modified.
}
|
||||
|
||||
// Alias only the imaginary part.
|
||||
|
||||
// Make i an alias of c that views only the imaginary components of c.
// If c is not complex, i is not touched at all.
static void bli_obj_imag_part( obj_t* c, obj_t* i )
{
	// Nothing to alias for non-complex objects.
	if ( !bli_obj_is_complex( c ) ) return;

	bli_obj_alias_to( c, i );

	// The alias takes the real projection of c's datatype.
	num_t dt_real = bli_obj_dt_proj_to_real( c );
	bli_obj_set_dt( dt_real, i );

	// Halve the element size relative to c.
	siz_t elem_sz_c = bli_obj_elem_size( c );
	bli_obj_set_elem_size( elem_sz_c / 2, i );

	// Double both strides relative to c.
	inc_t row_inc_c = bli_obj_row_stride( c );
	inc_t col_inc_c = bli_obj_col_stride( c );
	bli_obj_set_strides( 2 * row_inc_c, 2 * col_inc_c, i );

	// Advance the buffer from c's current offset by the imaginary stride,
	// measured in units of half of c's element size (byte arithmetic).
	inc_t imag_inc_c = bli_obj_imag_stride( c );
	char* base = bli_obj_buffer_at_off( c );
	bli_obj_set_buffer( base + imag_inc_c * elem_sz_c / 2, i );
}
|
||||
|
||||
// Given a 1x1 object, acquire an address to the buffer depending on whether
|
||||
// the object is a BLIS_CONSTANT, and also set a datatype associated with the
|
||||
// chosen buffer (possibly using an auxiliary datatype if the object is
|
||||
|
||||
@@ -112,6 +112,16 @@ static bool_t bli_is_double_prec( num_t dt )
|
||||
bli_is_dcomplex( dt ) );
|
||||
}
|
||||
|
||||
static dom_t bli_dt_domain( num_t dt )
|
||||
{
|
||||
return ( dt & BLIS_DOMAIN_BIT );
|
||||
}
|
||||
|
||||
static prec_t bli_dt_prec( num_t dt )
|
||||
{
|
||||
return ( dt & BLIS_PRECISION_BIT );
|
||||
}
|
||||
|
||||
static num_t bli_dt_proj_to_real( num_t dt )
|
||||
{
|
||||
return ( dt & ~BLIS_BITVAL_COMPLEX );
|
||||
@@ -119,7 +129,17 @@ static num_t bli_dt_proj_to_real( num_t dt )
|
||||
|
||||
static num_t bli_dt_proj_to_complex( num_t dt )
|
||||
{
|
||||
return ( dt & BLIS_BITVAL_COMPLEX );
|
||||
return ( dt | BLIS_BITVAL_COMPLEX );
|
||||
}
|
||||
|
||||
// Return the datatype dt projected to single precision. The domain
// (real/complex) component of dt is left as-is.
static num_t bli_dt_proj_to_single_prec( num_t dt )
{
	// Projecting to single precision means clearing the precision bit,
	// i.e. masking out the double-precision bit value. Masking with
	// ~BLIS_BITVAL_SINGLE_PREC does not clear anything when single
	// precision is encoded as a zero bit value (precision: 0 == single,
	// 1 == double), which would leave double-precision datatypes
	// unchanged.
	return ( dt & ~BLIS_BITVAL_DOUBLE_PREC );
}
|
||||
|
||||
static num_t bli_dt_proj_to_double_prec( num_t dt )
|
||||
{
|
||||
return ( dt | BLIS_BITVAL_DOUBLE_PREC );
|
||||
}
|
||||
|
||||
|
||||
@@ -990,6 +1010,41 @@ void bli_set_dims_incs_uplo_1m_noswap
|
||||
}
|
||||
}
|
||||
|
||||
// Set dimensions and increments for TWO matrix arguments.
|
||||
|
||||
static
|
||||
void bli_set_dims_incs_2m
|
||||
(
|
||||
trans_t transa,
|
||||
dim_t m, dim_t n, inc_t rs_a, inc_t cs_a,
|
||||
inc_t rs_b, inc_t cs_b,
|
||||
dim_t* n_elem, dim_t* n_iter, inc_t* inca, inc_t* lda,
|
||||
inc_t* incb, inc_t* ldb
|
||||
)
|
||||
{
|
||||
{
|
||||
*n_iter = n;
|
||||
*n_elem = m;
|
||||
*inca = rs_a;
|
||||
*lda = cs_a;
|
||||
*incb = rs_b;
|
||||
*ldb = cs_b;
|
||||
|
||||
if ( bli_does_trans( transa ) )
|
||||
{
|
||||
bli_swap_incs( inca, lda );
|
||||
}
|
||||
|
||||
if ( bli_is_row_tilted( *n_elem, *n_iter, *incb, *ldb ) &&
|
||||
bli_is_row_tilted( *n_elem, *n_iter, *inca, *lda ) )
|
||||
{
|
||||
bli_swap_dims( n_iter, n_elem );
|
||||
bli_swap_incs( inca, lda );
|
||||
bli_swap_incs( incb, ldb );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix
|
||||
// arguments.
|
||||
|
||||
@@ -1033,7 +1088,7 @@ void bli_set_dims_incs_uplo_2m
|
||||
if ( bli_is_stored_subpart( diagoffa_use_, transa, uploa, m, n ) )
|
||||
uploa = BLIS_DENSE;
|
||||
|
||||
n_iter_max_ = n;
|
||||
n_iter_max_ = n;
|
||||
*n_elem_max = m;
|
||||
*inca = rs_a;
|
||||
*lda = cs_a;
|
||||
|
||||
@@ -210,11 +210,11 @@ typedef dcomplex f77_dcomplex;
|
||||
12 ~ 10 Target numerical datatype
|
||||
- 10: domain (0 == real, 1 == complex)
|
||||
- 11: precision (0 == single, 1 == double)
|
||||
- 12: unused
|
||||
- 12: used to encode integer, constant types
|
||||
15 ~ 13 Execution numerical datatype
|
||||
- 13: domain (0 == real, 1 == complex)
|
||||
- 14: precision (0 == single, 1 == double)
|
||||
- 15: unused
|
||||
- 15: used to encode integer, constant types
|
||||
22 ~ 16 Packed type/status
|
||||
- 0 0000 00: not packed
|
||||
- 1 0000 00: packed (unspecified; by rows, columns, or vector)
|
||||
@@ -271,7 +271,11 @@ typedef dcomplex f77_dcomplex;
|
||||
#define BLIS_UNIT_DIAG_SHIFT 8
|
||||
#define BLIS_INVERT_DIAG_SHIFT 9
|
||||
#define BLIS_TARGET_DT_SHIFT 10
|
||||
#define BLIS_EXECUTION_DT_SHIFT 13
|
||||
#define BLIS_TARGET_DOMAIN_SHIFT 10
|
||||
#define BLIS_TARGET_PREC_SHIFT 11
|
||||
#define BLIS_EXEC_DT_SHIFT 13
|
||||
#define BLIS_EXEC_DOMAIN_SHIFT 13
|
||||
#define BLIS_EXEC_PREC_SHIFT 14
|
||||
#define BLIS_PACK_SCHEMA_SHIFT 16
|
||||
#define BLIS_PACK_RC_SHIFT 16
|
||||
#define BLIS_PACK_PANEL_SHIFT 17
|
||||
@@ -299,7 +303,11 @@ typedef dcomplex f77_dcomplex;
|
||||
#define BLIS_UNIT_DIAG_BIT ( 0x1 << BLIS_UNIT_DIAG_SHIFT )
|
||||
#define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT )
|
||||
#define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT )
|
||||
#define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT )
|
||||
#define BLIS_TARGET_DOMAIN_BIT ( 0x1 << BLIS_TARGET_DOMAIN_SHIFT )
|
||||
#define BLIS_TARGET_PREC_BIT ( 0x1 << BLIS_TARGET_PREC_SHIFT )
|
||||
#define BLIS_EXEC_DT_BITS ( 0x7 << BLIS_EXEC_DT_SHIFT )
|
||||
#define BLIS_EXEC_DOMAIN_BIT ( 0x1 << BLIS_EXEC_DOMAIN_SHIFT )
|
||||
#define BLIS_EXEC_PREC_BIT ( 0x1 << BLIS_EXEC_PREC_SHIFT )
|
||||
#define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT )
|
||||
#define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT )
|
||||
#define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT )
|
||||
@@ -1128,8 +1136,6 @@ typedef struct cntx_s
|
||||
pack_t schema_b_panel;
|
||||
pack_t schema_c_panel;
|
||||
|
||||
bool_t anti_pref;
|
||||
|
||||
dim_t thrloop[ BLIS_NUM_LOOPS ];
|
||||
|
||||
membrk_t* membrk;
|
||||
@@ -1177,6 +1183,7 @@ typedef enum
|
||||
BLIS_INCONSISTENT_DATATYPES = ( -36),
|
||||
BLIS_EXPECTED_REAL_PROJ_OF = ( -37),
|
||||
BLIS_EXPECTED_REAL_VALUED_OBJECT = ( -38),
|
||||
BLIS_INCONSISTENT_PRECISIONS = ( -39),
|
||||
|
||||
// Dimension-specific errors
|
||||
BLIS_NONCONFORMAL_DIMENSIONS = ( -40),
|
||||
|
||||
@@ -122,6 +122,12 @@ extern "C" {
|
||||
#include "bli_cpuid.h"
|
||||
#include "bli_string.h"
|
||||
#include "bli_setgetij.h"
|
||||
#include "bli_setri.h"
|
||||
|
||||
#include "bli_castm.h"
|
||||
#include "bli_castv.h"
|
||||
#include "bli_projm.h"
|
||||
#include "bli_projv.h"
|
||||
|
||||
|
||||
// -- Level-0 operations --
|
||||
|
||||
Reference in New Issue
Block a user