Merge remote-tracking branch 'upstream/dev' into asm-macros

This commit is contained in:
Devin Matthews
2018-06-20 14:07:49 -05:00
181 changed files with 4391 additions and 923 deletions

View File

@@ -402,6 +402,51 @@ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \
// -- Mixed domain/precision (all) two-operand macro --
// Expands GENTFUNC2 once for every ordered pair of distinct types drawn from
// float, double, scomplex, and dcomplex (twelve expansions in all), passing
// the two type names, their one-letter codes (s, d, c, z), and tfuncname.
// Pairs of identical types are not generated by this macro.
// -- (no auxiliary arguments) --
#define INSERT_GENTFUNC2_MIXDP0( tfuncname ) \
\
GENTFUNC2( float, double, s, d, tfuncname ) \
GENTFUNC2( float, scomplex, s, c, tfuncname ) \
GENTFUNC2( float, dcomplex, s, z, tfuncname ) \
\
GENTFUNC2( double, float, d, s, tfuncname ) \
GENTFUNC2( double, scomplex, d, c, tfuncname ) \
GENTFUNC2( double, dcomplex, d, z, tfuncname ) \
\
GENTFUNC2( scomplex, float, c, s, tfuncname ) \
GENTFUNC2( scomplex, double, c, d, tfuncname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \
\
GENTFUNC2( dcomplex, float, z, s, tfuncname ) \
GENTFUNC2( dcomplex, double, z, d, tfuncname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname )
// -- (one auxiliary argument) --
// Expands GENTFUNC2 for each of the twelve ordered pairs of distinct types
// among float, double, scomplex, and dcomplex, forwarding varname as an
// extra trailing argument after tfuncname.
#define INSERT_GENTFUNC2_MIX_DP( tfuncname, varname ) \
\
GENTFUNC2( float, double, s, d, tfuncname, varname ) \
GENTFUNC2( float, scomplex, s, c, tfuncname, varname ) \
GENTFUNC2( float, dcomplex, s, z, tfuncname, varname ) \
\
GENTFUNC2( double, float, d, s, tfuncname, varname ) \
GENTFUNC2( double, scomplex, d, c, tfuncname, varname ) \
GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \
\
GENTFUNC2( scomplex, float, c, s, tfuncname, varname ) \
GENTFUNC2( scomplex, double, c, d, tfuncname, varname ) \
GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTFUNC2( dcomplex, float, z, s, tfuncname, varname ) \
GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) \
GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname )
// -- Basic two-operand with real projection of first operand --
// -- (no auxiliary arguments) --

View File

@@ -395,6 +395,50 @@ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) \
// -- Mixed domain/precision (all) two-operand macro --
// Expands GENTPROT2 once for every ordered pair of distinct types drawn from
// float, double, scomplex, and dcomplex (twelve expansions in all), passing
// the two type names, their one-letter codes (s, d, c, z), and funcname.
// Pairs of identical types are not generated by this macro.
// -- (no auxiliary arguments) --
#define INSERT_GENTPROT2_MIXDP0( funcname ) \
\
GENTPROT2( float, double, s, d, funcname ) \
GENTPROT2( float, scomplex, s, c, funcname ) \
GENTPROT2( float, dcomplex, s, z, funcname ) \
\
GENTPROT2( double, float, d, s, funcname ) \
GENTPROT2( double, scomplex, d, c, funcname ) \
GENTPROT2( double, dcomplex, d, z, funcname ) \
\
GENTPROT2( scomplex, float, c, s, funcname ) \
GENTPROT2( scomplex, double, c, d, funcname ) \
GENTPROT2( scomplex, dcomplex, c, z, funcname ) \
\
GENTPROT2( dcomplex, float, z, s, funcname ) \
GENTPROT2( dcomplex, double, z, d, funcname ) \
GENTPROT2( dcomplex, scomplex, z, c, funcname )
// -- (one auxiliary argument) --
// Expands GENTPROT2 for each of the twelve ordered pairs of distinct types
// among float, double, scomplex, and dcomplex, forwarding varname as an
// extra trailing argument after tfuncname.
#define INSERT_GENTPROT2_MIX_DP( tfuncname, varname ) \
\
GENTPROT2( float, double, s, d, tfuncname, varname ) \
GENTPROT2( float, scomplex, s, c, tfuncname, varname ) \
GENTPROT2( float, dcomplex, s, z, tfuncname, varname ) \
\
GENTPROT2( double, float, d, s, tfuncname, varname ) \
GENTPROT2( double, scomplex, d, c, tfuncname, varname ) \
GENTPROT2( double, dcomplex, d, z, tfuncname, varname ) \
\
GENTPROT2( scomplex, float, c, s, tfuncname, varname ) \
GENTPROT2( scomplex, double, c, d, tfuncname, varname ) \
GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \
\
GENTPROT2( dcomplex, float, z, s, tfuncname, varname ) \
GENTPROT2( dcomplex, double, z, d, tfuncname, varname ) \
GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname )
// -- Basic two-operand with real projection of first operand --
// -- (no auxiliary arguments) --

View File

@@ -76,11 +76,36 @@ static bool_t bli_obj_is_const( obj_t* obj )
return ( bli_obj_dt( obj ) == BLIS_BITVAL_CONST_TYPE );
}
static objbits_t bli_obj_domain( obj_t* obj )
static dom_t bli_obj_domain( obj_t* obj )
{
return ( obj->info & BLIS_DOMAIN_BIT );
}
// Extract the precision bit of the object's info field (masked in place,
// not shifted).
static prec_t bli_obj_prec( obj_t* obj )
{
	const prec_t prec_bit = obj->info & BLIS_PRECISION_BIT;

	return prec_bit;
}
// Report whether the object's precision bit equals BLIS_BITVAL_SINGLE_PREC.
static bool_t bli_obj_is_single_prec( obj_t* obj )
{
	const bool_t is_single = ( bli_obj_prec( obj ) == BLIS_BITVAL_SINGLE_PREC );

	return is_single;
}
// Report whether the object's precision bit equals BLIS_BITVAL_DOUBLE_PREC.
static bool_t bli_obj_is_double_prec( obj_t* obj )
{
	const bool_t is_double = ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC );

	return is_double;
}
// Return the object's datatype projected to single precision, keeping its
// domain. Single precision is the "bit clear" state, so the projection
// clears the double-precision bit -- the same pattern the real projection
// uses when it clears BLIS_BITVAL_COMPLEX.
// NOTE(review): masking with ~BLIS_BITVAL_SINGLE_PREC would leave the
// datatype unchanged if that constant is 0 (the info-field layout documents
// 0 == single) -- confirm against the constant's definition.
static num_t bli_obj_dt_proj_to_single_prec( obj_t* obj )
{
	return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_DOUBLE_PREC );
}
// Return the object's datatype with the double-precision bit set; all other
// datatype bits are passed through unchanged.
static num_t bli_obj_dt_proj_to_double_prec( obj_t* obj )
{
	const num_t dt_double = bli_obj_dt( obj ) | BLIS_BITVAL_DOUBLE_PREC;

	return dt_double;
}
static bool_t bli_obj_is_real( obj_t* obj )
{
return ( bli_obj_domain( obj ) == BLIS_BITVAL_REAL );
@@ -91,16 +116,6 @@ static bool_t bli_obj_is_complex( obj_t* obj )
return ( bli_obj_domain( obj ) == BLIS_BITVAL_COMPLEX );
}
// Extract the precision bit of the object's info field (masked in place,
// not shifted).
static objbits_t bli_obj_prec( obj_t* obj )
{
	const objbits_t prec_bit = obj->info & BLIS_PRECISION_BIT;

	return prec_bit;
}
// Report whether the object's precision bit equals BLIS_BITVAL_DOUBLE_PREC.
static bool_t bli_obj_is_double_prec( obj_t* obj )
{
	const bool_t is_double = ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC );

	return is_double;
}
static num_t bli_obj_dt_proj_to_real( obj_t* obj )
{
return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_COMPLEX );
@@ -108,7 +123,7 @@ static num_t bli_obj_dt_proj_to_real( obj_t* obj )
// Return the object's datatype projected to the complex domain. The complex
// bit is OR-ed in so the remaining datatype bits are preserved; AND-ing with
// BLIS_BITVAL_COMPLEX would instead discard every bit except the domain bit.
static num_t bli_obj_dt_proj_to_complex( obj_t* obj )
{
	return ( bli_obj_dt( obj ) | BLIS_BITVAL_COMPLEX );
}
static num_t bli_obj_target_dt( obj_t* obj )
@@ -116,9 +131,29 @@ static num_t bli_obj_target_dt( obj_t* obj )
return ( ( obj->info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT );
}
// Read the target-datatype domain bit from the info field and shift it down
// by BLIS_TARGET_DT_SHIFT.
static dom_t bli_obj_target_domain( obj_t* obj )
{
	const dom_t target_dom = ( obj->info & BLIS_TARGET_DOMAIN_BIT ) >> BLIS_TARGET_DT_SHIFT;

	return target_dom;
}
// Read the target-datatype precision bit from the info field and shift it
// down by BLIS_TARGET_DT_SHIFT (the shift of the whole target-datatype
// field, not of the precision bit alone).
static prec_t bli_obj_target_prec( obj_t* obj )
{
	const prec_t target_prec = ( obj->info & BLIS_TARGET_PREC_BIT ) >> BLIS_TARGET_DT_SHIFT;

	return target_prec;
}
// Return the execution datatype: the BLIS_EXEC_DT_BITS field of the info
// word, shifted down by BLIS_EXEC_DT_SHIFT.
static num_t bli_obj_exec_dt( obj_t* obj )
{
	return ( ( obj->info & BLIS_EXEC_DT_BITS ) >> BLIS_EXEC_DT_SHIFT );
}
// Read the execution-datatype domain bit from the info field and shift it
// down by BLIS_EXEC_DT_SHIFT.
static dom_t bli_obj_exec_domain( obj_t* obj )
{
	const dom_t exec_dom = ( obj->info & BLIS_EXEC_DOMAIN_BIT ) >> BLIS_EXEC_DT_SHIFT;

	return exec_dom;
}
// Read the execution-datatype precision bit from the info field and shift
// it down by BLIS_EXEC_DT_SHIFT (the shift of the whole execution-datatype
// field, not of the precision bit alone).
static prec_t bli_obj_exec_prec( obj_t* obj )
{
	const prec_t exec_prec = ( obj->info & BLIS_EXEC_PREC_BIT ) >> BLIS_EXEC_DT_SHIFT;

	return exec_prec;
}
static trans_t bli_obj_conjtrans_status( obj_t* obj )
@@ -326,9 +361,29 @@ static void bli_obj_set_target_dt( num_t dt, obj_t* obj )
obj->info = ( obj->info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
// Overwrite the target-datatype domain bit of the info field with dt.
static void bli_obj_set_target_domain( dom_t dt, obj_t* obj )
{
	// Clear the current domain bit, then OR in the new value.
	obj->info &= ~BLIS_TARGET_DOMAIN_BIT;
	obj->info |= ( dt << BLIS_TARGET_DOMAIN_SHIFT );
}
// Overwrite the target-datatype precision bit of the info field with dt.
//
// The value is shifted by BLIS_TARGET_DT_SHIFT -- the same amount that
// bli_obj_target_prec() shifts right by -- so that a set followed by a get
// returns the original value. Shifting by BLIS_TARGET_PREC_SHIFT instead
// would place an already-positioned precision value one bit too high.
// NOTE(review): assumes prec_t values carry the precision bit in its
// datatype position (as bli_obj_prec() returns it) -- confirm.
static void bli_obj_set_target_prec( prec_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_TARGET_PREC_BIT ) | ( dt << BLIS_TARGET_DT_SHIFT );
}
// Overwrite the execution-datatype field of the info word with dt: the
// BLIS_EXEC_DT_BITS are cleared and dt is inserted at BLIS_EXEC_DT_SHIFT.
static void bli_obj_set_exec_dt( num_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_EXEC_DT_BITS ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
// Overwrite the execution-datatype domain bit of the info field with dt.
static void bli_obj_set_exec_domain( dom_t dt, obj_t* obj )
{
	// Clear the current domain bit, then OR in the new value.
	obj->info &= ~BLIS_EXEC_DOMAIN_BIT;
	obj->info |= ( dt << BLIS_EXEC_DOMAIN_SHIFT );
}
// Overwrite the execution-datatype precision bit of the info field with dt.
//
// The value is shifted by BLIS_EXEC_DT_SHIFT -- the same amount that
// bli_obj_exec_prec() shifts right by -- so that a set followed by a get
// returns the original value. Shifting by BLIS_EXEC_PREC_SHIFT instead
// would place an already-positioned precision value one bit too high.
// NOTE(review): assumes prec_t values carry the precision bit in its
// datatype position (as bli_obj_prec() returns it) -- confirm.
static void bli_obj_set_exec_prec( prec_t dt, obj_t* obj )
{
	obj->info = ( obj->info & ~BLIS_EXEC_PREC_BIT ) | ( dt << BLIS_EXEC_DT_SHIFT );
}
static void bli_obj_set_pack_schema( pack_t schema, obj_t* obj )
@@ -909,39 +964,7 @@ static void bli_obj_toggle_uplo_if_trans( trans_t trans, obj_t* obj )
}
}
// Make a full alias (shallow copy).
// Delegates to bli_obj_init_full_shallow_copy_of( a, b ). Judging by the
// callers in this file, a is the source object and b receives the alias.
static void bli_obj_alias_to( obj_t* a, obj_t* b )
{
bli_obj_init_full_shallow_copy_of( a, b );
}
// Check if two objects are aliases of one another.
// The test is pointer equality of the values returned by bli_obj_buffer()
// for a and b; no other object fields are compared.
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
{
return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );
}
// Create an alias with a trans value applied.
// (Note: trans may include a conj component.)
// b is first made an alias of a, then trans is applied to b only; a is not
// modified by this function.
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
{
bli_obj_alias_to( a, b );
bli_obj_apply_trans( trans, b );
}
// Create an alias with a conj value applied.
// b is first made an alias of a, then conja is applied to b only; a is not
// modified by this function.
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
{
bli_obj_alias_to( a, b );
bli_obj_apply_conj( conja, b );
}
// Initialize object with default properties (info field)
// Initialize object with default properties (info field).
static void bli_obj_set_defaults( obj_t* obj )
{
@@ -1021,6 +1044,91 @@ static void* bli_obj_buffer_for_1x1( num_t dt, obj_t* obj )
);
}
// Make a full alias (shallow copy).
// Delegates to bli_obj_init_full_shallow_copy_of( a, b ). Judging by the
// callers in this file, a is the source object and b receives the alias.
static void bli_obj_alias_to( obj_t* a, obj_t* b )
{
bli_obj_init_full_shallow_copy_of( a, b );
}
// Check if two objects are aliases of one another.
// The test is pointer equality of the values returned by bli_obj_buffer()
// for a and b; no other object fields are compared.
static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b )
{
return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) );
}
// Create an alias with a trans value applied.
// (Note: trans may include a conj component.)
// b is first made an alias of a, then trans is applied to b only; a is not
// modified by this function.
static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b )
{
bli_obj_alias_to( a, b );
bli_obj_apply_trans( trans, b );
}
// Create an alias with a conj value applied.
// b is first made an alias of a, then conja is applied to b only; a is not
// modified by this function.
static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b )
{
bli_obj_alias_to( a, b );
bli_obj_apply_conj( conja, b );
}
// Alias only the real part.
// r always becomes a full alias of c. When c is complex, r is additionally
// retyped to the real projection of c's datatype, its element size is
// halved, and its row/column strides are doubled. The buffer address is
// not modified.
static void bli_obj_real_part( obj_t* c, obj_t* r )
{
	bli_obj_alias_to( c, r );

	// A non-complex object needs nothing beyond the plain alias.
	if ( !bli_obj_is_complex( c ) ) return;

	// Datatype: project onto the real domain.
	num_t real_dt = bli_obj_dt_proj_to_real( c );
	bli_obj_set_dt( real_dt, r );

	// Element size: one real component is half of a complex element.
	siz_t elem_size_c = bli_obj_elem_size( c );
	bli_obj_set_elem_size( elem_size_c/2, r );

	// Strides: expressed in (half-sized) real elements, so they double.
	inc_t row_stride_c = bli_obj_row_stride( c );
	inc_t col_stride_c = bli_obj_col_stride( c );
	bli_obj_set_strides( 2*row_stride_c, 2*col_stride_c, r );
}
// Alias only the imaginary part.
// Does nothing (i is left untouched) unless c is complex. For a complex c,
// i becomes an alias of c retyped to the real projection of c's datatype,
// with halved element size, doubled row/column strides, and a buffer
// pointer advanced from c's offset buffer address by the imaginary stride.
static void bli_obj_imag_part( obj_t* c, obj_t* i )
{
	// Unlike the real part, no alias is created for a non-complex object.
	if ( !bli_obj_is_complex( c ) ) return;

	bli_obj_alias_to( c, i );

	// Datatype: project onto the real domain.
	num_t real_dt = bli_obj_dt_proj_to_real( c );
	bli_obj_set_dt( real_dt, i );

	// Element size: one real component is half of a complex element.
	siz_t elem_size_c = bli_obj_elem_size( c );
	bli_obj_set_elem_size( elem_size_c/2, i );

	// Strides: expressed in (half-sized) real elements, so they double.
	inc_t row_stride_c = bli_obj_row_stride( c );
	inc_t col_stride_c = bli_obj_col_stride( c );
	bli_obj_set_strides( 2*row_stride_c, 2*col_stride_c, i );

	// Buffer: step in bytes from c's offset buffer address.
	inc_t imag_stride_c = bli_obj_imag_stride( c );
	char* base = bli_obj_buffer_at_off( c );
	bli_obj_set_buffer( base + imag_stride_c * elem_size_c/2, i );
}
// Given a 1x1 object, acquire an address to the buffer depending on whether
// the object is a BLIS_CONSTANT, and also set a datatype associated with the
// chosen buffer (possibly using an auxiliary datatype if the object is

View File

@@ -112,6 +112,16 @@ static bool_t bli_is_double_prec( num_t dt )
bli_is_dcomplex( dt ) );
}
// Extract the domain bit of a datatype value (masked in place, not shifted).
static dom_t bli_dt_domain( num_t dt )
{
	const dom_t dom_bit = dt & BLIS_DOMAIN_BIT;

	return dom_bit;
}
// Extract the precision bit of a datatype value (masked in place, not
// shifted).
static prec_t bli_dt_prec( num_t dt )
{
	const prec_t prec_bit = dt & BLIS_PRECISION_BIT;

	return prec_bit;
}
static num_t bli_dt_proj_to_real( num_t dt )
{
return ( dt & ~BLIS_BITVAL_COMPLEX );
@@ -119,7 +129,17 @@ static num_t bli_dt_proj_to_real( num_t dt )
// Return dt projected to the complex domain. The complex bit is OR-ed in so
// the remaining datatype bits are preserved; AND-ing with
// BLIS_BITVAL_COMPLEX would instead discard every bit except the domain bit.
static num_t bli_dt_proj_to_complex( num_t dt )
{
	return ( dt | BLIS_BITVAL_COMPLEX );
}
// Return dt projected to single precision, keeping its domain. Single
// precision is the "bit clear" state, so the projection clears the
// double-precision bit -- the same pattern bli_dt_proj_to_real() uses when
// it clears BLIS_BITVAL_COMPLEX.
// NOTE(review): masking with ~BLIS_BITVAL_SINGLE_PREC would leave dt
// unchanged if that constant is 0 (the info-field layout documents
// 0 == single) -- confirm against the constant's definition.
static num_t bli_dt_proj_to_single_prec( num_t dt )
{
	return ( dt & ~BLIS_BITVAL_DOUBLE_PREC );
}
// Return dt with the double-precision bit set; all other datatype bits are
// passed through unchanged.
static num_t bli_dt_proj_to_double_prec( num_t dt )
{
	const num_t dt_double = dt | BLIS_BITVAL_DOUBLE_PREC;

	return dt_double;
}
@@ -990,6 +1010,41 @@ void bli_set_dims_incs_uplo_1m_noswap
}
}
// Set dimensions and increments for TWO matrix arguments.
// The outputs start as n_elem = m and n_iter = n with the strides of A and B
// copied through. A's increments are swapped when transa requests a
// transposition. If both operands then test as row-tilted (per
// bli_is_row_tilted), the two dimensions and both increment pairs are
// swapped.
static
void bli_set_dims_incs_2m
(
trans_t transa,
dim_t m, dim_t n, inc_t rs_a, inc_t cs_a,
inc_t rs_b, inc_t cs_b,
dim_t* n_elem, dim_t* n_iter, inc_t* inca, inc_t* lda,
inc_t* incb, inc_t* ldb
)
{
	// Default traversal: n iterations over vectors of m elements.
	*n_iter = n;
	*n_elem = m;
	*inca = rs_a;
	*lda = cs_a;
	*incb = rs_b;
	*ldb = cs_b;

	// Account for a transposition of A by exchanging its two increments.
	if ( bli_does_trans( transa ) )
	{
		bli_swap_incs( inca, lda );
	}

	// B is examined first, then A, using the (possibly swapped) increments.
	const bool_t both_row_tilted =
		bli_is_row_tilted( *n_elem, *n_iter, *incb, *ldb ) &&
		bli_is_row_tilted( *n_elem, *n_iter, *inca, *lda );

	if ( !both_row_tilted ) return;

	// Exchange the roles of the two dimensions for both operands.
	bli_swap_dims( n_iter, n_elem );
	bli_swap_incs( inca, lda );
	bli_swap_incs( incb, ldb );
}
// Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix
// arguments.
@@ -1033,7 +1088,7 @@ void bli_set_dims_incs_uplo_2m
if ( bli_is_stored_subpart( diagoffa_use_, transa, uploa, m, n ) )
uploa = BLIS_DENSE;
n_iter_max_ = n;
n_iter_max_ = n;
*n_elem_max = m;
*inca = rs_a;
*lda = cs_a;

View File

@@ -210,11 +210,11 @@ typedef dcomplex f77_dcomplex;
12 ~ 10 Target numerical datatype
- 10: domain (0 == real, 1 == complex)
- 11: precision (0 == single, 1 == double)
- 12: unused
- 12: used to encode integer, constant types
15 ~ 13 Execution numerical datatype
- 13: domain (0 == real, 1 == complex)
- 14: precision (0 == single, 1 == double)
- 15: unused
- 15: used to encode integer, constant types
22 ~ 16 Packed type/status
- 0 0000 00: not packed
- 1 0000 00: packed (unspecified; by rows, columns, or vector)
@@ -271,7 +271,11 @@ typedef dcomplex f77_dcomplex;
#define BLIS_UNIT_DIAG_SHIFT 8
#define BLIS_INVERT_DIAG_SHIFT 9
#define BLIS_TARGET_DT_SHIFT 10
#define BLIS_EXECUTION_DT_SHIFT 13
#define BLIS_TARGET_DOMAIN_SHIFT 10
#define BLIS_TARGET_PREC_SHIFT 11
#define BLIS_EXEC_DT_SHIFT 13
#define BLIS_EXEC_DOMAIN_SHIFT 13
#define BLIS_EXEC_PREC_SHIFT 14
#define BLIS_PACK_SCHEMA_SHIFT 16
#define BLIS_PACK_RC_SHIFT 16
#define BLIS_PACK_PANEL_SHIFT 17
@@ -299,7 +303,11 @@ typedef dcomplex f77_dcomplex;
#define BLIS_UNIT_DIAG_BIT ( 0x1 << BLIS_UNIT_DIAG_SHIFT )
#define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT )
#define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT )
#define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT )
#define BLIS_TARGET_DOMAIN_BIT ( 0x1 << BLIS_TARGET_DOMAIN_SHIFT )
#define BLIS_TARGET_PREC_BIT ( 0x1 << BLIS_TARGET_PREC_SHIFT )
#define BLIS_EXEC_DT_BITS ( 0x7 << BLIS_EXEC_DT_SHIFT )
#define BLIS_EXEC_DOMAIN_BIT ( 0x1 << BLIS_EXEC_DOMAIN_SHIFT )
#define BLIS_EXEC_PREC_BIT ( 0x1 << BLIS_EXEC_PREC_SHIFT )
#define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT )
#define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT )
#define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT )
@@ -1128,8 +1136,6 @@ typedef struct cntx_s
pack_t schema_b_panel;
pack_t schema_c_panel;
bool_t anti_pref;
dim_t thrloop[ BLIS_NUM_LOOPS ];
membrk_t* membrk;
@@ -1177,6 +1183,7 @@ typedef enum
BLIS_INCONSISTENT_DATATYPES = ( -36),
BLIS_EXPECTED_REAL_PROJ_OF = ( -37),
BLIS_EXPECTED_REAL_VALUED_OBJECT = ( -38),
BLIS_INCONSISTENT_PRECISIONS = ( -39),
// Dimension-specific errors
BLIS_NONCONFORMAL_DIMENSIONS = ( -40),

View File

@@ -122,6 +122,12 @@ extern "C" {
#include "bli_cpuid.h"
#include "bli_string.h"
#include "bli_setgetij.h"
#include "bli_setri.h"
#include "bli_castm.h"
#include "bli_castv.h"
#include "bli_projm.h"
#include "bli_projv.h"
// -- Level-0 operations --