Converted function-like macros to static functions.

Details:
- Converted most C preprocessor macros in bli_param_macro_defs.h and
  bli_obj_macro_defs.h to static functions.
- Reshuffled some functions/macros to bli_misc_macro_defs.h and also
  between bli_param_macro_defs.h and bli_obj_macro_defs.h.
- Changed obj_t-initializing macros in bli_type_defs.h to static
  functions.
- Removed some old references to BLIS_TWO and BLIS_MINUS_TWO from
  bli_constants.h.
- Whitespace changes in select files (four spaces to single tab).
This commit is contained in:
Field G. Van Zee
2018-05-08 14:26:30 -05:00
parent 75d0d1057d
commit 4b36e85be9
247 changed files with 5480 additions and 5350 deletions

View File

@@ -194,7 +194,7 @@ void bli_zdotv_template_noopt
// toggling the effective conjugation of x and then conjugating the
// resulting dot product.
if ( bli_is_conj( conjy ) )
bli_toggle_conj( conjx_use );
bli_toggle_conj( &conjx_use );
// Iterate over elements of x and y to compute:

View File

@@ -211,7 +211,7 @@ void bli_zdotaxpyv_template_noopt
// toggling the effective conjugation of xt and then conjugating the
// resulting dot product.
if ( bli_is_conj( conjy ) )
bli_toggle_conj( conjxt_use );
bli_toggle_conj( &conjxt_use );
// Iterate over elements of x, y, and z to compute:

View File

@@ -264,7 +264,7 @@ void bli_zdotxaxpyf_template_noopt
// toggling the effective conjugation of At and then conjugating the
// resulting dot products.
if ( bli_is_conj( conjw ) )
bli_toggle_conj( conjat_use );
bli_toggle_conj( &conjat_use );
// Iterate over the columns of A and elements of w and z to compute:

View File

@@ -237,7 +237,7 @@ void bli_zdotxf_template_noopt
// toggling the effective conjugation of A and then conjugating the
// resulting product A^T*x.
if ( bli_is_conj( conjx ) )
bli_toggle_conj( conjat_use );
bli_toggle_conj( &conjat_use );
// Iterate over columns of A and rows of x to compute:

View File

@@ -121,12 +121,12 @@ int main( int argc, char** argv )
// Let's inspect the amount of padding inserted for alignment. Note
// the difference between the m dimension and the column stride.
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a8 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a8 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a8 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a8 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a8 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a8 ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a8 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a8 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a8 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a8 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a8 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a8 ) );
//
// Example 6: Inspect object fields after creation of other floating-
@@ -139,28 +139,28 @@ int main( int argc, char** argv )
bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10);
bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a9 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a9 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a9 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a9 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a9 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a9 ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a9 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a9 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a9 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a9 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a9 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a9 ) );
printf( "\n" );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a10 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a10 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a10 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a10 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a10 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a10 ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a10 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a10 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a10 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a10 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a10 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a10 ) );
printf( "\n" );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a11 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a11 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a11 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a11 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a11 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a11 ) );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a11 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a11 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a11 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a11 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a11 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a11 ) );
//
// Example 7: Initialize an object's elements to random values and then

View File

@@ -148,7 +148,7 @@ int main( int argc, char** argv )
// on 'e', the input operand. Transposition can be indicated by setting a
// bit in the object. Since it always starts out as "no transpose", we can
// simply toggle the bit.
bli_obj_toggle_trans( e );
bli_obj_toggle_trans( &e );
// Another way to mark an object for transposition is to set it directly.
//bli_obj_set_onlytrans( BLIS_TRANSPOSE, &e );
@@ -192,8 +192,8 @@ int main( int argc, char** argv )
bli_printm( "h (initial value):", &h, "%4.1f", "" );
// Set both the transpose and conjugation bits.
bli_obj_toggle_trans( g );
bli_obj_toggle_conj( g );
bli_obj_toggle_trans( &g );
bli_obj_toggle_conj( &g );
// Copy 'g' to 'h', conjugating and transposing 'g' in the process.
// Once again, notice that it's the source operand that we've marked for

View File

@@ -59,14 +59,14 @@ int main( int argc, char** argv )
bli_obj_create( dt, m, n, rs, cs, &a );
// First, we mark the matrix structure as triangular.
bli_obj_set_struc( BLIS_TRIANGULAR, a )
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
// Next, we specify whether the lower part or the upper part is to be
// recognized as the "stored" region (which we call the uplo field). The
// strictly opposite part (in this case, the strictly lower region) will
// be *assumed* to be zero during computation. However, when printed out,
// the strictly lower part may contain junk values.
bli_obj_set_uplo( BLIS_UPPER, a );
bli_obj_set_uplo( BLIS_UPPER, &a );
// Now set the upper triangle to random values.
bli_randm( &a );
@@ -89,8 +89,8 @@ int main( int argc, char** argv )
bli_obj_create( dt, m, n, rs, cs, &b );
// Set structure and uplo.
bli_obj_set_struc( BLIS_TRIANGULAR, b )
bli_obj_set_uplo( BLIS_UPPER, b );
bli_obj_set_struc( BLIS_TRIANGULAR, &b )
bli_obj_set_uplo( BLIS_UPPER, &b );
// Create an alias, 'bl', of the original object 'b'. Both objects will
// refer to the same underlying matrix elements, but now we will have two
@@ -98,7 +98,7 @@ int main( int argc, char** argv )
// of the objects, meaning no additional memory allocation takes place.
// Therefore it is up to the API user (you) to make sure that you only
// free the original object (or exactly one of the aliases).
bli_obj_alias_to( b, bl );
bli_obj_alias_to( &b, &bl );
// Digression: Each object contains a diagonal offset (even vectors),
// even if it is never needed. The diagonal offset for a newly-created
@@ -111,10 +111,10 @@ int main( int argc, char** argv )
// x-axis value.
// Set the diagonal offset of 'bl' to -1.
bli_obj_set_diag_offset( -1, bl );
bli_obj_set_diag_offset( -1, &bl );
// Set the uplo field of 'bl' to "lower".
bli_obj_set_uplo( BLIS_LOWER, bl );
bli_obj_set_uplo( BLIS_LOWER, &bl );
// Set the upper triangle of 'b' to random values.
bli_randm( &b );
@@ -148,7 +148,7 @@ int main( int argc, char** argv )
bli_obj_create( dt, m, n, rs, cs, &c );
// Reset the diagonal offset of 'bl' to 0.
bli_obj_set_diag_offset( 0, bl );
bli_obj_set_diag_offset( 0, &bl );
// Copy the lower triangle of matrix 'b' from Example 2 to object 'c'.
// This should give us -1.0 in the strictly lower part and some non-zero
@@ -212,7 +212,7 @@ int main( int argc, char** argv )
// We want to pluck out the lower triangle and transpose it into the upper
// triangle of 'd'.
bli_obj_toggle_trans( bl );
bli_obj_toggle_trans( &bl );
// Now we copy the transpose of the lower part of 'bl' into the upper
// part of 'd'. (Again, notice that we haven't modified any properties of
@@ -242,11 +242,11 @@ int main( int argc, char** argv )
bli_printm( "e: initial value (all -1.0)", &e, "%4.1f", "" );
// Create an alias to work with.
bli_obj_alias_to( e, el );
bli_obj_alias_to( &e, &el );
// Set structure and uplo of 'el'.
bli_obj_set_struc( BLIS_TRIANGULAR, el )
bli_obj_set_uplo( BLIS_LOWER, el );
bli_obj_set_struc( BLIS_TRIANGULAR, &el )
bli_obj_set_uplo( BLIS_LOWER, &el );
// Digression: Notice that "triangular" structure does not require that
// the matrix be square. Rather, it simply means that either the part above
@@ -259,8 +259,8 @@ int main( int argc, char** argv )
// Move the diagonal offset of 'el' to 1 and flip the uplo field to
// "upper".
bli_obj_set_diag_offset( 1, el );
bli_obj_set_uplo( BLIS_UPPER, el );
bli_obj_set_diag_offset( 1, &el );
bli_obj_set_uplo( BLIS_UPPER, &el );
// Set the upper triangle to zero.
bli_setm( &BLIS_ZERO, &el );
@@ -287,11 +287,11 @@ int main( int argc, char** argv )
bli_printm( "h: initial value (all -1.0)", &h, "%4.1f", "" );
// Set the diagonal offset of 'h' to -1.
bli_obj_set_diag_offset( -1, h );
bli_obj_set_diag_offset( -1, &h );
// Set the structure and uplo of 'h'.
bli_obj_set_struc( BLIS_TRIANGULAR, h )
bli_obj_set_uplo( BLIS_UPPER, h );
bli_obj_set_struc( BLIS_TRIANGULAR, &h )
bli_obj_set_uplo( BLIS_UPPER, &h );
// Randomize the elements on and above the first subdiagonal.
bli_randm( &h );
@@ -299,11 +299,11 @@ int main( int argc, char** argv )
bli_printm( "h: after randomizing above first subdiagonal", &h, "%4.1f", "" );
// Create an alias to work with.
bli_obj_alias_to( h, hl );
bli_obj_alias_to( &h, &hl );
// Flip the uplo of 'hl' and move the diagonal down by one.
bli_obj_set_uplo( BLIS_LOWER, hl );
bli_obj_set_diag_offset( -2, hl );
bli_obj_set_uplo( BLIS_LOWER, &hl );
bli_obj_set_diag_offset( -2, &hl );
// Set the region strictly below the first subdiagonal (on or below
// the second subdiagonal) to zero.

View File

@@ -157,8 +157,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as symmetric and stored in the lower triangle, and
// then randomize that lower triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, a )
bli_obj_set_uplo( BLIS_LOWER, a );
bli_obj_set_struc( BLIS_SYMMETRIC, &a )
bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a );
bli_printm( "x: set to random values", &x, "%4.1f", "" );
@@ -200,8 +200,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as symmetric and stored in the upper triangle, and
// then randomize that upper triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, a )
bli_obj_set_uplo( BLIS_UPPER, a );
bli_obj_set_struc( BLIS_SYMMETRIC, &a )
bli_obj_set_uplo( BLIS_UPPER, &a );
bli_randm( &a );
bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" );
@@ -242,8 +242,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a )
bli_obj_set_uplo( BLIS_LOWER, a );
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a );
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
@@ -283,8 +283,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a )
bli_obj_set_uplo( BLIS_LOWER, a );
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a );
// Load the diagonal. By setting the diagonal to something of greater

View File

@@ -111,7 +111,7 @@ int main( int argc, char** argv )
bli_setm( &BLIS_ZERO, &cc );
// Set the transpose bit in 'aa'.
bli_obj_toggle_trans( aa );
bli_obj_toggle_trans( &aa );
bli_printm( "a: randomized", &aa, "%4.1f", "" );
bli_printm( "b: set to 1.0", &bb, "%4.1f", "" );
@@ -148,8 +148,8 @@ int main( int argc, char** argv )
// Mark matrix 'c' as symmetric and stored in the lower triangle, and
// then randomize that lower triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, c )
bli_obj_set_uplo( BLIS_LOWER, c );
bli_obj_set_struc( BLIS_SYMMETRIC, &c )
bli_obj_set_uplo( BLIS_LOWER, &c );
bli_randm( &c );
bli_printm( "a: set to random values", &a, "%4.1f", "" );
@@ -194,8 +194,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as symmetric and stored in the upper triangle, and
// then randomize that upper triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, a )
bli_obj_set_uplo( BLIS_UPPER, a );
bli_obj_set_struc( BLIS_SYMMETRIC, &a )
bli_obj_set_uplo( BLIS_UPPER, &a );
bli_randm( &a );
bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" );
@@ -241,8 +241,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a )
bli_obj_set_uplo( BLIS_LOWER, a );
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a );
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
@@ -286,8 +286,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a )
bli_obj_set_uplo( BLIS_LOWER, a );
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a );
// Load the diagonal. By setting the diagonal to something of greater

View File

@@ -147,8 +147,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &c );
// Set the structure and uplo of 'c'.
bli_obj_set_struc( BLIS_SYMMETRIC, c )
bli_obj_set_uplo( BLIS_LOWER, c );
bli_obj_set_struc( BLIS_SYMMETRIC, &c )
bli_obj_set_uplo( BLIS_LOWER, &c );
// Randomize the lower triangle of 'c'.
bli_randm( &c );
@@ -170,8 +170,8 @@ int main( int argc, char** argv )
// Initialize all of 'd' to -1.0 to simulate junk values.
bli_setm( &BLIS_MINUS_ONE, &d );
bli_obj_set_struc( BLIS_HERMITIAN, d )
bli_obj_set_uplo( BLIS_LOWER, d );
bli_obj_set_struc( BLIS_HERMITIAN, &d )
bli_obj_set_uplo( BLIS_LOWER, &d );
// Randomize the lower triangle of 'd'.
bli_randm( &d );
@@ -185,8 +185,8 @@ int main( int argc, char** argv )
bli_printm( "d (after mkherm):", &d, "%4.1f", "" );
// Set the structure and uplo of 'd'.
bli_obj_set_struc( BLIS_HERMITIAN, d )
bli_obj_set_uplo( BLIS_LOWER, d );
bli_obj_set_struc( BLIS_HERMITIAN, &d )
bli_obj_set_uplo( BLIS_LOWER, &d );
//
// Example 4: Make a complex matrix explicitly symmetric or Hermitian.
@@ -203,8 +203,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &e );
// Set the structure and uplo of 'e'.
bli_obj_set_struc( BLIS_SYMMETRIC, e )
bli_obj_set_uplo( BLIS_UPPER, e );
bli_obj_set_struc( BLIS_SYMMETRIC, &e )
bli_obj_set_uplo( BLIS_UPPER, &e );
// Randomize the upper triangle of 'e'.
bli_randm( &e );
@@ -221,8 +221,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &f );
// Set the structure and uplo of 'f'.
bli_obj_set_struc( BLIS_HERMITIAN, f )
bli_obj_set_uplo( BLIS_UPPER, f );
bli_obj_set_struc( BLIS_HERMITIAN, &f )
bli_obj_set_uplo( BLIS_UPPER, &f );
// Randomize the upper triangle of 'f'.
bli_randm( &f );
@@ -249,8 +249,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &g );
// Set the structure and uplo of 'g'.
bli_obj_set_struc( BLIS_TRIANGULAR, g )
bli_obj_set_uplo( BLIS_LOWER, g );
bli_obj_set_struc( BLIS_TRIANGULAR, &g )
bli_obj_set_uplo( BLIS_LOWER, &g );
// Randomize the lower triangle of 'g'.
bli_randm( &g );

View File

@@ -50,10 +50,10 @@ void PASTEMAC0(opname) \
bli_init_once(); \
\
num_t dt_chi; \
num_t dt_absq_c = bli_obj_dt_proj_to_complex( *absq ); \
num_t dt_absq_c = bli_obj_dt_proj_to_complex( absq ); \
\
void* buf_chi; \
void* buf_absq = bli_obj_buffer_at_off( *absq ); \
void* buf_chi; \
void* buf_absq = bli_obj_buffer_at_off( absq ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, absq ); \
@@ -61,7 +61,7 @@ void PASTEMAC0(opname) \
/* If chi is a scalar constant, use dt_absq_c to extract the address of the
corresponding constant value; otherwise, use the datatype encoded
within the chi object and extract the buffer at the chi offset. */ \
bli_set_scalar_dt_buffer( chi, dt_absq_c, dt_chi, buf_chi ); \
bli_obj_scalar_set_dt_buffer( chi, dt_absq_c, &dt_chi, &buf_chi ); \
\
/* Invoke the typed function. */ \
bli_call_ft_2 \
@@ -88,12 +88,12 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *psi ); \
num_t dt = bli_obj_dt( psi ); \
\
conj_t conjchi = bli_obj_conj_status( *chi ); \
conj_t conjchi = bli_obj_conj_status( chi ); \
\
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
void* buf_psi = bli_obj_buffer_at_off( psi ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, psi ); \
@@ -125,11 +125,11 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *chi ); \
num_t dt = bli_obj_dt( chi ); \
\
conj_t conjchi = bli_obj_conj_status( *chi ); \
conj_t conjchi = bli_obj_conj_status( chi ); \
\
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi ); \
@@ -158,10 +158,10 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *psi ); \
num_t dt = bli_obj_dt( psi ); \
\
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
void* buf_psi = bli_obj_buffer_at_off( psi ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, psi ); \
@@ -191,14 +191,14 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt_chi = bli_obj_dt( *chi ); \
num_t dt_chi = bli_obj_dt( chi ); \
num_t dt_def = BLIS_DCOMPLEX; \
num_t dt_use; \
\
/* If chi is a constant object, default to using the dcomplex
value to maximize precision, and since we don't know if the
caller needs just the real or the real and imaginary parts. */ \
void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi ); \
void* buf_chi = bli_obj_buffer_for_1x1( dt_def, chi ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
@@ -234,9 +234,9 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt_chi = bli_obj_dt( *chi ); \
num_t dt_chi = bli_obj_dt( chi ); \
\
void* buf_chi = bli_obj_buffer_at_off( *chi ); \
void* buf_chi = bli_obj_buffer_at_off( chi ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \
@@ -268,12 +268,12 @@ void PASTEMAC0(opname) \
bli_init_once(); \
\
num_t dt_chi; \
num_t dt_zeta_c = bli_obj_dt_proj_to_complex( *zeta_r ); \
num_t dt_zeta_c = bli_obj_dt_proj_to_complex( zeta_r ); \
\
void* buf_chi; \
void* buf_chi; \
\
void* buf_zeta_r = bli_obj_buffer_at_off( *zeta_r ); \
void* buf_zeta_i = bli_obj_buffer_at_off( *zeta_i ); \
void* buf_zeta_r = bli_obj_buffer_at_off( zeta_r ); \
void* buf_zeta_i = bli_obj_buffer_at_off( zeta_i ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
@@ -281,7 +281,7 @@ void PASTEMAC0(opname) \
/* If chi is a scalar constant, use dt_zeta_c to extract the address of the
corresponding constant value; otherwise, use the datatype encoded
within the chi object and extract the buffer at the chi offset. */ \
bli_set_scalar_dt_buffer( chi, dt_zeta_c, dt_chi, buf_chi ); \
bli_obj_scalar_set_dt_buffer( chi, dt_zeta_c, &dt_chi, &buf_chi ); \
\
/* Invoke the typed function. */ \
bli_call_ft_3 \
@@ -309,12 +309,12 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt_chi = bli_obj_dt( *chi ); \
num_t dt_chi = bli_obj_dt( chi ); \
\
void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, *zeta_r ); \
void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, *zeta_i ); \
void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, zeta_r ); \
void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, zeta_i ); \
\
void* buf_chi = bli_obj_buffer_at_off( *chi ); \
void* buf_chi = bli_obj_buffer_at_off( chi ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \

View File

@@ -63,10 +63,10 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
conj_t conjchi = bli_obj_conj_status( *chi ); \
conj_t conjchi = bli_obj_conj_status( chi ); \
\
num_t dt_psi = bli_obj_dt( *psi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
num_t dt_psi = bli_obj_dt( psi ); \
void* buf_psi = bli_obj_buffer_at_off( psi ); \
\
num_t dt_chi; \
void* buf_chi; \
@@ -79,7 +79,7 @@ void PASTEMAC0(opname) \
/* If chi is a scalar constant, use dt_psi to extract the address of the
corresponding constant value; otherwise, use the datatype encoded
within the chi object and extract the buffer at the chi offset. */ \
bli_set_scalar_dt_buffer( chi, dt_psi, dt_chi, buf_chi ); \
bli_obj_scalar_set_dt_buffer( chi, dt_psi, &dt_chi, &buf_chi ); \
\
/* Index into the type combination array to extract the correct
function pointer. */ \

View File

@@ -54,14 +54,14 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \
@@ -98,13 +98,13 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_index = bli_obj_buffer_at_off( *index ); \
void* buf_index = bli_obj_buffer_at_off( index ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, index ); \
@@ -140,14 +140,14 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
\
void* buf_alpha; \
void* buf_beta; \
@@ -164,8 +164,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_9 \
@@ -200,14 +200,14 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
\
void* buf_alpha; \
\
@@ -220,7 +220,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_8 \
@@ -255,16 +255,16 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
void* buf_rho = bli_obj_buffer_at_off( *rho ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_rho = bli_obj_buffer_at_off( rho ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y, rho ); \
@@ -304,16 +304,16 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
void* buf_rho = bli_obj_buffer_at_off( *rho ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_rho = bli_obj_buffer_at_off( rho ); \
\
void* buf_alpha; \
void* buf_beta; \
@@ -330,8 +330,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_11 \
@@ -366,11 +366,11 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x ); \
@@ -403,12 +403,12 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
\
void* buf_alpha; \
\
@@ -421,7 +421,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_6 \
@@ -454,13 +454,13 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \
@@ -495,14 +495,14 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
\
void* buf_beta; \
\
@@ -515,7 +515,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_8 \

View File

@@ -61,7 +61,7 @@ void bli_packv_init
// is NULL, and if so, simply alias the object to its packed counterpart.
if ( bli_cntl_is_noop( cntl ) )
{
bli_obj_alias_to( *a, *p );
bli_obj_alias_to( a, p );
return;
}
@@ -73,15 +73,15 @@ void bli_packv_init
// BLIS_NOT_PACKED and thus packing will be called for (but in some
// cases packing has already taken place). Also, not all combinations
// of current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
{
bli_obj_alias_to( *a, *p );
bli_obj_alias_to( a, p );
return;
}
// Now, if we are not skipping the pack operation, then the only question
// left is whether we are to typecast vector a before packing.
if ( bli_obj_dt( *a ) != bli_obj_target_dt( *a ) )
if ( bli_obj_dt( a ) != bli_obj_target_dt( a ) )
bli_abort();
// Extract various fields from the control tree and pass them in
@@ -113,8 +113,8 @@ siz_t bli_packv_init_pack
cntx_t* cntx
)
{
num_t dt = bli_obj_dt( *a );
dim_t dim_a = bli_obj_vector_dim( *a );
num_t dt = bli_obj_dt( a );
dim_t dim_a = bli_obj_vector_dim( a );
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
membrk_t* membrk = bli_cntx_membrk( cntx );
@@ -129,23 +129,23 @@ siz_t bli_packv_init_pack
// We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *p );
bli_obj_alias_to( a, p );
// Update the dimensions.
bli_obj_set_dims( dim_a, 1, *p );
bli_obj_set_dims( dim_a, 1, p );
// Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *p );
bli_obj_set_offs( 0, 0, p );
// Set the pack schema in the p object to the value in the control tree
// node.
bli_obj_set_pack_schema( schema, *p );
bli_obj_set_pack_schema( schema, p );
// Compute the dimensions padded by the dimension multiples.
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult );
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( p ), bmult );
// Compute the size of the packed buffer.
size_p = m_p_pad * 1 * bli_obj_elem_size( *p );
size_p = m_p_pad * 1 * bli_obj_elem_size( p );
#if 0
// Extract the address of the mem_t object within p that will track
@@ -179,11 +179,11 @@ siz_t bli_packv_init_pack
// copied when the value is already up-to-date, because it persists
// in the main object buffer field across loop iterations.)
buf = bli_mem_buffer( mem_p );
bli_obj_set_buffer( buf, *p );
bli_obj_set_buffer( buf, p );
#endif
// Save the padded (packed) dimensions into the packed object.
bli_obj_set_padded_dims( m_p_pad, 1, *p );
bli_obj_set_padded_dims( m_p_pad, 1, p );
// Set the row and column strides of p based on the pack schema.
if ( schema == BLIS_PACKED_VECTOR )
@@ -193,9 +193,9 @@ siz_t bli_packv_init_pack
// how much space beyond the vector would need to be zero-padded, if
// zero-padding was needed.
rs_p = 1;
cs_p = bli_obj_padded_length( *p );
cs_p = bli_obj_padded_length( p );
bli_obj_set_strides( rs_p, cs_p, *p );
bli_obj_set_strides( rs_p, cs_p, p );
}
return size_p;

View File

@@ -71,7 +71,7 @@ void bli_packv_int
// Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block.
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
//if ( bli_obj_has_zero_dim( a ) ) bli_abort();
// First check if we are to skip this operation because the control tree
// is NULL. We return without taking any action because a was already
@@ -91,7 +91,7 @@ void bli_packv_int
// not important, as long as it is packed into contiguous rows or
// contiguous columns. A good example of this is packing for matrix
// operands in the level-2 operations.
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
{
return;
}
@@ -104,7 +104,7 @@ void bli_packv_int
// already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
{
return;
}

View File

@@ -51,15 +51,15 @@ void bli_packv_unb_var1( obj_t* c,
cntx_t* cntx,
packv_t* cntl )
{
num_t dt_cp = bli_obj_dt( *c );
num_t dt_cp = bli_obj_dt( c );
dim_t dim_p = bli_obj_vector_dim( *p );
dim_t dim_p = bli_obj_vector_dim( p );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t incc = bli_obj_vector_inc( *c );
void* buf_c = bli_obj_buffer_at_off( c );
inc_t incc = bli_obj_vector_inc( c );
void* buf_p = bli_obj_buffer_at_off( *p );
inc_t incp = bli_obj_vector_inc( *p );
void* buf_p = bli_obj_buffer_at_off( p );
inc_t incp = bli_obj_vector_inc( p );
FUNCPTR_T f;

View File

@@ -54,7 +54,7 @@ void bli_scalv_int( obj_t* alpha,
FUNCPTR_T f;
// Return early if one of the matrix operands has a zero dimension.
if ( bli_obj_has_zero_dim( *x ) ) return;
if ( bli_obj_has_zero_dim( x ) ) return;
// Check parameters.
if ( bli_error_checking_is_enabled() )

View File

@@ -75,7 +75,7 @@ void bli_unpackv_int( obj_t* p,
// Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block.
if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
if ( bli_obj_has_zero_dim( a ) ) bli_abort();
// First check if we are to skip this operation because the control tree
// is NULL, and if so, simply return.
@@ -87,17 +87,17 @@ void bli_unpackv_int( obj_t* p,
// If p was aliased to a during the pack stage (because it was already
// in an acceptable packed/contiguous format), then no unpack is actually
// necessary, so we return.
if ( bli_obj_is_alias_of( *p, *a ) )
if ( bli_obj_is_alias_of( p, a ) )
{
return;
}
// Now, if we are not skipping the unpack operation, then the only
// question left is whether we are to typecast vector a after unpacking.
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
bli_abort();
/*
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
{
// Initialize an object c for the intermediate typecast vector.
bli_unpackv_init_cast( p,
@@ -110,7 +110,7 @@ void bli_unpackv_int( obj_t* p,
// If no cast is needed, then aliasing object c to the original
// vector serves as a minor optimization. This causes the unpackv
// implementation to unpack directly into vector a.
bli_obj_alias_to( *a, c );
bli_obj_alias_to( a, &c );
}
// Now we are ready to proceed with the unpacking.
@@ -132,7 +132,7 @@ void bli_unpackv_int( obj_t* p,
// was not necessary, then we are done because the call to the unpackv
// implementation would have unpacked directly to vector a.
/*
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
{
// Copy/typecast vector c to vector a.
// NOTE: Here, we use copynzv instead of copym because, in the cases
@@ -179,26 +179,26 @@ void bli_unpackv_init_cast( obj_t* p,
// already available. (After acquiring a mem entry from the memory
// manager, it is cached within p for quick access later on.)
num_t dt_targ_a = bli_obj_target_dt( *a );
dim_t dim_a = bli_obj_vector_dim( *a );
num_t dt_targ_a = bli_obj_target_dt( a );
dim_t dim_a = bli_obj_vector_dim( a );
siz_t elem_size_c = bli_dt_size( dt_targ_a );
// We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *c );
bli_obj_alias_to( a, c );
// Update datatype and element size fields.
bli_obj_set_dt( dt_targ_a, *c );
bli_obj_set_elem_size( elem_size_c, *c );
bli_obj_set_dt( dt_targ_a, c );
bli_obj_set_elem_size( elem_size_c, c );
// Update the strides and dimensions. We set the increments to reflect a
// column-stored vector. Note that the column stride is set to dim(a),
// though it should never be used because there is no second column to
// index into (and therefore it also does not need to be aligned).
bli_obj_set_dims( dim_a, 1, *c );
bli_obj_set_strides( 1, dim_a, *c );
bli_obj_set_dims( dim_a, 1, c );
bli_obj_set_strides( 1, dim_a, c );
// Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *c );
bli_obj_set_offs( 0, 0, c );
// Check the mem_t entry of p associated with the cast buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache

View File

@@ -51,15 +51,15 @@ void bli_unpackv_unb_var1( obj_t* p,
cntx_t* cntx,
unpackv_t* cntl )
{
num_t dt_pc = bli_obj_dt( *p );
num_t dt_pc = bli_obj_dt( p );
dim_t dim_c = bli_obj_vector_dim( *c );
dim_t dim_c = bli_obj_vector_dim( c );
void* buf_p = bli_obj_buffer_at_off( *p );
inc_t incp = bli_obj_vector_inc( *p );
void* buf_p = bli_obj_buffer_at_off( p );
inc_t incp = bli_obj_vector_inc( p );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t incc = bli_obj_vector_inc( *c );
void* buf_c = bli_obj_buffer_at_off( c );
inc_t incc = bli_obj_vector_inc( c );
FUNCPTR_T f;

View File

@@ -54,19 +54,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \
dim_t m = bli_obj_length( *y ); \
dim_t n = bli_obj_width( *y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \
doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( x ); \
trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( y ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \
@@ -107,19 +107,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \
dim_t m = bli_obj_length( *y ); \
dim_t n = bli_obj_width( *y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \
doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( x ); \
trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( y ); \
\
void* buf_alpha; \
\
@@ -132,7 +132,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_13 \
@@ -168,14 +168,14 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
dim_t m = bli_obj_length( *x ); \
dim_t n = bli_obj_width( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
doff_t diagoffx = bli_obj_diag_offset( x ); \
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x ); \
@@ -210,15 +210,15 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \
dim_t m = bli_obj_length( *x ); \
dim_t n = bli_obj_width( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( x ); \
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
void* buf_alpha; \
\
@@ -231,7 +231,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_9 \
@@ -266,16 +266,16 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
dim_t m = bli_obj_length( *x ); \
dim_t n = bli_obj_width( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
doff_t diagoffx = bli_obj_diag_offset( x ); \
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( alpha, x ); \

View File

@@ -70,9 +70,12 @@ void PASTEMAC(ch,opname) \
\
/* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \
bli_set_dims_incs_2d( diagoffx, transx, \
m, n, rs_x, cs_x, rs_y, cs_y, \
offx, offy, n_elem, incx, incy ); \
bli_set_dims_incs_2d \
( \
diagoffx, transx, \
m, n, rs_x, cs_x, rs_y, cs_y, \
&offx, &offy, &n_elem, &incx, &incy \
); \
\
conjx = bli_extract_conj( transx ); \
\
@@ -144,9 +147,12 @@ void PASTEMAC(ch,opname) \
\
/* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \
bli_set_dims_incs_2d( diagoffx, transx, \
m, n, rs_x, cs_x, rs_y, cs_y, \
offx, offy, n_elem, incx, incy ); \
bli_set_dims_incs_2d \
( \
diagoffx, transx, \
m, n, rs_x, cs_x, rs_y, cs_y, \
&offx, &offy, &n_elem, &incx, &incy \
); \
\
conjx = bli_extract_conj( transx ); \
\
@@ -212,9 +218,12 @@ void PASTEMAC(ch,opname) \
\
/* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \
bli_set_dims_incs_1d( diagoffx, \
m, n, rs_x, cs_x, \
offx, n_elem, incx ); \
bli_set_dims_incs_1d \
( \
diagoffx, \
m, n, rs_x, cs_x, \
&offx, &n_elem, &incx \
); \
\
x1 = x + offx; \
\
@@ -264,9 +273,12 @@ void PASTEMAC(ch,opname) \
\
/* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \
bli_set_dims_incs_1d( diagoffx, \
m, n, rs_x, cs_x, \
offx, n_elem, incx ); \
bli_set_dims_incs_1d \
( \
diagoffx, \
m, n, rs_x, cs_x, \
&offx, &n_elem, &incx \
); \
\
x1 = x + offx; \
\
@@ -322,9 +334,12 @@ void PASTEMAC(ch,opname) \
\
/* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \
bli_set_dims_incs_1d( diagoffx, \
m, n, rs_x, cs_x, \
offx, n_elem, incx ); \
bli_set_dims_incs_1d \
( \
diagoffx, \
m, n, rs_x, cs_x, \
&offx, &n_elem, &incx \
); \
\
/* Alternate implementation. (Substitute for remainder of function). */ \
/* for ( i = 0; i < n_elem; ++i ) \

View File

@@ -146,10 +146,10 @@ void bli_axpyf_check
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
@@ -334,16 +334,16 @@ void bli_dotxaxpyf_check
e_val = bli_check_conformal_dims( at, a );
bli_check_error_code( e_val );
e_val = bli_check_object_length_equals( at, bli_obj_vector_dim( *w ) );
e_val = bli_check_object_length_equals( at, bli_obj_vector_dim( w ) );
bli_check_error_code( e_val );
e_val = bli_check_object_width_equals( at, bli_obj_vector_dim( *y ) );
e_val = bli_check_object_width_equals( at, bli_obj_vector_dim( y ) );
bli_check_error_code( e_val );
e_val = bli_check_object_length_equals( a, bli_obj_vector_dim( *z ) );
e_val = bli_check_object_length_equals( a, bli_obj_vector_dim( z ) );
bli_check_error_code( e_val );
e_val = bli_check_object_width_equals( a, bli_obj_vector_dim( *x ) );
e_val = bli_check_object_width_equals( a, bli_obj_vector_dim( x ) );
bli_check_error_code( e_val );
// Check object aliases.
@@ -424,10 +424,10 @@ void bli_dotxf_check
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).

View File

@@ -57,17 +57,17 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
void* buf_z = bli_obj_buffer_at_off( *z ); \
inc_t inc_z = bli_obj_vector_inc( *z ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_z = bli_obj_buffer_at_off( z ); \
inc_t inc_z = bli_obj_vector_inc( z ); \
\
void* buf_alphax; \
void* buf_alphay; \
@@ -84,8 +84,8 @@ void PASTEMAC(opname,EX_SUF) \
alphax, &alphax_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alphay, &alphay_local ); \
buf_alphax = bli_obj_buffer_for_1x1( dt, alphax_local ); \
buf_alphay = bli_obj_buffer_for_1x1( dt, alphay_local ); \
buf_alphax = bli_obj_buffer_for_1x1( dt, &alphax_local ); \
buf_alphay = bli_obj_buffer_for_1x1( dt, &alphay_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_12 \
@@ -123,19 +123,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conja = bli_obj_conj_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t m = bli_obj_vector_dim( *y ); \
dim_t b_n = bli_obj_vector_dim( *x ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
conj_t conja = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_vector_dim( y ); \
dim_t b_n = bli_obj_vector_dim( x ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
\
void* buf_alpha; \
\
@@ -148,10 +148,10 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Support cases where matrix A requires a transposition. */ \
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \
if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_13 \
@@ -191,19 +191,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjxt = bli_obj_conj_status( *xt ); \
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
dim_t n = bli_obj_vector_dim( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
void* buf_z = bli_obj_buffer_at_off( *z ); \
inc_t inc_z = bli_obj_vector_inc( *z ); \
void* buf_rho = bli_obj_buffer_at_off( *rho ); \
conj_t conjxt = bli_obj_conj_status( xt ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_z = bli_obj_buffer_at_off( z ); \
inc_t inc_z = bli_obj_vector_inc( z ); \
void* buf_rho = bli_obj_buffer_at_off( rho ); \
\
void* buf_alpha; \
\
@@ -216,7 +216,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_13 \
@@ -259,25 +259,25 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjat = bli_obj_conj_status( *at ); \
conj_t conja = bli_obj_conj_status( *a ); \
conj_t conjw = bli_obj_conj_status( *w ); \
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t m = bli_obj_vector_dim( *z ); \
dim_t b_n = bli_obj_vector_dim( *y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_w = bli_obj_buffer_at_off( *w ); \
inc_t inc_w = bli_obj_vector_inc( *w ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
void* buf_z = bli_obj_buffer_at_off( *z ); \
inc_t inc_z = bli_obj_vector_inc( *z ); \
conj_t conjat = bli_obj_conj_status( at ); \
conj_t conja = bli_obj_conj_status( a ); \
conj_t conjw = bli_obj_conj_status( w ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_vector_dim( z ); \
dim_t b_n = bli_obj_vector_dim( y ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_w = bli_obj_buffer_at_off( w ); \
inc_t inc_w = bli_obj_vector_inc( w ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_z = bli_obj_buffer_at_off( z ); \
inc_t inc_z = bli_obj_vector_inc( z ); \
\
void* buf_alpha; \
void* buf_beta; \
@@ -294,11 +294,11 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
/* Support cases where matrix A requires a transposition. */ \
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \
if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_20 \
@@ -342,19 +342,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
conj_t conjat = bli_obj_conj_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t m = bli_obj_vector_dim( *x ); \
dim_t b_n = bli_obj_vector_dim( *y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \
conj_t conjat = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_vector_dim( x ); \
dim_t b_n = bli_obj_vector_dim( y ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( y ); \
\
void* buf_alpha; \
void* buf_beta; \
@@ -371,11 +371,11 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
/* Support cases where matrix A requires a transposition. */ \
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \
if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
\
/* Invoke the void pointer-based function. */ \
bli_call_ft_14 \

View File

@@ -54,20 +54,20 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
uplo_t uplox = bli_obj_uplo( *x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \
dim_t m = bli_obj_length( *y ); \
dim_t n = bli_obj_width( *y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \
doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( x ); \
trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( y ); \
\
if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \
@@ -109,20 +109,20 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
uplo_t uplox = bli_obj_uplo( *x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \
dim_t m = bli_obj_length( *y ); \
dim_t n = bli_obj_width( *y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \
doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( x ); \
trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( y ); \
\
void* buf_alpha; \
\
@@ -135,7 +135,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_14 \
@@ -173,17 +173,17 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
uplo_t uplox = bli_obj_uplo( *x ); \
dim_t m = bli_obj_length( *x ); \
dim_t n = bli_obj_width( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( x ); \
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
void* buf_alpha; \
\
@@ -194,7 +194,7 @@ void PASTEMAC(opname,EX_SUF) \
PASTEMAC(opname,_check)( alpha, x ); \
\
/* Alias x to x_local so we can apply alpha if it is non-unit. */ \
bli_obj_alias_to( *x, x_local ); \
bli_obj_alias_to( x, &x_local ); \
\
/* If alpha is non-unit, apply it to the scalar attached to x. */ \
if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) \
@@ -209,7 +209,7 @@ void PASTEMAC(opname,EX_SUF) \
\
/* Grab the address of the internal scalar buffer for the scalar
attached to x. */ \
buf_alpha = bli_obj_internal_scalar_buffer( x_local ); \
buf_alpha = bli_obj_internal_scalar_buffer( &x_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_11 \
@@ -245,17 +245,17 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *x ); \
num_t dt = bli_obj_dt( x ); \
\
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \
diag_t diagx = bli_obj_diag( *x ); \
uplo_t uplox = bli_obj_uplo( *x ); \
dim_t m = bli_obj_length( *x ); \
dim_t n = bli_obj_width( *x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( x ); \
dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( x ); \
\
void* buf_alpha; \
\
@@ -268,7 +268,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_11 \

View File

@@ -146,7 +146,7 @@ void PASTEMAC(ch,opname) \
ctype* one = PASTEMAC(ch,1); \
\
if ( bli_does_trans( transx ) ) \
bli_negate_diag_offset( diagoffy ); \
bli_negate_diag_offset( &diagoffy ); \
\
PASTEMAC(ch,setd) \
( \
@@ -299,7 +299,7 @@ void PASTEMAC(ch,opname) \
doff_t diagoffy = diagoffx; \
\
if ( bli_does_trans( transx ) ) \
bli_negate_diag_offset( diagoffy ); \
bli_negate_diag_offset( &diagoffy ); \
\
PASTEMAC(ch,setd) \
( \

View File

@@ -68,10 +68,13 @@ void PASTEMAC(ch,opname) \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
bli_set_dims_incs_uplo_2m( diagoffx, diagx, transx, \
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
uplox_eff, n_elem_max, n_iter, incx, ldx, incy, ldy, \
ij0, n_shift ); \
bli_set_dims_incs_uplo_2m \
( \
diagoffx, diagx, transx, \
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
&ij0, &n_shift \
); \
\
if ( bli_is_zeros( uplox_eff ) ) return; \
\
@@ -181,10 +184,13 @@ void PASTEMAC(ch,opname) \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
bli_set_dims_incs_uplo_2m( diagoffx, diagx, transx, \
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
uplox_eff, n_elem_max, n_iter, incx, ldx, incy, ldy, \
ij0, n_shift ); \
bli_set_dims_incs_uplo_2m \
( \
diagoffx, diagx, transx, \
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
&ij0, &n_shift \
); \
\
if ( bli_is_zeros( uplox_eff ) ) return; \
\
@@ -292,10 +298,13 @@ void PASTEMAC(ch,opname) \
dim_t ij0, n_shift; \
\
/* Set various loop parameters. */ \
bli_set_dims_incs_uplo_1m( diagoffx, diagx, \
uplox, m, n, rs_x, cs_x, \
uplox_eff, n_elem_max, n_iter, incx, ldx, \
ij0, n_shift ); \
bli_set_dims_incs_uplo_1m \
( \
diagoffx, diagx, \
uplox, m, n, rs_x, cs_x, \
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, \
&ij0, &n_shift \
); \
\
if ( bli_is_zeros( uplox_eff ) ) return; \
\

View File

@@ -108,33 +108,33 @@ void bli_packm_blk_var1
thrinfo_t* t
)
{
num_t dt_cp = bli_obj_dt( *c );
num_t dt_cp = bli_obj_dt( c );
struc_t strucc = bli_obj_struc( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );
diag_t diagc = bli_obj_diag( *c );
uplo_t uploc = bli_obj_uplo( *c );
trans_t transc = bli_obj_conjtrans_status( *c );
pack_t schema = bli_obj_pack_schema( *p );
bool_t invdiag = bli_obj_has_inverted_diag( *p );
bool_t revifup = bli_obj_is_pack_rev_if_upper( *p );
bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p );
struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( c );
trans_t transc = bli_obj_conjtrans_status( c );
pack_t schema = bli_obj_pack_schema( p );
bool_t invdiag = bli_obj_has_inverted_diag( p );
bool_t revifup = bli_obj_is_pack_rev_if_upper( p );
bool_t reviflo = bli_obj_is_pack_rev_if_lower( p );
dim_t m_p = bli_obj_length( *p );
dim_t n_p = bli_obj_width( *p );
dim_t m_max_p = bli_obj_padded_length( *p );
dim_t n_max_p = bli_obj_padded_width( *p );
dim_t m_p = bli_obj_length( p );
dim_t n_p = bli_obj_width( p );
dim_t m_max_p = bli_obj_padded_length( p );
dim_t n_max_p = bli_obj_padded_width( p );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t rs_c = bli_obj_row_stride( *c );
inc_t cs_c = bli_obj_col_stride( *c );
void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( c );
void* buf_p = bli_obj_buffer_at_off( *p );
inc_t rs_p = bli_obj_row_stride( *p );
inc_t cs_p = bli_obj_col_stride( *p );
inc_t is_p = bli_obj_imag_stride( *p );
dim_t pd_p = bli_obj_panel_dim( *p );
inc_t ps_p = bli_obj_panel_stride( *p );
void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( p );
inc_t is_p = bli_obj_imag_stride( p );
dim_t pd_p = bli_obj_panel_dim( p );
inc_t ps_p = bli_obj_panel_stride( p );
obj_t kappa;
obj_t* kappa_p;
@@ -155,7 +155,7 @@ void bli_packm_blk_var1
// higher-level operation. Thus, we use BLIS_ONE for kappa so
// that the underlying packm implementation does not perform
// any scaling during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
}
else // if ( bli_is_ind_packed( schema ) )
{
@@ -187,7 +187,7 @@ void bli_packm_blk_var1
}
// Acquire the buffer to the kappa chosen above.
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, kappa_p );
}
@@ -344,10 +344,10 @@ void PASTEMAC(ch,varname) \
express the remaining parameters and code. */ \
if ( bli_does_trans( transc ) ) \
{ \
bli_swap_incs( rs_c, cs_c ); \
bli_negate_diag_offset( diagoffc ); \
bli_toggle_uplo( uploc ); \
bli_toggle_trans( transc ); \
bli_swap_incs( &rs_c, &cs_c ); \
bli_negate_diag_offset( &diagoffc ); \
bli_toggle_uplo( &uploc ); \
bli_toggle_trans( &transc ); \
} \
\
/* Create flags to indicate row or column storage. Note that the

View File

@@ -68,33 +68,33 @@ void bli_packm_blk_var1( obj_t* c,
obj_t* p,
packm_thrinfo_t* t )
{
num_t dt_cp = bli_obj_dt( *c );
num_t dt_cp = bli_obj_dt( c );
struc_t strucc = bli_obj_struc( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );
diag_t diagc = bli_obj_diag( *c );
uplo_t uploc = bli_obj_uplo( *c );
trans_t transc = bli_obj_conjtrans_status( *c );
pack_t schema = bli_obj_pack_schema( *p );
bool_t invdiag = bli_obj_has_inverted_diag( *p );
bool_t revifup = bli_obj_is_pack_rev_if_upper( *p );
bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p );
struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( c );
trans_t transc = bli_obj_conjtrans_status( c );
pack_t schema = bli_obj_pack_schema( p );
bool_t invdiag = bli_obj_has_inverted_diag( p );
bool_t revifup = bli_obj_is_pack_rev_if_upper( p );
bool_t reviflo = bli_obj_is_pack_rev_if_lower( p );
dim_t m_p = bli_obj_length( *p );
dim_t n_p = bli_obj_width( *p );
dim_t m_max_p = bli_obj_padded_length( *p );
dim_t n_max_p = bli_obj_padded_width( *p );
dim_t m_p = bli_obj_length( p );
dim_t n_p = bli_obj_width( p );
dim_t m_max_p = bli_obj_padded_length( p );
dim_t n_max_p = bli_obj_padded_width( p );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t rs_c = bli_obj_row_stride( *c );
inc_t cs_c = bli_obj_col_stride( *c );
void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( c );
void* buf_p = bli_obj_buffer_at_off( *p );
inc_t rs_p = bli_obj_row_stride( *p );
inc_t cs_p = bli_obj_col_stride( *p );
inc_t is_p = bli_obj_imag_stride( *p );
dim_t pd_p = bli_obj_panel_dim( *p );
inc_t ps_p = bli_obj_panel_stride( *p );
void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( p );
inc_t is_p = bli_obj_imag_stride( p );
dim_t pd_p = bli_obj_panel_dim( p );
inc_t ps_p = bli_obj_panel_stride( p );
void* buf_kappa;
@@ -107,7 +107,7 @@ void bli_packm_blk_var1( obj_t* c,
// alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
// for kappa so that the underlying packm implementation does not
// scale during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
// Choose the correct func_t object.
packm_kers = packm_struc_cxk_kers;
@@ -222,10 +222,10 @@ void PASTEMAC(ch,varname) \
express the remaining parameters and code. */ \
if ( bli_does_trans( transc ) ) \
{ \
bli_swap_incs( rs_c, cs_c ); \
bli_negate_diag_offset( diagoffc ); \
bli_toggle_uplo( uploc ); \
bli_toggle_trans( transc ); \
bli_swap_incs( &rs_c, &cs_c ); \
bli_negate_diag_offset( &diagoffc ); \
bli_toggle_uplo( &uploc ); \
bli_toggle_trans( &transc ); \
} \
\
/* Create flags to indicate row or column storage. Note that the

View File

@@ -83,9 +83,9 @@ siz_t bli_packm_init
// not important, as long as it's packed into contiguous rows or
// contiguous columns. A good example of this is packing for matrix
// operands in the level-2 operations.
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
{
bli_obj_alias_to( *a, *p );
bli_obj_alias_to( a, p );
return 0;
}
@@ -97,18 +97,18 @@ siz_t bli_packm_init
// already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == pack_schema )
if ( bli_obj_pack_schema( a ) == pack_schema )
{
bli_obj_alias_to( *a, *p );
bli_obj_alias_to( a, p );
return 0;
}
#endif
// If the object is marked as being filled with zeros, then we can skip
// the packm operation entirely and alias.
if ( bli_obj_is_zeros( *a ) )
if ( bli_obj_is_zeros( a ) )
{
bli_obj_alias_to( *a, *p );
bli_obj_alias_to( a, p );
return 0;
}
@@ -189,10 +189,10 @@ siz_t bli_packm_init_pack
{
bli_init_once();
num_t dt = bli_obj_dt( *a );
trans_t transa = bli_obj_onlytrans_status( *a );
dim_t m_a = bli_obj_length( *a );
dim_t n_a = bli_obj_width( *a );
num_t dt = bli_obj_dt( a );
trans_t transa = bli_obj_onlytrans_status( a );
dim_t m_a = bli_obj_length( a );
dim_t n_a = bli_obj_width( a );
dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx );
dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx );
dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx );
@@ -207,7 +207,7 @@ siz_t bli_packm_init_pack
// We begin by copying the fields of A.
bli_obj_alias_to( *a, *p );
bli_obj_alias_to( a, p );
// Update the dimension fields to explicitly reflect a transposition,
// if needed.
@@ -219,13 +219,13 @@ siz_t bli_packm_init_pack
// we either toggle the uplo of P.
// Finally, we mark P as dense since we assume that all matrices,
// regardless of structure, will be densified.
bli_obj_set_dims_with_trans( transa, m_a, n_a, *p );
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
bli_obj_set_dims_with_trans( transa, m_a, n_a, p );
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, p );
if ( bli_does_trans( transa ) )
{
bli_obj_negate_diag_offset( *p );
if ( bli_obj_is_upper_or_lower( *a ) )
bli_obj_toggle_uplo( *p );
bli_obj_negate_diag_offset( p );
if ( bli_obj_is_upper_or_lower( a ) )
bli_obj_toggle_uplo( p );
}
// If we are packing micro-panels, mark P as dense. Otherwise, we are
@@ -236,22 +236,22 @@ siz_t bli_packm_init_pack
// execute a "lower" or "upper" branch of code.
if ( bli_is_panel_packed( schema ) )
{
bli_obj_set_uplo( BLIS_DENSE, *p );
bli_obj_set_uplo( BLIS_DENSE, p );
}
// Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *p );
bli_obj_set_offs( 0, 0, p );
// Set the invert diagonal field.
bli_obj_set_invert_diag( invert_diag, *p );
bli_obj_set_invert_diag( invert_diag, p );
// Set the pack status of P to the pack schema prescribed in the control
// tree node.
bli_obj_set_pack_schema( schema, *p );
bli_obj_set_pack_schema( schema, p );
// Set the packing order bits.
bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p );
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p );
bli_obj_set_pack_order_if_upper( pack_ord_if_up, p );
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, p );
// Compute the dimensions padded by the dimension multiples. These
// dimensions will be the dimensions of the packed matrices, including
@@ -260,15 +260,15 @@ siz_t bli_packm_init_pack
// in P) and aligning them to the dimension multiples (typically equal
// to register blocksizes). This does waste a little bit of space for
// level-2 operations, but that's okay with us.
m_p = bli_obj_length( *p );
n_p = bli_obj_width( *p );
m_p = bli_obj_length( p );
n_p = bli_obj_width( p );
m_p_pad = bli_align_dim_to_mult( m_p, bmult_m_def );
n_p_pad = bli_align_dim_to_mult( n_p, bmult_n_def );
// Save the padded dimensions into the packed object. It is important
// to save these dimensions since they represent the actual dimensions
// of the zero-padded matrix.
bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p );
bli_obj_set_padded_dims( m_p_pad, n_p_pad, p );
// Now we prepare to compute strides, align them, and compute the
// total number of bytes needed for the packed buffer. The caller
@@ -276,7 +276,7 @@ siz_t bli_packm_init_pack
// from the memory allocator.
// Extract the element size for the packed object.
elem_size_p = bli_obj_elem_size( *p );
elem_size_p = bli_obj_elem_size( p );
// Set the row and column strides of p based on the pack schema.
if ( bli_is_row_packed( schema ) &&
@@ -297,7 +297,7 @@ siz_t bli_packm_init_pack
BLIS_HEAP_STRIDE_ALIGN_SIZE );
// Store the strides in P.
bli_obj_set_strides( rs_p, cs_p, *p );
bli_obj_set_strides( rs_p, cs_p, p );
// Compute the size of the packed buffer.
size_p = m_p_pad * rs_p * elem_size_p;
@@ -320,7 +320,7 @@ siz_t bli_packm_init_pack
BLIS_HEAP_STRIDE_ALIGN_SIZE );
// Store the strides in P.
bli_obj_set_strides( rs_p, cs_p, *p );
bli_obj_set_strides( rs_p, cs_p, p );
// Compute the size of the packed buffer.
size_p = cs_p * n_p_pad * elem_size_p;
@@ -408,12 +408,12 @@ siz_t bli_packm_init_pack
else is_p = 1;
// Store the strides and panel dimension in P.
bli_obj_set_strides( rs_p, cs_p, *p );
bli_obj_set_imag_stride( is_p, *p );
bli_obj_set_panel_dim( m_panel, *p );
bli_obj_set_panel_stride( ps_p, *p );
bli_obj_set_panel_length( m_panel, *p );
bli_obj_set_panel_width( n_p, *p );
bli_obj_set_strides( rs_p, cs_p, p );
bli_obj_set_imag_stride( is_p, p );
bli_obj_set_panel_dim( m_panel, p );
bli_obj_set_panel_stride( ps_p, p );
bli_obj_set_panel_length( m_panel, p );
bli_obj_set_panel_width( n_p, p );
// Compute the size of the packed buffer.
size_p = ps_p * ( m_p_pad / m_panel ) * elem_size_p;
@@ -501,12 +501,12 @@ siz_t bli_packm_init_pack
else is_p = 1;
// Store the strides and panel dimension in P.
bli_obj_set_strides( rs_p, cs_p, *p );
bli_obj_set_imag_stride( is_p, *p );
bli_obj_set_panel_dim( n_panel, *p );
bli_obj_set_panel_stride( ps_p, *p );
bli_obj_set_panel_length( m_p, *p );
bli_obj_set_panel_width( n_panel, *p );
bli_obj_set_strides( rs_p, cs_p, p );
bli_obj_set_imag_stride( is_p, p );
bli_obj_set_panel_dim( n_panel, p );
bli_obj_set_panel_stride( ps_p, p );
bli_obj_set_panel_length( m_p, p );
bli_obj_set_panel_width( n_panel, p );
// Compute the size of the packed buffer.
size_p = ps_p * ( n_p_pad / n_panel ) * elem_size_p;

View File

@@ -53,7 +53,7 @@ void bli_packm_int
// Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block.
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
//if ( bli_obj_has_zero_dim( a ) ) bli_abort();
// Let us now check to see if the object has already been packed. First
// we check if it has been packed to an unspecified (row or column)
@@ -65,7 +65,7 @@ void bli_packm_int
// not important, as long as it's packed into contiguous rows or
// contiguous columns. A good example of this is packing for matrix
// operands in the level-2 operations.
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
{
return;
}
@@ -78,14 +78,14 @@ void bli_packm_int
// already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == bli_cntl_packm_params_pack_schema( cntl ) )
if ( bli_obj_pack_schema( a ) == bli_cntl_packm_params_pack_schema( cntl ) )
{
return;
}
// If the object is marked as being filled with zeros, then we can skip
// the packm operation entirely.
if ( bli_obj_is_zeros( *a ) )
if ( bli_obj_is_zeros( a ) )
{
return;
}

View File

@@ -54,14 +54,14 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
// Partitioning top-to-bottom through packed column panels (which are
// row-stored) is not yet supported.
if ( bli_obj_is_col_packed( *obj ) )
if ( bli_obj_is_col_packed( obj ) )
{
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}
// Query the dimensions of the parent object.
m = bli_obj_length( *obj );
n = bli_obj_width( *obj );
m = bli_obj_length( obj );
n = bli_obj_width( obj );
// Foolproofing: do not let b exceed what's left of the m dimension at
// row offset i.
@@ -71,10 +71,10 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
// stride fields of the parent object. Note that this omits copying view
// information because the new partition will have its own dimensions
// and offsets.
bli_obj_init_subpart_from( *obj, *sub_obj );
bli_obj_init_subpart_from( obj, sub_obj );
// Modify offsets and dimensions of requested partition.
bli_obj_set_dims( b, n, *sub_obj );
bli_obj_set_dims( b, n, sub_obj );
// Tweak the padded length of the subpartition to trick the underlying
// implementation into only zero-padding for the narrow submatrix of
@@ -86,25 +86,25 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
// b for the edge iteration). In these cases, we arrive at the new
// packed length by simply subtracting off i.
{
dim_t m_pack_max = bli_obj_padded_length( *sub_obj );
dim_t m_pack_max = bli_obj_padded_length( sub_obj );
dim_t m_pack_cur;
if ( i + b == m ) m_pack_cur = m_pack_max - i;
else m_pack_cur = b;
bli_obj_set_padded_length( m_pack_cur, *sub_obj );
bli_obj_set_padded_length( m_pack_cur, sub_obj );
}
// Translate the desired offsets to a panel offset and adjust the
// buffer pointer of the subpartition object.
{
char* buf_p = bli_obj_buffer( *sub_obj );
siz_t elem_size = bli_obj_elem_size( *sub_obj );
char* buf_p = bli_obj_buffer( sub_obj );
siz_t elem_size = bli_obj_elem_size( sub_obj );
dim_t off_to_panel = bli_packm_offset_to_panel_for( i, sub_obj );
buf_p = buf_p + elem_size * off_to_panel;
bli_obj_set_buffer( ( void* )buf_p, *sub_obj );
bli_obj_set_buffer( buf_p, sub_obj );
}
}
@@ -130,14 +130,14 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
// Partitioning left-to-right through packed row panels (which are
// column-stored) is not yet supported.
if ( bli_obj_is_row_packed( *obj ) )
if ( bli_obj_is_row_packed( obj ) )
{
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
}
// Query the dimensions of the parent object.
m = bli_obj_length( *obj );
n = bli_obj_width( *obj );
m = bli_obj_length( obj );
n = bli_obj_width( obj );
// Foolproofing: do not let b exceed what's left of the n dimension at
// column offset j.
@@ -147,10 +147,10 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
// stride fields of the parent object. Note that this omits copying view
// information because the new partition will have its own dimensions
// and offsets.
bli_obj_init_subpart_from( *obj, *sub_obj );
bli_obj_init_subpart_from( obj, sub_obj );
// Modify offsets and dimensions of requested partition.
bli_obj_set_dims( m, b, *sub_obj );
bli_obj_set_dims( m, b, sub_obj );
// Tweak the padded width of the subpartition to trick the underlying
// implementation into only zero-padding for the narrow submatrix of
@@ -162,25 +162,25 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
// b for the edge iteration). In these cases, we arrive at the new
// packed width by simply subtracting off j.
{
dim_t n_pack_max = bli_obj_padded_width( *sub_obj );
dim_t n_pack_max = bli_obj_padded_width( sub_obj );
dim_t n_pack_cur;
if ( j + b == n ) n_pack_cur = n_pack_max - j;
else n_pack_cur = b;
bli_obj_set_padded_width( n_pack_cur, *sub_obj );
bli_obj_set_padded_width( n_pack_cur, sub_obj );
}
// Translate the desired offsets to a panel offset and adjust the
// buffer pointer of the subpartition object.
{
char* buf_p = bli_obj_buffer( *sub_obj );
siz_t elem_size = bli_obj_elem_size( *sub_obj );
char* buf_p = bli_obj_buffer( sub_obj );
siz_t elem_size = bli_obj_elem_size( sub_obj );
dim_t off_to_panel = bli_packm_offset_to_panel_for( j, sub_obj );
buf_p = buf_p + elem_size * off_to_panel;
bli_obj_set_buffer( ( void* )buf_p, *sub_obj );
bli_obj_set_buffer( buf_p, sub_obj );
}
}
@@ -201,47 +201,47 @@ dim_t bli_packm_offset_to_panel_for( dim_t offmn, obj_t* p )
{
dim_t panel_off;
if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROWS )
if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROWS )
{
// For the "packed rows" schema, a single row is effectively one
// row panel, and so we use the row offset as the panel offset.
// Then we multiply this offset by the effective panel stride
// (ie: the row stride) to arrive at the desired offset.
panel_off = offmn * bli_obj_row_stride( *p );
panel_off = offmn * bli_obj_row_stride( p );
}
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COLUMNS )
else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COLUMNS )
{
// For the "packed columns" schema, a single column is effectively one
// column panel, and so we use the column offset as the panel offset.
// Then we multiply this offset by the effective panel stride
// (ie: the column stride) to arrive at the desired offset.
panel_off = offmn * bli_obj_col_stride( *p );
panel_off = offmn * bli_obj_col_stride( p );
}
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROW_PANELS )
else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROW_PANELS )
{
// For the "packed row panels" schema, the column stride is equal to
// the panel dimension (length). So we can divide it into offmn
// (interpreted as a row offset) to arrive at a panel offset. Then
// we multiply this offset by the panel stride to arrive at the total
// offset to the panel (in units of elements).
panel_off = offmn / bli_obj_col_stride( *p );
panel_off = panel_off * bli_obj_panel_stride( *p );
panel_off = offmn / bli_obj_col_stride( p );
panel_off = panel_off * bli_obj_panel_stride( p );
// Sanity check.
if ( offmn % bli_obj_col_stride( *p ) > 0 ) bli_abort();
if ( offmn % bli_obj_col_stride( p ) > 0 ) bli_abort();
}
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COL_PANELS )
else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COL_PANELS )
{
// For the "packed column panels" schema, the row stride is equal to
// the panel dimension (width). So we can divide it into offmn
// (interpreted as a column offset) to arrive at a panel offset. Then
// we multiply this offset by the panel stride to arrive at the total
// offset to the panel (in units of elements).
panel_off = offmn / bli_obj_row_stride( *p );
panel_off = panel_off * bli_obj_panel_stride( *p );
panel_off = offmn / bli_obj_row_stride( p );
panel_off = panel_off * bli_obj_panel_stride( p );
// Sanity check.
if ( offmn % bli_obj_row_stride( *p ) > 0 ) bli_abort();
if ( offmn % bli_obj_row_stride( p ) > 0 ) bli_abort();
}
else
{

View File

@@ -308,10 +308,10 @@ void PASTEMAC(ch,varname) \
{ \
c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \
bli_swap_incs( &incc, &ldc ); \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \
bli_toggle_conj( &conjc ); \
} \
\
/* Pack the full panel. */ \
@@ -376,7 +376,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \
bli_toggle_conj( &conjc12 ); \
} \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -402,7 +402,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \
bli_toggle_conj( &conjc10 ); \
} \
\
/* Pack to p10. For upper storage, this includes the unstored
@@ -573,8 +573,8 @@ void PASTEMAC(ch,varname) \
ctype* restrict zero = PASTEMAC(ch,0); \
uplo_t uplop = uploc; \
\
bli_toggle_uplo( uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\
PASTEMAC(ch,setm) \
( \

View File

@@ -310,10 +310,10 @@ void PASTEMAC(ch,varname) \
{ \
c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \
bli_swap_incs( &incc, &ldc ); \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \
bli_toggle_conj( &conjc ); \
} \
\
/* Pack the full panel. */ \
@@ -380,7 +380,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \
bli_toggle_conj( &conjc12 ); \
} \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -406,7 +406,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \
bli_toggle_conj( &conjc10 ); \
} \
\
/* Pack to p10. For upper storage, this includes the unstored
@@ -581,8 +581,8 @@ void PASTEMAC(ch,varname) \
doff_t diagoffp11_0 = 0; \
dim_t p11_0_dim = panel_dim - 1; \
\
bli_toggle_uplo( uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp11_0 ); \
bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp11_0 ); \
\
/* Note that this macro works a little differently than the setm
operation. Here, we pass in the dimensions of only p11, rather

View File

@@ -363,10 +363,10 @@ void PASTEMAC(ch,varname) \
{ \
c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \
bli_swap_incs( &incc, &ldc ); \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \
bli_toggle_conj( &conjc ); \
} \
\
/* Pack the full panel. */ \
@@ -436,7 +436,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \
bli_toggle_conj( &conjc12 ); \
} \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -462,7 +462,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \
bli_toggle_conj( &conjc10 ); \
} \
\
/* Pack to p10. For upper storage, this includes the unstored
@@ -744,8 +744,8 @@ void PASTEMAC(ch,varname) \
ctype_r* restrict zero_r = PASTEMAC(chr,0); \
uplo_t uplop = uploc; \
\
bli_toggle_uplo( uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\
PASTEMAC(chr,setm) \
( \

View File

@@ -337,10 +337,10 @@ void PASTEMAC(ch,varname) \
{ \
c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \
bli_swap_incs( &incc, &ldc ); \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \
bli_toggle_conj( &conjc ); \
} \
\
/* Pack the full panel. */ \
@@ -410,7 +410,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \
bli_toggle_conj( &conjc12 ); \
} \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -436,7 +436,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \
bli_toggle_conj( &conjc10 ); \
} \
\
/* Pack to p10. For upper storage, this includes the unstored
@@ -676,8 +676,8 @@ void PASTEMAC(ch,varname) \
ctype_r* restrict zero_r = PASTEMAC(chr,0); \
uplo_t uplop = uploc; \
\
bli_toggle_uplo( uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\
PASTEMAC(chr,setm) \
( \

View File

@@ -305,10 +305,10 @@ void PASTEMAC(ch,varname) \
{ \
c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \
bli_swap_incs( &incc, &ldc ); \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \
bli_toggle_conj( &conjc ); \
} \
\
/* Pack the full panel. */ \
@@ -376,7 +376,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \
bli_toggle_conj( &conjc12 ); \
} \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -402,7 +402,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \
\
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \
bli_toggle_conj( &conjc10 ); \
} \
\
/* Pack to p10. For upper storage, this includes the unstored
@@ -568,8 +568,8 @@ void PASTEMAC(ch,varname) \
ctype_r* restrict zero_r = PASTEMAC(chr,0); \
uplo_t uplop = uploc; \
\
bli_toggle_uplo( uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\
PASTEMAC(chr,setm) \
( \

View File

@@ -64,26 +64,26 @@ void bli_packm_unb_var1
thrinfo_t* thread
)
{
num_t dt_cp = bli_obj_dt( *c );
num_t dt_cp = bli_obj_dt( c );
struc_t strucc = bli_obj_struc( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );
diag_t diagc = bli_obj_diag( *c );
uplo_t uploc = bli_obj_uplo( *c );
trans_t transc = bli_obj_conjtrans_status( *c );
struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( c );
trans_t transc = bli_obj_conjtrans_status( c );
dim_t m_p = bli_obj_length( *p );
dim_t n_p = bli_obj_width( *p );
dim_t m_max_p = bli_obj_padded_length( *p );
dim_t n_max_p = bli_obj_padded_width( *p );
dim_t m_p = bli_obj_length( p );
dim_t n_p = bli_obj_width( p );
dim_t m_max_p = bli_obj_padded_length( p );
dim_t n_max_p = bli_obj_padded_width( p );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t rs_c = bli_obj_row_stride( *c );
inc_t cs_c = bli_obj_col_stride( *c );
void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( c );
void* buf_p = bli_obj_buffer_at_off( *p );
inc_t rs_p = bli_obj_row_stride( *p );
inc_t cs_p = bli_obj_col_stride( *p );
void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( p );
void* buf_kappa;
@@ -94,7 +94,7 @@ void bli_packm_unb_var1
// the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
// for kappa so that the underlying packm implementation does not scale
// during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
// Index into the type combination array to extract the correct
// function pointer.
@@ -180,15 +180,15 @@ void PASTEMAC(ch,varname) \
side of the diagonal. */ \
c_cast = c_cast + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \
bli_negate_diag_offset( diagoffc ); \
bli_toggle_trans( transc ); \
bli_negate_diag_offset( &diagoffc ); \
bli_toggle_trans( &transc ); \
if ( bli_is_upper( uploc ) ) diagoffc += 1; \
else if ( bli_is_lower( uploc ) ) diagoffc -= 1; \
\
/* If c is Hermitian, we need to apply a conjugation when
copying the region opposite the diagonal. */ \
if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( transc ); \
transc = bli_trans_toggled_conj( transc ); \
\
/* Copy the data from the region opposite the diagonal of c
(as specified by the original value of diagoffc). Notice
@@ -217,16 +217,16 @@ void PASTEMAC(ch,varname) \
we can derive from the parameters given. */ \
if ( bli_does_trans( transc ) ) \
{ \
bli_negate_diag_offset( diagoffp ); \
bli_toggle_uplo( uplop ); \
bli_negate_diag_offset( &diagoffp ); \
bli_toggle_uplo( &uplop ); \
} \
\
/* For triangular matrices, we wish to reference the region
strictly opposite the diagonal of C. This amounts to
toggling uploc and then shifting the diagonal offset to
shrink the stored region (by one diagonal). */ \
bli_toggle_uplo( uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\
/* Set the region opposite the diagonal of p to zero. */ \
PASTEMAC(ch,setm) \

View File

@@ -57,7 +57,7 @@ void bli_scalm_int( obj_t* alpha,
FUNCPTR_T f;
// Return early if one of the matrix operands has a zero dimension.
if ( bli_obj_has_zero_dim( *x ) ) return;
if ( bli_obj_has_zero_dim( x ) ) return;
// Check parameters.
if ( bli_error_checking_is_enabled() )

View File

@@ -64,17 +64,17 @@ void bli_unpackm_blk_var1
thrinfo_t* thread
)
{
num_t dt_cp = bli_obj_dt( *c );
num_t dt_cp = bli_obj_dt( c );
// Normally we take the parameters from the source argument. But here,
// the packm/unpackm framework is not yet solidified enough for us to
// assume that at this point struc(P) == struc(C), (ie: since
// densification may have marked P's structure as dense when the root
// is upper or lower). So, we take the struc field from C, not P.
struc_t strucc = bli_obj_struc( *c );
doff_t diagoffc = bli_obj_diag_offset( *c );
diag_t diagc = bli_obj_diag( *c );
uplo_t uploc = bli_obj_uplo( *c );
struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( c );
// Again, normally the trans argument is on the source matrix. But we
// know that the packed matrix is not transposed. If there is to be a
@@ -83,22 +83,22 @@ void bli_unpackm_blk_var1
// the trans status (not the conjugation status), since we probably
// don't want to un-conjugate if the original matrix was conjugated
// when packed.
trans_t transc = bli_obj_onlytrans_status( *c );
trans_t transc = bli_obj_onlytrans_status( c );
dim_t m_c = bli_obj_length( *c );
dim_t n_c = bli_obj_width( *c );
dim_t m_panel = bli_obj_panel_length( *c );
dim_t n_panel = bli_obj_panel_width( *c );
dim_t m_c = bli_obj_length( c );
dim_t n_c = bli_obj_width( c );
dim_t m_panel = bli_obj_panel_length( c );
dim_t n_panel = bli_obj_panel_width( c );
void* buf_p = bli_obj_buffer_at_off( *p );
inc_t rs_p = bli_obj_row_stride( *p );
inc_t cs_p = bli_obj_col_stride( *p );
dim_t pd_p = bli_obj_panel_dim( *p );
inc_t ps_p = bli_obj_panel_stride( *p );
void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( p );
dim_t pd_p = bli_obj_panel_dim( p );
inc_t ps_p = bli_obj_panel_stride( p );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t rs_c = bli_obj_row_stride( *c );
inc_t cs_c = bli_obj_col_stride( *c );
void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( c );
FUNCPTR_T f;
@@ -170,10 +170,10 @@ void PASTEMAC(ch,varname) \
express the remaining parameters and code. */ \
if ( bli_does_trans( transc ) ) \
{ \
bli_swap_incs( rs_c, cs_c ); \
bli_negate_diag_offset( diagoffc ); \
bli_toggle_uplo( uploc ); \
bli_toggle_trans( transc ); \
bli_swap_incs( &rs_c, &cs_c ); \
bli_negate_diag_offset( &diagoffc ); \
bli_toggle_uplo( &uploc ); \
bli_toggle_trans( &transc ); \
} \
\
/* If the strides of p indicate row storage, then we are packing to

View File

@@ -54,7 +54,7 @@ void bli_unpackm_int
// If p was aliased to a during the pack stage (because it was already
// in an acceptable packed/contiguous format), then no unpack is actually
// necessary, so we return.
if ( bli_obj_is_alias_of( *p, *a ) ) return;
if ( bli_obj_is_alias_of( p, a ) ) return;
// Extract the function pointer from the current control tree node.
f = bli_cntl_unpackm_params_var_func( cntl );

View File

@@ -59,22 +59,22 @@ void bli_unpackm_unb_var1
thrinfo_t* thread
)
{
num_t dt_pc = bli_obj_dt( *p );
num_t dt_pc = bli_obj_dt( p );
doff_t diagoffp = bli_obj_diag_offset( *p );
uplo_t uplop = bli_obj_uplo( *p );
trans_t transc = bli_obj_onlytrans_status( *c );
doff_t diagoffp = bli_obj_diag_offset( p );
uplo_t uplop = bli_obj_uplo( p );
trans_t transc = bli_obj_onlytrans_status( c );
dim_t m_c = bli_obj_length( *c );
dim_t n_c = bli_obj_width( *c );
dim_t m_c = bli_obj_length( c );
dim_t n_c = bli_obj_width( c );
void* buf_p = bli_obj_buffer_at_off( *p );
inc_t rs_p = bli_obj_row_stride( *p );
inc_t cs_p = bli_obj_col_stride( *p );
void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( p );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t rs_c = bli_obj_row_stride( *c );
inc_t cs_c = bli_obj_col_stride( *c );
void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( c );
FUNCPTR_T f;

View File

@@ -330,10 +330,10 @@ void bli_xxmv_check
e_val = bli_check_vector_object( y );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).
@@ -392,10 +392,10 @@ void bli_xxr_check
e_val = bli_check_matrix_object( a );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( *a ) );
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val );
// Check object buffers (for non-NULLness).

View File

@@ -57,19 +57,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
trans_t transa = bli_obj_conjtrans_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t m = bli_obj_length( *a ); \
dim_t n = bli_obj_width( *a ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
trans_t transa = bli_obj_conjtrans_status( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
\
void* buf_alpha; \
void* buf_beta; \
@@ -86,8 +86,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_14 \
@@ -126,19 +126,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
dim_t m = bli_obj_length( *a ); \
dim_t n = bli_obj_width( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_alpha; \
\
@@ -151,7 +151,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_13 \
@@ -190,19 +190,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
conj_t conja = bli_obj_conj_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t m = bli_obj_length( *a ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
uplo_t uploa = bli_obj_uplo( a ); \
conj_t conja = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_length( a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
\
void* buf_alpha; \
void* buf_beta; \
@@ -219,8 +219,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_14 \
@@ -259,16 +259,16 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
dim_t m = bli_obj_length( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
uplo_t uploa = bli_obj_uplo( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_length( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_alpha; \
\
@@ -281,7 +281,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_10 \
@@ -318,19 +318,19 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
dim_t m = bli_obj_length( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
uplo_t uploa = bli_obj_uplo( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
dim_t m = bli_obj_length( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_alpha; \
\
@@ -343,7 +343,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_13 \
@@ -381,17 +381,17 @@ void PASTEMAC(opname,EX_SUF) \
\
BLIS_OAPI_CNTX_DECL \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \
diag_t diaga = bli_obj_diag( *a ); \
dim_t m = bli_obj_length( *a ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
uplo_t uploa = bli_obj_uplo( a ); \
trans_t transa = bli_obj_conjtrans_status( a ); \
diag_t diaga = bli_obj_diag( a ); \
dim_t m = bli_obj_length( a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_alpha; \
\
@@ -404,7 +404,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\
/* Invoke the typed function. */ \
bli_call_ft_11 \

View File

@@ -60,7 +60,7 @@ void PASTEMAC(ch,opname) \
dim_t m_y, n_x; \
\
/* Determine the dimensions of y and x. */ \
bli_set_dims_with_trans( transa, m, n, m_y, n_x ); \
bli_set_dims_with_trans( transa, m, n, &m_y, &n_x ); \
\
/* If y has zero elements, return early. */ \
if ( bli_zero_dim1( m_y ) ) return; \

View File

@@ -63,7 +63,7 @@ void PASTEMAC(ch,varname) \
\
bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \
n_iter, n_elem, rs_at, cs_at ); \
&n_iter, &n_elem, &rs_at, &cs_at ); \
\
conja = bli_extract_conj( transa ); \
\

View File

@@ -65,7 +65,7 @@ void PASTEMAC(ch,varname) \
\
bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \
n_elem, n_iter, rs_at, cs_at ); \
&n_elem, &n_iter, &rs_at, &cs_at ); \
\
conja = bli_extract_conj( transa ); \
\

View File

@@ -64,7 +64,7 @@ void PASTEMAC(ch,varname) \
\
bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \
n_iter, n_elem, rs_at, cs_at ); \
&n_iter, &n_elem, &rs_at, &cs_at ); \
\
conja = bli_extract_conj( transa ); \
\

View File

@@ -65,7 +65,7 @@ void PASTEMAC(ch,varname) \
\
bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \
n_elem, n_iter, rs_at, cs_at ); \
&n_elem, &n_iter, &rs_at, &cs_at ); \
\
conja = bli_extract_conj( transa ); \
\

View File

@@ -50,26 +50,26 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
trans_t transa = bli_obj_conjtrans_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
trans_t transa = bli_obj_conjtrans_status( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
\
dim_t m = bli_obj_length( *a ); \
dim_t n = bli_obj_width( *a ); \
dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( a ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
\
/* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_14 \

View File

@@ -51,26 +51,26 @@ void PASTEMAC0(opname) \
gemv_t* cntl \
) \
{ \
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
trans_t transa = bli_obj_conjtrans_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
trans_t transa = bli_obj_conjtrans_status( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
\
dim_t m = bli_obj_length( *a ); \
dim_t n = bli_obj_width( *a ); \
dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( a ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
\
PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \
\

View File

@@ -54,7 +54,7 @@ void bli_gemv_blk_var1( obj_t* alpha,
bli_obj_init_pack( &y1_pack );
// Query dimension in partitioning direction.
m_trans = bli_obj_length_after_trans( *a );
m_trans = bli_obj_length_after_trans( a );
// Partition along the m dimension.
for ( i = 0; i < m_trans; i += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_gemv_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension in partitioning direction.
n_trans = bli_obj_width_after_trans( *a );
n_trans = bli_obj_width_after_trans( a );
// y = beta * y;
bli_scalv_int( beta,

View File

@@ -67,15 +67,15 @@ void bli_gemv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( y );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -112,14 +112,14 @@ void bli_gemv_front
// row-major cases with a transpose and column-major without a
// transpose. For the general stride case, we mimic that of column-
// major storage since that is the format into which we copy/pack.
if ( bli_obj_has_notrans( *a ) )
if ( bli_obj_has_notrans( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) gemv_cntl = gemv_cntl_bs_ke_dot;
if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_dot;
else gemv_cntl = gemv_cntl_bs_ke_axpy;
}
else // if ( bli_obj_has_trans( *a ) )
else // if ( bli_obj_has_trans( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) gemv_cntl = gemv_cntl_bs_ke_axpy;
if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_axpy;
else gemv_cntl = gemv_cntl_bs_ke_dot;
}
}
@@ -127,20 +127,20 @@ void bli_gemv_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_has_notrans( *a ) )
if ( bli_obj_has_notrans( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) gemv_cntl = gemv_cntl_ge_dot;
if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_dot;
else gemv_cntl = gemv_cntl_ge_axpy;
}
else // if ( bli_obj_has_trans( *a ) )
else // if ( bli_obj_has_trans( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) gemv_cntl = gemv_cntl_ge_axpy;
if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_axpy;
else gemv_cntl = gemv_cntl_ge_dot;
}
}
@@ -189,8 +189,8 @@ void PASTEMAC(ch,opname) \
inc_t rs_x, cs_x; \
inc_t rs_y, cs_y; \
\
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, m_a, n_a ); \
bli_set_dims_with_trans( transa, m, n, m_y, m_x ); \
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_a, &n_a ); \
bli_set_dims_with_trans( transa, m, n, &m_y, &m_x ); \
\
rs_x = incx; cs_x = m_x * incx; \
rs_y = incy; cs_y = m_y * incy; \
@@ -202,8 +202,8 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \
\
bli_obj_set_conjtrans( transa, ao ); \
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_conjtrans( transa, &ao ); \
bli_obj_set_conj( conjx, &xo ); \
\
PASTEMAC0(opname)( &alphao, \
&ao, \

View File

@@ -69,8 +69,8 @@ void bli_gemv_int( trans_t transa,
obj_t x_local;
// Apply the trans and/or conj parameters to aliases of the objects.
bli_obj_alias_with_trans( transa, *a, a_local );
bli_obj_alias_with_conj( conjx, *x, x_local );
bli_obj_alias_with_trans( transa, a, &a_local );
bli_obj_alias_with_conj( conjx, x, &x_local );
// Check parameters. We use the aliased copy of A so the transa parameter
// is taken into account for dimension checking.
@@ -78,10 +78,10 @@ void bli_gemv_int( trans_t transa,
bli_gemv_check( alpha, &a_local, &x_local, beta, y );
// If y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *y ) ) return;
if ( bli_obj_has_zero_dim( y ) ) return;
// If x has a zero dimension, scale y by beta and return early.
if ( bli_obj_has_zero_dim( *x ) )
if ( bli_obj_has_zero_dim( x ) )
{
bli_scalm( beta, y );
return;

View File

@@ -49,25 +49,25 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
\
dim_t m = bli_obj_length( *a ); \
dim_t n = bli_obj_width( *a ); \
dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( a ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\
/* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_13 \

View File

@@ -53,7 +53,7 @@ void bli_ger_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension in partitioning direction.
m_trans = bli_obj_length_after_trans( *a );
m_trans = bli_obj_length_after_trans( a );
// Partition along the m dimension.
for ( i = 0; i < m_trans; i += b_alg )

View File

@@ -53,7 +53,7 @@ void bli_ger_blk_var2( obj_t* alpha,
bli_obj_init_pack( &y1_pack );
// Query dimension in partitioning direction.
n_trans = bli_obj_width_after_trans( *a );
n_trans = bli_obj_width_after_trans( a );
// Partition along the n dimension.
for ( i = 0; i < n_trans; i += b_alg )

View File

@@ -64,15 +64,15 @@ void bli_ger_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
//dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( y );
//dt_targ_a = bli_obj_target_dt( a );
// Determine whether each operand is stored with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( a ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -93,20 +93,20 @@ void bli_ger_front
{
// Use different control trees depending on storage of the matrix
// operand.
if ( bli_obj_is_row_stored( *a ) ) ger_cntl = ger_cntl_bs_ke_row;
if ( bli_obj_is_row_stored( a ) ) ger_cntl = ger_cntl_bs_ke_row;
else ger_cntl = ger_cntl_bs_ke_col;
}
else
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_row_tilted( *a ) ) ger_cntl = ger_cntl_ge_row;
if ( bli_obj_is_row_tilted( a ) ) ger_cntl = ger_cntl_ge_row;
else ger_cntl = ger_cntl_ge_col;
}
@@ -151,7 +151,7 @@ void PASTEMAC(ch,opname) \
inc_t rs_x, cs_x; \
inc_t rs_y, cs_y; \
\
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, m_x, m_y ); \
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_x, &m_y ); \
\
rs_x = incx; cs_x = m_x * incx; \
rs_y = incy; cs_y = m_y * incy; \
@@ -162,8 +162,8 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \
bli_obj_create_with_attached_buffer( dt, m, n, a, rs_a, cs_a, &ao ); \
\
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_conj( conjy, yo ); \
bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_conj( conjy, &yo ); \
\
PASTEMAC0(opname)( &alphao, \
&xo, \

View File

@@ -74,27 +74,27 @@ void bli_ger_int( conj_t conjx,
bli_ger_check( alpha, x, y, a );
// If A has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *a ) ) return;
if ( bli_obj_has_zero_dim( a ) ) return;
// If x or y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *x ) ||
bli_obj_has_zero_dim( *y ) ) return;
if ( bli_obj_has_zero_dim( x ) ||
bli_obj_has_zero_dim( y ) ) return;
// Alias the objects, applying conjx and conjy to x and y, respectively.
bli_obj_alias_with_conj( conjx, *x, x_local );
bli_obj_alias_with_conj( conjy, *y, y_local );
bli_obj_alias_to( *a, a_local );
bli_obj_alias_with_conj( conjx, x, &x_local );
bli_obj_alias_with_conj( conjy, y, &y_local );
bli_obj_alias_to( a, &a_local );
// If matrix A is marked for conjugation, we interpret this as a request
// to apply a conjugation to the other operands.
if ( bli_obj_has_conj( a_local ) )
if ( bli_obj_has_conj( &a_local ) )
{
bli_obj_toggle_conj( a_local );
bli_obj_toggle_conj( &a_local );
bli_obj_toggle_conj( x_local );
bli_obj_toggle_conj( y_local );
bli_obj_toggle_conj( &x_local );
bli_obj_toggle_conj( &y_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
BLIS_CONJUGATE,
alpha,
&alpha_local );
@@ -107,10 +107,10 @@ void bli_ger_int( conj_t conjx,
// If we are about to call a leaf-level implementation, and matrix A
// still needs a transposition, then we must induce one by swapping the
// strides and dimensions.
if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( a_local ) )
if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) )
{
bli_obj_induce_trans( a_local );
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local );
bli_obj_induce_trans( &a_local );
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
}
// Extract the variant number and implementation type.

View File

@@ -51,26 +51,26 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
uplo_t uplo = bli_obj_uplo( *a ); \
conj_t conja = bli_obj_conj_status( *a ); \
conj_t conjx = bli_obj_conj_status( *x ); \
uplo_t uplo = bli_obj_uplo( a ); \
conj_t conja = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( x ); \
\
dim_t m = bli_obj_length( *a ); \
dim_t m = bli_obj_length( a ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
\
/* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_15 \

View File

@@ -69,7 +69,7 @@ void bli_hemv_blk_var1( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// y = beta * y;
bli_scalv_int( beta,

View File

@@ -70,7 +70,7 @@ void bli_hemv_blk_var2( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// y = beta * y;
bli_scalv_int( beta,

View File

@@ -69,7 +69,7 @@ void bli_hemv_blk_var3( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// y = beta * y;
bli_scalv_int( beta,

View File

@@ -70,7 +70,7 @@ void bli_hemv_blk_var4( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// y = beta * y;
bli_scalv_int( beta,

View File

@@ -67,15 +67,15 @@ void bli_hemv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( y );
// Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -109,14 +109,14 @@ void bli_hemv_front
// combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *a ) )
if ( bli_obj_is_lower( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
else hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
}
else // if ( bli_obj_is_upper( *a ) )
else // if ( bli_obj_is_upper( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
}
}
@@ -124,20 +124,20 @@ void bli_hemv_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *a ) )
if ( bli_obj_is_lower( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
else hemv_cntl = hemv_cntl_ge_lcol_urow;
}
else // if ( bli_obj_is_upper( *a ) )
else // if ( bli_obj_is_upper( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
else hemv_cntl = hemv_cntl_ge_lrow_ucol;
}
}
@@ -193,11 +193,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
\
bli_obj_set_uplo( uploa, ao ); \
bli_obj_set_conj( conja, ao ); \
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conj( conja, &ao ); \
bli_obj_set_conj( conjx, &xo ); \
\
bli_obj_set_struc( BLIS_HERMITIAN, ao ); \
bli_obj_set_struc( BLIS_HERMITIAN, &ao ); \
\
PASTEMAC0(opname)( &alphao, \
&ao, \

View File

@@ -76,17 +76,17 @@ void bli_hemv_int( conj_t conjh,
}
// If y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *y ) ) return;
if ( bli_obj_has_zero_dim( y ) ) return;
// If x has a zero dimension, scale y by beta and return early.
if ( bli_obj_has_zero_dim( *x ) )
if ( bli_obj_has_zero_dim( x ) )
{
bli_scalm( beta, y );
return;
}
// Alias A in case we need to induce the upper triangular case.
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( a, &a_local );
/*
// Our blocked algorithms only [explicitly] implement the lower triangular
@@ -96,10 +96,10 @@ void bli_hemv_int( conj_t conjh,
// triangular case. But we only need to do this for blocked algorithms,
// since unblocked algorithms are responsible for handling the upper case
// explicitly (and they should not be inspecting the transposition bit anyway).
if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( *a ) )
if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( a ) )
{
bli_obj_toggle_conj( a_local );
bli_obj_toggle_trans( a_local );
bli_obj_toggle_conj( &a_local );
bli_obj_toggle_trans( &a_local );
}
*/

View File

@@ -49,21 +49,21 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *c ); \
num_t dt = bli_obj_dt( c ); \
\
uplo_t uplo = bli_obj_uplo( *c ); \
conj_t conjx = bli_obj_conj_status( *x ); \
uplo_t uplo = bli_obj_uplo( c ); \
conj_t conjx = bli_obj_conj_status( x ); \
\
dim_t m = bli_obj_length( *c ); \
dim_t m = bli_obj_length( c ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_c = bli_obj_buffer_at_off( *c ); \
inc_t rs_c = bli_obj_row_stride( *c ); \
inc_t cs_c = bli_obj_col_stride( *c ); \
void* buf_c = bli_obj_buffer_at_off( c ); \
inc_t rs_c = bli_obj_row_stride( c ); \
inc_t cs_c = bli_obj_col_stride( c ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\
/* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_11 \

View File

@@ -64,7 +64,7 @@ void bli_her_blk_var1( conj_t conjh,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *c );
mn = bli_obj_length( c );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -64,7 +64,7 @@ void bli_her_blk_var2( conj_t conjh,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *c );
mn = bli_obj_length( c );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -61,13 +61,13 @@ void bli_her_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x );
//dt_targ_c = bli_obj_target_dt( *c );
dt_targ_x = bli_obj_target_dt( x );
//dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand has unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( c ) );
// Create object to hold a copy-cast of alpha.
@@ -87,14 +87,14 @@ void bli_her_front
// combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
else her_cntl = her_cntl_bs_ke_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
else her_cntl = her_cntl_bs_ke_lrow_ucol;
}
}
@@ -102,19 +102,19 @@ void bli_her_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol;
else her_cntl = her_cntl_ge_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow;
else her_cntl = her_cntl_ge_lrow_ucol;
}
}
@@ -162,10 +162,10 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_uplo( uploc, co ); \
bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_uplo( uploc, &co ); \
\
bli_obj_set_struc( BLIS_HERMITIAN, co ); \
bli_obj_set_struc( BLIS_HERMITIAN, &co ); \
\
PASTEMAC0(opname)( &alphao, \
&xo, \

View File

@@ -73,22 +73,22 @@ void bli_her_int( conj_t conjh,
}
// If C or x has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *c ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return;
if ( bli_obj_has_zero_dim( c ) ) return;
if ( bli_obj_has_zero_dim( x ) ) return;
// Alias the operands in case we need to apply conjugations.
bli_obj_alias_to( *x, x_local );
bli_obj_alias_to( *c, c_local );
bli_obj_alias_to( x, &x_local );
bli_obj_alias_to( c, &c_local );
// If matrix C is marked for conjugation, we interpret this as a request
// to apply a conjugation to the other operands.
if ( bli_obj_has_conj( c_local ) )
if ( bli_obj_has_conj( &c_local ) )
{
bli_obj_toggle_conj( c_local );
bli_obj_toggle_conj( &c_local );
// Notice that we don't need to conjugate alpha since it is guaranteed
// to be real.
bli_obj_toggle_conj( x_local );
bli_obj_toggle_conj( &x_local );
}
// Extract the variant number and implementation type.

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* two = PASTEMAC(ch,2); \
ctype* x0; \
ctype* chi1; \
ctype* y0; \
@@ -156,7 +155,8 @@ void PASTEMAC(ch,varname) \
\
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\
/* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* two = PASTEMAC(ch,2); \
ctype* x0; \
ctype* chi1; \
ctype* x2; \
@@ -165,7 +164,8 @@ void PASTEMAC(ch,varname) \
\
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\
/* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* two = PASTEMAC(ch,2); \
ctype* chi1; \
ctype* y0; \
ctype* psi1; \
@@ -165,7 +164,8 @@ void PASTEMAC(ch,varname) \
\
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\
/* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* two = PASTEMAC(ch,2); \
ctype* chi1; \
ctype* x2; \
ctype* psi1; \
@@ -164,7 +163,8 @@ void PASTEMAC(ch,varname) \
\
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\
/* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* two = PASTEMAC(ch,2); \
ctype* x0; \
ctype* chi1; \
ctype* y0; \
@@ -149,7 +148,8 @@ void PASTEMAC(ch,varname) \
\
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\
/* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \
const num_t dt = PASTEMAC(ch,type); \
\
ctype* two = PASTEMAC(ch,2); \
ctype* chi1; \
ctype* x2; \
ctype* psi1; \
@@ -157,7 +156,8 @@ void PASTEMAC(ch,varname) \
\
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\
/* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \

View File

@@ -51,25 +51,25 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *c ); \
num_t dt = bli_obj_dt( c ); \
\
uplo_t uplo = bli_obj_uplo( *c ); \
conj_t conjx = bli_obj_conj_status( *x ); \
conj_t conjy = bli_obj_conj_status( *y ); \
uplo_t uplo = bli_obj_uplo( c ); \
conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( y ); \
\
dim_t m = bli_obj_length( *c ); \
dim_t m = bli_obj_length( c ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_y = bli_obj_buffer_at_off( *y ); \
inc_t incy = bli_obj_vector_inc( *y ); \
void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( y ); \
\
void* buf_c = bli_obj_buffer_at_off( *c ); \
inc_t rs_c = bli_obj_row_stride( *c ); \
inc_t cs_c = bli_obj_col_stride( *c ); \
void* buf_c = bli_obj_buffer_at_off( c ); \
inc_t rs_c = bli_obj_row_stride( c ); \
inc_t cs_c = bli_obj_col_stride( c ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\
/* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_14 \

View File

@@ -69,7 +69,7 @@ void bli_her2_blk_var1( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *c );
mn = bli_obj_length( c );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -70,7 +70,7 @@ void bli_her2_blk_var2( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *c );
mn = bli_obj_length( c );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -70,7 +70,7 @@ void bli_her2_blk_var3( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *c );
mn = bli_obj_length( c );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -69,7 +69,7 @@ void bli_her2_blk_var4( conj_t conjh,
bli_obj_init_pack( &y1_pack );
// Query dimension.
mn = bli_obj_length( *c );
mn = bli_obj_length( c );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -65,15 +65,15 @@ void bli_her2_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
//dt_targ_c = bli_obj_target_dt( *c );
dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( y );
//dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand has unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -101,14 +101,14 @@ void bli_her2_front
// combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
else her2_cntl = her2_cntl_bs_ke_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
else her2_cntl = her2_cntl_bs_ke_lrow_ucol;
}
}
@@ -116,20 +116,20 @@ void bli_her2_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
else her2_cntl = her2_cntl_ge_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
else her2_cntl = her2_cntl_ge_lrow_ucol;
}
}
@@ -183,11 +183,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_conj( conjy, yo ); \
bli_obj_set_uplo( uploc, co ); \
bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_conj( conjy, &yo ); \
bli_obj_set_uplo( uploc, &co ); \
\
bli_obj_set_struc( BLIS_HERMITIAN, co ); \
bli_obj_set_struc( BLIS_HERMITIAN, &co ); \
\
PASTEMAC0(opname)( &alphao, \
&xo, \

View File

@@ -80,29 +80,29 @@ void bli_her2_int( conj_t conjh,
}
// If C, x, or y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *c ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return;
if ( bli_obj_has_zero_dim( *y ) ) return;
if ( bli_obj_has_zero_dim( c ) ) return;
if ( bli_obj_has_zero_dim( x ) ) return;
if ( bli_obj_has_zero_dim( y ) ) return;
// Alias the operands in case we need to apply conjugations.
bli_obj_alias_to( *x, x_local );
bli_obj_alias_to( *y, y_local );
bli_obj_alias_to( *c, c_local );
bli_obj_alias_to( x, &x_local );
bli_obj_alias_to( y, &y_local );
bli_obj_alias_to( c, &c_local );
// If matrix C is marked for conjugation, we interpret this as a request
// to apply a conjugation to the other operands.
if ( bli_obj_has_conj( c_local ) )
if ( bli_obj_has_conj( &c_local ) )
{
bli_obj_toggle_conj( c_local );
bli_obj_toggle_conj( &c_local );
bli_obj_toggle_conj( x_local );
bli_obj_toggle_conj( y_local );
bli_obj_toggle_conj( &x_local );
bli_obj_toggle_conj( &y_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
BLIS_CONJUGATE,
alpha,
&alpha_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha_conj ),
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha_conj ),
BLIS_CONJUGATE,
alpha_conj,
&alpha_conj_local );

View File

@@ -67,15 +67,15 @@ void bli_symv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( y );
// Determine whether each operand has unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -109,14 +109,14 @@ void bli_symv_front
// combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *a ) )
if ( bli_obj_is_lower( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
else hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
}
else // if ( bli_obj_is_upper( *a ) )
else // if ( bli_obj_is_upper( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
}
}
@@ -124,20 +124,20 @@ void bli_symv_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *a ) )
if ( bli_obj_is_lower( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
else hemv_cntl = hemv_cntl_ge_lcol_urow;
}
else // if ( bli_obj_is_upper( *a ) )
else // if ( bli_obj_is_upper( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
else hemv_cntl = hemv_cntl_ge_lrow_ucol;
}
}
@@ -194,11 +194,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
\
bli_obj_set_uplo( uploa, ao ); \
bli_obj_set_conj( conja, ao ); \
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conj( conja, &ao ); \
bli_obj_set_conj( conjx, &xo ); \
\
bli_obj_set_struc( BLIS_SYMMETRIC, ao ); \
bli_obj_set_struc( BLIS_SYMMETRIC, &ao ); \
\
PASTEMAC0(opname)( &alphao, \
&ao, \

View File

@@ -61,13 +61,13 @@ void bli_syr_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_c = bli_obj_target_dt( *c );
dt_targ_x = bli_obj_target_dt( x );
dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand has unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -89,14 +89,14 @@ void bli_syr_front
// combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
else her_cntl = her_cntl_bs_ke_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
else her_cntl = her_cntl_bs_ke_lrow_ucol;
}
}
@@ -104,19 +104,19 @@ void bli_syr_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol;
else her_cntl = her_cntl_ge_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow;
else her_cntl = her_cntl_ge_lrow_ucol;
}
}
@@ -163,10 +163,10 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_uplo( uploc, co ); \
bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_uplo( uploc, &co ); \
\
bli_obj_set_struc( BLIS_SYMMETRIC, co ); \
bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \
\
PASTEMAC0(opname)( &alphao, \
&xo, \

View File

@@ -64,15 +64,15 @@ void bli_syr2_front
// Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_y = bli_obj_target_dt( *y );
//dt_targ_c = bli_obj_target_dt( *c );
dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( y );
//dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand has unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
bli_obj_is_col_stored( *c ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -94,14 +94,14 @@ void bli_syr2_front
// combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
else her2_cntl = her2_cntl_bs_ke_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
else her2_cntl = her2_cntl_bs_ke_lrow_ucol;
}
}
@@ -109,20 +109,20 @@ void bli_syr2_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) )
if ( bli_obj_is_lower( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
else her2_cntl = her2_cntl_ge_lcol_urow;
}
else // if ( bli_obj_is_upper( *c ) )
else // if ( bli_obj_is_upper( c ) )
{
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
else her2_cntl = her2_cntl_ge_lrow_ucol;
}
}
@@ -176,11 +176,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\
bli_obj_set_conj( conjx, xo ); \
bli_obj_set_conj( conjy, yo ); \
bli_obj_set_uplo( uploc, co ); \
bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_conj( conjy, &yo ); \
bli_obj_set_uplo( uploc, &co ); \
\
bli_obj_set_struc( BLIS_SYMMETRIC, co ); \
bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \
\
PASTEMAC0(opname)( &alphao, \
&xo, \

View File

@@ -48,22 +48,22 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \
diag_t diaga = bli_obj_diag( *a ); \
uplo_t uploa = bli_obj_uplo( a ); \
trans_t transa = bli_obj_conjtrans_status( a ); \
diag_t diaga = bli_obj_diag( a ); \
\
dim_t m = bli_obj_length( *a ); \
dim_t m = bli_obj_length( a ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\
/* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_11 \

View File

@@ -61,13 +61,13 @@ void bli_trmv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a );
dt_targ_x = bli_obj_target_dt( *x );
dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( x );
// Determine whether each operand has unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -89,14 +89,14 @@ void bli_trmv_front
// combinations of transposition and row/column-storage.
// The row-stored without transpose and column-stored with transpose
// trees are identical. Same for the remaining two trees.
if ( bli_obj_has_notrans( *a ) )
if ( bli_obj_has_notrans( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
else trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
}
else // if ( bli_obj_has_trans( *a ) )
else // if ( bli_obj_has_trans( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
else trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
}
}
@@ -104,19 +104,19 @@ void bli_trmv_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_has_notrans( *a ) )
if ( bli_obj_has_notrans( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol;
if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol;
else trmv_cntl = trmv_cntl_ge_ncol_trow;
}
else // if ( bli_obj_has_trans( *a ) )
else // if ( bli_obj_has_trans( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow;
if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow;
else trmv_cntl = trmv_cntl_ge_nrow_tcol;
}
}
@@ -162,11 +162,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
\
bli_obj_set_uplo( uploa, ao ); \
bli_obj_set_conjtrans( transa, ao ); \
bli_obj_set_diag( diaga, ao ); \
bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conjtrans( transa, &ao ); \
bli_obj_set_diag( diaga, &ao ); \
\
bli_obj_set_struc( BLIS_TRIANGULAR, ao ); \
bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \
\
PASTEMAC0(opname)( &alphao, \
&ao, \

View File

@@ -77,18 +77,18 @@ void bli_trmv_int( obj_t* alpha,
bli_trmv_check( alpha, a, x );
// If A or x has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *a ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return;
if ( bli_obj_has_zero_dim( a ) ) return;
if ( bli_obj_has_zero_dim( x ) ) return;
// Alias A in case we need to induce a transformation (ie: transposition).
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( a, &a_local );
// NOTE: to support cases where B is complex and A is real, we will
// need to have the default side case be BLIS_RIGHT and then express
// the left case in terms of it, rather than the other way around.
// Determine uplo (for indexing to the correct function pointer).
if ( bli_obj_is_lower( a_local ) ) uplo = 0;
if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
else uplo = 1;
// We do not explicitly implement the cases where A is transposed.
@@ -107,11 +107,12 @@ void bli_trmv_int( obj_t* alpha,
// affect the optimal choice of kernel (ie: a column-major column panel
// matrix with transpose times a vector would use the same kernel as a
// row-major row panel matrix with no transpose times a vector).
if ( bli_obj_has_trans( a_local ) )
if ( bli_obj_has_trans( &a_local ) )
{
//bli_obj_induce_trans( a_local );
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local );
bli_toggle_bool( uplo );
//bli_obj_induce_trans( &a_local );
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
if ( uplo == 1 ) uplo = 0;
else uplo = 1;
}
// Extract the variant number and implementation type.

View File

@@ -54,7 +54,7 @@ void bli_trmv_l_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_trmv_l_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_trmv_u_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_trmv_u_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -48,22 +48,22 @@ void PASTEMAC0(opname) \
{ \
bli_init_once(); \
\
num_t dt = bli_obj_dt( *a ); \
num_t dt = bli_obj_dt( a ); \
\
uplo_t uploa = bli_obj_uplo( *a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \
diag_t diaga = bli_obj_diag( *a ); \
uplo_t uploa = bli_obj_uplo( a ); \
trans_t transa = bli_obj_conjtrans_status( a ); \
diag_t diaga = bli_obj_diag( a ); \
\
dim_t m = bli_obj_length( *a ); \
dim_t m = bli_obj_length( a ); \
\
void* buf_a = bli_obj_buffer_at_off( *a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \
void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( a ); \
\
void* buf_x = bli_obj_buffer_at_off( *x ); \
inc_t incx = bli_obj_vector_inc( *x ); \
void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( x ); \
\
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\
/* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_11 \

View File

@@ -61,13 +61,13 @@ void bli_trsv_front
// Query the target datatypes of each object.
dt_targ_a = bli_obj_dt( *a );
dt_targ_x = bli_obj_dt( *x );
dt_targ_a = bli_obj_dt( a );
dt_targ_x = bli_obj_dt( x );
// Determine whether each operand has unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
bli_obj_is_col_stored( *a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use
@@ -85,14 +85,14 @@ void bli_trsv_front
if ( a_has_unit_inc &&
x_has_unit_inc )
{
if ( bli_obj_has_notrans( *a ) )
if ( bli_obj_has_notrans( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
else trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
}
else // if ( bli_obj_has_trans( *a ) )
else // if ( bli_obj_has_trans( a ) )
{
if ( bli_obj_is_row_stored( *a ) ) trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
else trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
}
}
@@ -100,19 +100,19 @@ void bli_trsv_front
{
// Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
// Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_has_notrans( *a ) )
if ( bli_obj_has_notrans( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) trsv_cntl = trsv_cntl_ge_nrow_tcol;
if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_nrow_tcol;
else trsv_cntl = trsv_cntl_ge_ncol_trow;
}
else // if ( bli_obj_has_trans( *a ) )
else // if ( bli_obj_has_trans( a ) )
{
if ( bli_obj_is_row_tilted( *a ) ) trsv_cntl = trsv_cntl_ge_ncol_trow;
if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_ncol_trow;
else trsv_cntl = trsv_cntl_ge_nrow_tcol;
}
}
@@ -158,11 +158,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
\
bli_obj_set_uplo( uploa, ao ); \
bli_obj_set_conjtrans( transa, ao ); \
bli_obj_set_diag( diaga, ao ); \
bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conjtrans( transa, &ao ); \
bli_obj_set_diag( diaga, &ao ); \
\
bli_obj_set_struc( BLIS_TRIANGULAR, ao ); \
bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \
\
PASTEMAC0(opname)( &alphao, \
&ao, \

View File

@@ -77,19 +77,19 @@ void bli_trsv_int( obj_t* alpha,
bli_trsv_check( alpha, a, x );
// If A or x has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *a ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return;
if ( bli_obj_has_zero_dim( a ) ) return;
if ( bli_obj_has_zero_dim( x ) ) return;
// Alias A in case we need to induce a transformation (ie: transposition).
bli_obj_alias_to( *a, a_local );
bli_obj_alias_to( a, &a_local );
// NOTE: to support cases where B is complex and A is real, we will
// need to have the default side case be BLIS_RIGHT and then express
// the left case in terms of it, rather than the other way around.
// Determine uplo (for indexing to the correct function pointer).
if ( bli_obj_is_lower( a_local ) ) uplo = 0;
else uplo = 1;
if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
else uplo = 1;
// We do not explicitly implement the cases where A is transposed.
// However, we can still handle them. Specifically, if A is marked as
@@ -107,11 +107,12 @@ void bli_trsv_int( obj_t* alpha,
// affect the optimal choice of kernel (ie: a column-major column panel
// matrix with transpose times a vector would use the same kernel as a
// row-major row panel matrix with no transpose times a vector).
if ( bli_obj_has_trans( a_local ) )
if ( bli_obj_has_trans( &a_local ) )
{
//bli_obj_induce_trans( a_local );
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local );
bli_toggle_bool( uplo );
//bli_obj_induce_trans( &a_local );
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
if ( uplo == 1 ) uplo = 0;
else uplo = 1;
}
// Extract the variant number and implementation type.

View File

@@ -54,7 +54,7 @@ void bli_trsv_l_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// x = alpha * x;
bli_scalv_int( alpha,

View File

@@ -54,7 +54,7 @@ void bli_trsv_l_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// x = alpha * x;
bli_scalv_int( alpha,

View File

@@ -54,7 +54,7 @@ void bli_trsv_u_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack );
// Query dimension.
mn = bli_obj_length( *a );
mn = bli_obj_length( a );
// x = alpha * x;
bli_scalv_int( alpha,

Some files were not shown because too many files have changed in this diff Show More