Converted function-like macros to static functions.

Details:
- Converted most C preprocessor macros in bli_param_macro_defs.h and
  bli_obj_macro_defs.h to static functions.
- Reshuffled some functions/macros to bli_misc_macro_defs.h and also
  between bli_param_macro_defs.h and bli_obj_macro_defs.h.
- Changed obj_t-initializing macros in bli_type_defs.h to static
  functions.
- Removed some old references to BLIS_TWO and BLIS_MINUS_TWO from
  bli_constants.h.
- Whitespace changes in select files (four spaces to single tab).
This commit is contained in:
Field G. Van Zee
2018-05-08 14:26:30 -05:00
parent 75d0d1057d
commit 4b36e85be9
247 changed files with 5480 additions and 5350 deletions

View File

@@ -194,7 +194,7 @@ void bli_zdotv_template_noopt
// toggling the effective conjugation of x and then conjugating the // toggling the effective conjugation of x and then conjugating the
// resulting dot product. // resulting dot product.
if ( bli_is_conj( conjy ) ) if ( bli_is_conj( conjy ) )
bli_toggle_conj( conjx_use ); bli_toggle_conj( &conjx_use );
// Iterate over elements of x and y to compute: // Iterate over elements of x and y to compute:

View File

@@ -211,7 +211,7 @@ void bli_zdotaxpyv_template_noopt
// toggling the effective conjugation of xt and then conjugating the // toggling the effective conjugation of xt and then conjugating the
// resulting dot product. // resulting dot product.
if ( bli_is_conj( conjy ) ) if ( bli_is_conj( conjy ) )
bli_toggle_conj( conjxt_use ); bli_toggle_conj( &conjxt_use );
// Iterate over elements of x, y, and z to compute: // Iterate over elements of x, y, and z to compute:

View File

@@ -264,7 +264,7 @@ void bli_zdotxaxpyf_template_noopt
// toggling the effective conjugation of At and then conjugating the // toggling the effective conjugation of At and then conjugating the
// resulting dot products. // resulting dot products.
if ( bli_is_conj( conjw ) ) if ( bli_is_conj( conjw ) )
bli_toggle_conj( conjat_use ); bli_toggle_conj( &conjat_use );
// Iterate over the columns of A and elements of w and z to compute: // Iterate over the columns of A and elements of w and z to compute:

View File

@@ -237,7 +237,7 @@ void bli_zdotxf_template_noopt
// toggling the effective conjugation of A and then conjugating the // toggling the effective conjugation of A and then conjugating the
// resulting product A^T*x. // resulting product A^T*x.
if ( bli_is_conj( conjx ) ) if ( bli_is_conj( conjx ) )
bli_toggle_conj( conjat_use ); bli_toggle_conj( &conjat_use );
// Iterate over columns of A and rows of x to compute: // Iterate over columns of A and rows of x to compute:

View File

@@ -121,12 +121,12 @@ int main( int argc, char** argv )
// Let's inspect the amount of padding inserted for alignment. Note // Let's inspect the amount of padding inserted for alignment. Note
// the difference between the m dimension and the column stride. // the difference between the m dimension and the column stride.
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a8 ) ) ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a8 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a8 ) ) ); printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a8 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a8 ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a8 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a8 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a8 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a8 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a8 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a8 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a8 ) );
// //
// Example 6: Inspect object fields after creation of other floating- // Example 6: Inspect object fields after creation of other floating-
@@ -139,28 +139,28 @@ int main( int argc, char** argv )
bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10); bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10);
bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 ); bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a9 ) ) ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a9 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a9 ) ) ); printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a9 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a9 ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a9 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a9 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a9 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a9 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a9 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a9 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a9 ) );
printf( "\n" ); printf( "\n" );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a10 ) ) ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a10 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a10 ) ) ); printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a10 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a10 ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a10 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a10 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a10 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a10 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a10 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a10 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a10 ) );
printf( "\n" ); printf( "\n" );
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a11 ) ) ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a11 ) ) );
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a11 ) ) ); printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a11 ) ) );
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a11 ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a11 ) );
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a11 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a11 ) );
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a11 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a11 ) );
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a11 ) ); printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a11 ) );
// //
// Example 7: Initialize an object's elements to random values and then // Example 7: Initialize an object's elements to random values and then

View File

@@ -148,7 +148,7 @@ int main( int argc, char** argv )
// on 'e', the input operand. Transposition can be indicated by setting a // on 'e', the input operand. Transposition can be indicated by setting a
// bit in the object. Since it always starts out as "no transpose", we can // bit in the object. Since it always starts out as "no transpose", we can
// simply toggle the bit. // simply toggle the bit.
bli_obj_toggle_trans( e ); bli_obj_toggle_trans( &e );
// Another way to mark an object for transposition is to set it directly. // Another way to mark an object for transposition is to set it directly.
//bli_obj_set_onlytrans( BLIS_TRANSPOSE, &e ); //bli_obj_set_onlytrans( BLIS_TRANSPOSE, &e );
@@ -192,8 +192,8 @@ int main( int argc, char** argv )
bli_printm( "h (initial value):", &h, "%4.1f", "" ); bli_printm( "h (initial value):", &h, "%4.1f", "" );
// Set both the transpose and conjugation bits. // Set both the transpose and conjugation bits.
bli_obj_toggle_trans( g ); bli_obj_toggle_trans( &g );
bli_obj_toggle_conj( g ); bli_obj_toggle_conj( &g );
// Copy 'g' to 'h', conjugating and transposing 'g' in the process. // Copy 'g' to 'h', conjugating and transposing 'g' in the process.
// Once again, notice that it's the source operand that we've marked for // Once again, notice that it's the source operand that we've marked for

View File

@@ -59,14 +59,14 @@ int main( int argc, char** argv )
bli_obj_create( dt, m, n, rs, cs, &a ); bli_obj_create( dt, m, n, rs, cs, &a );
// First, we mark the matrix structure as triangular. // First, we mark the matrix structure as triangular.
bli_obj_set_struc( BLIS_TRIANGULAR, a ) bli_obj_set_struc( BLIS_TRIANGULAR, &a )
// Next, we specify whether the lower part or the upper part is to be // Next, we specify whether the lower part or the upper part is to be
// recognized as the "stored" region (which we call the uplo field). The // recognized as the "stored" region (which we call the uplo field). The
// strictly opposite part (in this case, the strictly lower region) will // strictly opposite part (in this case, the strictly lower region) will
// be *assumed* to be zero during computation. However, when printed out, // be *assumed* to be zero during computation. However, when printed out,
// the strictly lower part may contain junk values. // the strictly lower part may contain junk values.
bli_obj_set_uplo( BLIS_UPPER, a ); bli_obj_set_uplo( BLIS_UPPER, &a );
// Now set the upper triangle to random values. // Now set the upper triangle to random values.
bli_randm( &a ); bli_randm( &a );
@@ -89,8 +89,8 @@ int main( int argc, char** argv )
bli_obj_create( dt, m, n, rs, cs, &b ); bli_obj_create( dt, m, n, rs, cs, &b );
// Set structure and uplo. // Set structure and uplo.
bli_obj_set_struc( BLIS_TRIANGULAR, b ) bli_obj_set_struc( BLIS_TRIANGULAR, &b )
bli_obj_set_uplo( BLIS_UPPER, b ); bli_obj_set_uplo( BLIS_UPPER, &b );
// Create an alias, 'bl', of the original object 'b'. Both objects will // Create an alias, 'bl', of the original object 'b'. Both objects will
// refer to the same underlying matrix elements, but now we will have two // refer to the same underlying matrix elements, but now we will have two
@@ -98,7 +98,7 @@ int main( int argc, char** argv )
// of the objects, meaning no additional memory allocation takes place. // of the objects, meaning no additional memory allocation takes place.
// Therefore it is up to the API user (you) to make sure that you only // Therefore it is up to the API user (you) to make sure that you only
// free the original object (or exactly one of the aliases). // free the original object (or exactly one of the aliases).
bli_obj_alias_to( b, bl ); bli_obj_alias_to( &b, &bl );
// Digression: Each object contains a diagonal offset (even vectors), // Digression: Each object contains a diagonal offset (even vectors),
// even if it is never needed. The diagonal offset for a newly-created // even if it is never needed. The diagonal offset for a newly-created
@@ -111,10 +111,10 @@ int main( int argc, char** argv )
// x-axis value. // x-axis value.
// Set the diagonal offset of 'bl' to -1. // Set the diagonal offset of 'bl' to -1.
bli_obj_set_diag_offset( -1, bl ); bli_obj_set_diag_offset( -1, &bl );
// Set the uplo field of 'bl' to "lower". // Set the uplo field of 'bl' to "lower".
bli_obj_set_uplo( BLIS_LOWER, bl ); bli_obj_set_uplo( BLIS_LOWER, &bl );
// Set the upper triangle of 'b' to random values. // Set the upper triangle of 'b' to random values.
bli_randm( &b ); bli_randm( &b );
@@ -148,7 +148,7 @@ int main( int argc, char** argv )
bli_obj_create( dt, m, n, rs, cs, &c ); bli_obj_create( dt, m, n, rs, cs, &c );
// Reset the diagonal offset of 'bl' to 0. // Reset the diagonal offset of 'bl' to 0.
bli_obj_set_diag_offset( 0, bl ); bli_obj_set_diag_offset( 0, &bl );
// Copy the lower triangle of matrix 'b' from Example 2 to object 'c'. // Copy the lower triangle of matrix 'b' from Example 2 to object 'c'.
// This should give us -1.0 in the strictly lower part and some non-zero // This should give us -1.0 in the strictly lower part and some non-zero
@@ -212,7 +212,7 @@ int main( int argc, char** argv )
// We want to pluck out the lower triangle and transpose it into the upper // We want to pluck out the lower triangle and transpose it into the upper
// triangle of 'd'. // triangle of 'd'.
bli_obj_toggle_trans( bl ); bli_obj_toggle_trans( &bl );
// Now we copy the transpose of the lower part of 'bl' into the upper // Now we copy the transpose of the lower part of 'bl' into the upper
// part of 'd'. (Again, notice that we haven't modified any properties of // part of 'd'. (Again, notice that we haven't modified any properties of
@@ -242,11 +242,11 @@ int main( int argc, char** argv )
bli_printm( "e: initial value (all -1.0)", &e, "%4.1f", "" ); bli_printm( "e: initial value (all -1.0)", &e, "%4.1f", "" );
// Create an alias to work with. // Create an alias to work with.
bli_obj_alias_to( e, el ); bli_obj_alias_to( &e, &el );
// Set structure and uplo of 'el'. // Set structure and uplo of 'el'.
bli_obj_set_struc( BLIS_TRIANGULAR, el ) bli_obj_set_struc( BLIS_TRIANGULAR, &el )
bli_obj_set_uplo( BLIS_LOWER, el ); bli_obj_set_uplo( BLIS_LOWER, &el );
// Digression: Notice that "triangular" structure does not require that // Digression: Notice that "triangular" structure does not require that
// the matrix be square. Rather, it simply means that either the part above // the matrix be square. Rather, it simply means that either the part above
@@ -259,8 +259,8 @@ int main( int argc, char** argv )
// Move the diagonal offset of 'el' to 1 and flip the uplo field to // Move the diagonal offset of 'el' to 1 and flip the uplo field to
// "upper". // "upper".
bli_obj_set_diag_offset( 1, el ); bli_obj_set_diag_offset( 1, &el );
bli_obj_set_uplo( BLIS_UPPER, el ); bli_obj_set_uplo( BLIS_UPPER, &el );
// Set the upper triangle to zero. // Set the upper triangle to zero.
bli_setm( &BLIS_ZERO, &el ); bli_setm( &BLIS_ZERO, &el );
@@ -287,11 +287,11 @@ int main( int argc, char** argv )
bli_printm( "h: initial value (all -1.0)", &h, "%4.1f", "" ); bli_printm( "h: initial value (all -1.0)", &h, "%4.1f", "" );
// Set the diagonal offset of 'h' to -1. // Set the diagonal offset of 'h' to -1.
bli_obj_set_diag_offset( -1, h ); bli_obj_set_diag_offset( -1, &h );
// Set the structure and uplo of 'h'. // Set the structure and uplo of 'h'.
bli_obj_set_struc( BLIS_TRIANGULAR, h ) bli_obj_set_struc( BLIS_TRIANGULAR, &h )
bli_obj_set_uplo( BLIS_UPPER, h ); bli_obj_set_uplo( BLIS_UPPER, &h );
// Randomize the elements on and above the first subdiagonal. // Randomize the elements on and above the first subdiagonal.
bli_randm( &h ); bli_randm( &h );
@@ -299,11 +299,11 @@ int main( int argc, char** argv )
bli_printm( "h: after randomizing above first subdiagonal", &h, "%4.1f", "" ); bli_printm( "h: after randomizing above first subdiagonal", &h, "%4.1f", "" );
// Create an alias to work with. // Create an alias to work with.
bli_obj_alias_to( h, hl ); bli_obj_alias_to( &h, &hl );
// Flip the uplo of 'hl' and move the diagonal down by one. // Flip the uplo of 'hl' and move the diagonal down by one.
bli_obj_set_uplo( BLIS_LOWER, hl ); bli_obj_set_uplo( BLIS_LOWER, &hl );
bli_obj_set_diag_offset( -2, hl ); bli_obj_set_diag_offset( -2, &hl );
// Set the region strictly below the first subdiagonal (on or below // Set the region strictly below the first subdiagonal (on or below
// the second subdiagonal) to zero. // the second subdiagonal) to zero.

View File

@@ -157,8 +157,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as symmetric and stored in the lower triangle, and // Mark matrix 'a' as symmetric and stored in the lower triangle, and
// then randomize that lower triangle. // then randomize that lower triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, a ) bli_obj_set_struc( BLIS_SYMMETRIC, &a )
bli_obj_set_uplo( BLIS_LOWER, a ); bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a ); bli_randm( &a );
bli_printm( "x: set to random values", &x, "%4.1f", "" ); bli_printm( "x: set to random values", &x, "%4.1f", "" );
@@ -200,8 +200,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as symmetric and stored in the upper triangle, and // Mark matrix 'a' as symmetric and stored in the upper triangle, and
// then randomize that upper triangle. // then randomize that upper triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, a ) bli_obj_set_struc( BLIS_SYMMETRIC, &a )
bli_obj_set_uplo( BLIS_UPPER, a ); bli_obj_set_uplo( BLIS_UPPER, &a );
bli_randm( &a ); bli_randm( &a );
bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" );
@@ -242,8 +242,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and // Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle. // then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a ) bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, a ); bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a ); bli_randm( &a );
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
@@ -283,8 +283,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and // Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle. // then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a ) bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, a ); bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a ); bli_randm( &a );
// Load the diagonal. By setting the diagonal to something of greater // Load the diagonal. By setting the diagonal to something of greater

View File

@@ -111,7 +111,7 @@ int main( int argc, char** argv )
bli_setm( &BLIS_ZERO, &cc ); bli_setm( &BLIS_ZERO, &cc );
// Set the transpose bit in 'aa'. // Set the transpose bit in 'aa'.
bli_obj_toggle_trans( aa ); bli_obj_toggle_trans( &aa );
bli_printm( "a: randomized", &aa, "%4.1f", "" ); bli_printm( "a: randomized", &aa, "%4.1f", "" );
bli_printm( "b: set to 1.0", &bb, "%4.1f", "" ); bli_printm( "b: set to 1.0", &bb, "%4.1f", "" );
@@ -148,8 +148,8 @@ int main( int argc, char** argv )
// Mark matrix 'c' as symmetric and stored in the lower triangle, and // Mark matrix 'c' as symmetric and stored in the lower triangle, and
// then randomize that lower triangle. // then randomize that lower triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, c ) bli_obj_set_struc( BLIS_SYMMETRIC, &c )
bli_obj_set_uplo( BLIS_LOWER, c ); bli_obj_set_uplo( BLIS_LOWER, &c );
bli_randm( &c ); bli_randm( &c );
bli_printm( "a: set to random values", &a, "%4.1f", "" ); bli_printm( "a: set to random values", &a, "%4.1f", "" );
@@ -194,8 +194,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as symmetric and stored in the upper triangle, and // Mark matrix 'a' as symmetric and stored in the upper triangle, and
// then randomize that upper triangle. // then randomize that upper triangle.
bli_obj_set_struc( BLIS_SYMMETRIC, a ) bli_obj_set_struc( BLIS_SYMMETRIC, &a )
bli_obj_set_uplo( BLIS_UPPER, a ); bli_obj_set_uplo( BLIS_UPPER, &a );
bli_randm( &a ); bli_randm( &a );
bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" ); bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" );
@@ -241,8 +241,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and // Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle. // then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a ) bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, a ); bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a ); bli_randm( &a );
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" ); bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
@@ -286,8 +286,8 @@ int main( int argc, char** argv )
// Mark matrix 'a' as triangular and stored in the lower triangle, and // Mark matrix 'a' as triangular and stored in the lower triangle, and
// then randomize that lower triangle. // then randomize that lower triangle.
bli_obj_set_struc( BLIS_TRIANGULAR, a ) bli_obj_set_struc( BLIS_TRIANGULAR, &a )
bli_obj_set_uplo( BLIS_LOWER, a ); bli_obj_set_uplo( BLIS_LOWER, &a );
bli_randm( &a ); bli_randm( &a );
// Load the diagonal. By setting the diagonal to something of greater // Load the diagonal. By setting the diagonal to something of greater

View File

@@ -147,8 +147,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &c ); bli_setm( &BLIS_MINUS_ONE, &c );
// Set the structure and uplo of 'c'. // Set the structure and uplo of 'c'.
bli_obj_set_struc( BLIS_SYMMETRIC, c ) bli_obj_set_struc( BLIS_SYMMETRIC, &c )
bli_obj_set_uplo( BLIS_LOWER, c ); bli_obj_set_uplo( BLIS_LOWER, &c );
// Randomize the lower triangle of 'c'. // Randomize the lower triangle of 'c'.
bli_randm( &c ); bli_randm( &c );
@@ -170,8 +170,8 @@ int main( int argc, char** argv )
// Initialize all of 'd' to -1.0 to simulate junk values. // Initialize all of 'd' to -1.0 to simulate junk values.
bli_setm( &BLIS_MINUS_ONE, &d ); bli_setm( &BLIS_MINUS_ONE, &d );
bli_obj_set_struc( BLIS_HERMITIAN, d ) bli_obj_set_struc( BLIS_HERMITIAN, &d )
bli_obj_set_uplo( BLIS_LOWER, d ); bli_obj_set_uplo( BLIS_LOWER, &d );
// Randomize the lower triangle of 'd'. // Randomize the lower triangle of 'd'.
bli_randm( &d ); bli_randm( &d );
@@ -185,8 +185,8 @@ int main( int argc, char** argv )
bli_printm( "d (after mkherm):", &d, "%4.1f", "" ); bli_printm( "d (after mkherm):", &d, "%4.1f", "" );
// Set the structure and uplo of 'd'. // Set the structure and uplo of 'd'.
bli_obj_set_struc( BLIS_HERMITIAN, d ) bli_obj_set_struc( BLIS_HERMITIAN, &d )
bli_obj_set_uplo( BLIS_LOWER, d ); bli_obj_set_uplo( BLIS_LOWER, &d );
// //
// Example 4: Make a complex matrix explicitly symmetric or Hermitian. // Example 4: Make a complex matrix explicitly symmetric or Hermitian.
@@ -203,8 +203,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &e ); bli_setm( &BLIS_MINUS_ONE, &e );
// Set the structure and uplo of 'e'. // Set the structure and uplo of 'e'.
bli_obj_set_struc( BLIS_SYMMETRIC, e ) bli_obj_set_struc( BLIS_SYMMETRIC, &e )
bli_obj_set_uplo( BLIS_UPPER, e ); bli_obj_set_uplo( BLIS_UPPER, &e );
// Randomize the upper triangle of 'e'. // Randomize the upper triangle of 'e'.
bli_randm( &e ); bli_randm( &e );
@@ -221,8 +221,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &f ); bli_setm( &BLIS_MINUS_ONE, &f );
// Set the structure and uplo of 'f'. // Set the structure and uplo of 'f'.
bli_obj_set_struc( BLIS_HERMITIAN, f ) bli_obj_set_struc( BLIS_HERMITIAN, &f )
bli_obj_set_uplo( BLIS_UPPER, f ); bli_obj_set_uplo( BLIS_UPPER, &f );
// Randomize the upper triangle of 'f'. // Randomize the upper triangle of 'f'.
bli_randm( &f ); bli_randm( &f );
@@ -249,8 +249,8 @@ int main( int argc, char** argv )
bli_setm( &BLIS_MINUS_ONE, &g ); bli_setm( &BLIS_MINUS_ONE, &g );
// Set the structure and uplo of 'g'. // Set the structure and uplo of 'g'.
bli_obj_set_struc( BLIS_TRIANGULAR, g ) bli_obj_set_struc( BLIS_TRIANGULAR, &g )
bli_obj_set_uplo( BLIS_LOWER, g ); bli_obj_set_uplo( BLIS_LOWER, &g );
// Randomize the lower triangle of 'g'. // Randomize the lower triangle of 'g'.
bli_randm( &g ); bli_randm( &g );

View File

@@ -50,10 +50,10 @@ void PASTEMAC0(opname) \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt_chi; \ num_t dt_chi; \
num_t dt_absq_c = bli_obj_dt_proj_to_complex( *absq ); \ num_t dt_absq_c = bli_obj_dt_proj_to_complex( absq ); \
\ \
void* buf_chi; \ void* buf_chi; \
void* buf_absq = bli_obj_buffer_at_off( *absq ); \ void* buf_absq = bli_obj_buffer_at_off( absq ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, absq ); \ PASTEMAC(opname,_check)( chi, absq ); \
@@ -61,7 +61,7 @@ void PASTEMAC0(opname) \
/* If chi is a scalar constant, use dt_absq_c to extract the address of the /* If chi is a scalar constant, use dt_absq_c to extract the address of the
corresponding constant value; otherwise, use the datatype encoded corresponding constant value; otherwise, use the datatype encoded
within the chi object and extract the buffer at the chi offset. */ \ within the chi object and extract the buffer at the chi offset. */ \
bli_set_scalar_dt_buffer( chi, dt_absq_c, dt_chi, buf_chi ); \ bli_obj_scalar_set_dt_buffer( chi, dt_absq_c, &dt_chi, &buf_chi ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_2 \ bli_call_ft_2 \
@@ -88,12 +88,12 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *psi ); \ num_t dt = bli_obj_dt( psi ); \
\ \
conj_t conjchi = bli_obj_conj_status( *chi ); \ conj_t conjchi = bli_obj_conj_status( chi ); \
\ \
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, psi ); \ PASTEMAC(opname,_check)( chi, psi ); \
@@ -125,11 +125,11 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *chi ); \ num_t dt = bli_obj_dt( chi ); \
\ \
conj_t conjchi = bli_obj_conj_status( *chi ); \ conj_t conjchi = bli_obj_conj_status( chi ); \
\ \
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi ); \ PASTEMAC(opname,_check)( chi ); \
@@ -158,10 +158,10 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *psi ); \ num_t dt = bli_obj_dt( psi ); \
\ \
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \ void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, psi ); \ PASTEMAC(opname,_check)( chi, psi ); \
@@ -191,14 +191,14 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt_chi = bli_obj_dt( *chi ); \ num_t dt_chi = bli_obj_dt( chi ); \
num_t dt_def = BLIS_DCOMPLEX; \ num_t dt_def = BLIS_DCOMPLEX; \
num_t dt_use; \ num_t dt_use; \
\ \
/* If chi is a constant object, default to using the dcomplex /* If chi is a constant object, default to using the dcomplex
value to maximize precision, and since we don't know if the value to maximize precision, and since we don't know if the
caller needs just the real or the real and imaginary parts. */ \ caller needs just the real or the real and imaginary parts. */ \
void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi ); \ void* buf_chi = bli_obj_buffer_for_1x1( dt_def, chi ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
@@ -234,9 +234,9 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt_chi = bli_obj_dt( *chi ); \ num_t dt_chi = bli_obj_dt( chi ); \
\ \
void* buf_chi = bli_obj_buffer_at_off( *chi ); \ void* buf_chi = bli_obj_buffer_at_off( chi ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \ PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \
@@ -268,12 +268,12 @@ void PASTEMAC0(opname) \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt_chi; \ num_t dt_chi; \
num_t dt_zeta_c = bli_obj_dt_proj_to_complex( *zeta_r ); \ num_t dt_zeta_c = bli_obj_dt_proj_to_complex( zeta_r ); \
\ \
void* buf_chi; \ void* buf_chi; \
\ \
void* buf_zeta_r = bli_obj_buffer_at_off( *zeta_r ); \ void* buf_zeta_r = bli_obj_buffer_at_off( zeta_r ); \
void* buf_zeta_i = bli_obj_buffer_at_off( *zeta_i ); \ void* buf_zeta_i = bli_obj_buffer_at_off( zeta_i ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
@@ -281,7 +281,7 @@ void PASTEMAC0(opname) \
/* If chi is a scalar constant, use dt_zeta_c to extract the address of the /* If chi is a scalar constant, use dt_zeta_c to extract the address of the
corresponding constant value; otherwise, use the datatype encoded corresponding constant value; otherwise, use the datatype encoded
within the chi object and extract the buffer at the chi offset. */ \ within the chi object and extract the buffer at the chi offset. */ \
bli_set_scalar_dt_buffer( chi, dt_zeta_c, dt_chi, buf_chi ); \ bli_obj_scalar_set_dt_buffer( chi, dt_zeta_c, &dt_chi, &buf_chi ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_3 \ bli_call_ft_3 \
@@ -309,12 +309,12 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt_chi = bli_obj_dt( *chi ); \ num_t dt_chi = bli_obj_dt( chi ); \
\ \
void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, *zeta_r ); \ void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, zeta_r ); \
void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, *zeta_i ); \ void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, zeta_i ); \
\ \
void* buf_chi = bli_obj_buffer_at_off( *chi ); \ void* buf_chi = bli_obj_buffer_at_off( chi ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \ PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \

View File

@@ -63,10 +63,10 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
conj_t conjchi = bli_obj_conj_status( *chi ); \ conj_t conjchi = bli_obj_conj_status( chi ); \
\ \
num_t dt_psi = bli_obj_dt( *psi ); \ num_t dt_psi = bli_obj_dt( psi ); \
void* buf_psi = bli_obj_buffer_at_off( *psi ); \ void* buf_psi = bli_obj_buffer_at_off( psi ); \
\ \
num_t dt_chi; \ num_t dt_chi; \
void* buf_chi; \ void* buf_chi; \
@@ -79,7 +79,7 @@ void PASTEMAC0(opname) \
/* If chi is a scalar constant, use dt_psi to extract the address of the /* If chi is a scalar constant, use dt_psi to extract the address of the
corresponding constant value; otherwise, use the datatype encoded corresponding constant value; otherwise, use the datatype encoded
within the chi object and extract the buffer at the chi offset. */ \ within the chi object and extract the buffer at the chi offset. */ \
bli_set_scalar_dt_buffer( chi, dt_psi, dt_chi, buf_chi ); \ bli_obj_scalar_set_dt_buffer( chi, dt_psi, &dt_chi, &buf_chi ); \
\ \
/* Index into the type combination array to extract the correct /* Index into the type combination array to extract the correct
function pointer. */ \ function pointer. */ \

View File

@@ -54,14 +54,14 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \ PASTEMAC(opname,_check)( x, y ); \
@@ -98,13 +98,13 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_index = bli_obj_buffer_at_off( *index ); \ void* buf_index = bli_obj_buffer_at_off( index ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, index ); \ PASTEMAC(opname,_check)( x, index ); \
@@ -140,14 +140,14 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
void* buf_beta; \ void* buf_beta; \
@@ -164,8 +164,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \ beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_9 \ bli_call_ft_9 \
@@ -200,14 +200,14 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -220,7 +220,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_8 \ bli_call_ft_8 \
@@ -255,16 +255,16 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_rho = bli_obj_buffer_at_off( *rho ); \ void* buf_rho = bli_obj_buffer_at_off( rho ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y, rho ); \ PASTEMAC(opname,_check)( x, y, rho ); \
@@ -304,16 +304,16 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_rho = bli_obj_buffer_at_off( *rho ); \ void* buf_rho = bli_obj_buffer_at_off( rho ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
void* buf_beta; \ void* buf_beta; \
@@ -330,8 +330,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \ beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_11 \ bli_call_ft_11 \
@@ -366,11 +366,11 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x ); \ PASTEMAC(opname,_check)( x ); \
@@ -403,12 +403,12 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \ /* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -421,7 +421,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_6 \ bli_call_ft_6 \
@@ -454,13 +454,13 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \ PASTEMAC(opname,_check)( x, y ); \
@@ -495,14 +495,14 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
\ \
void* buf_beta; \ void* buf_beta; \
\ \
@@ -515,7 +515,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \ beta, &beta_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_8 \ bli_call_ft_8 \

View File

@@ -61,7 +61,7 @@ void bli_packv_init
// is NULL, and if so, simply alias the object to its packed counterpart. // is NULL, and if so, simply alias the object to its packed counterpart.
if ( bli_cntl_is_noop( cntl ) ) if ( bli_cntl_is_noop( cntl ) )
{ {
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( a, p );
return; return;
} }
@@ -73,15 +73,15 @@ void bli_packv_init
// BLIS_NOT_PACKED and thus packing will be called for (but in some // BLIS_NOT_PACKED and thus packing will be called for (but in some
// cases packing has already taken place). Also, not all combinations // cases packing has already taken place). Also, not all combinations
// of current pack status and desired pack schema are valid. // of current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) ) if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
{ {
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( a, p );
return; return;
} }
// Now, if we are not skipping the pack operation, then the only question // Now, if we are not skipping the pack operation, then the only question
// left is whether we are to typecast vector a before packing. // left is whether we are to typecast vector a before packing.
if ( bli_obj_dt( *a ) != bli_obj_target_dt( *a ) ) if ( bli_obj_dt( a ) != bli_obj_target_dt( a ) )
bli_abort(); bli_abort();
// Extract various fields from the control tree and pass them in // Extract various fields from the control tree and pass them in
@@ -113,8 +113,8 @@ siz_t bli_packv_init_pack
cntx_t* cntx cntx_t* cntx
) )
{ {
num_t dt = bli_obj_dt( *a ); num_t dt = bli_obj_dt( a );
dim_t dim_a = bli_obj_vector_dim( *a ); dim_t dim_a = bli_obj_vector_dim( a );
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx ); dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
membrk_t* membrk = bli_cntx_membrk( cntx ); membrk_t* membrk = bli_cntx_membrk( cntx );
@@ -129,23 +129,23 @@ siz_t bli_packv_init_pack
// We begin by copying the basic fields of a. // We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( a, p );
// Update the dimensions. // Update the dimensions.
bli_obj_set_dims( dim_a, 1, *p ); bli_obj_set_dims( dim_a, 1, p );
// Reset the view offsets to (0,0). // Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *p ); bli_obj_set_offs( 0, 0, p );
// Set the pack schema in the p object to the value in the control tree // Set the pack schema in the p object to the value in the control tree
// node. // node.
bli_obj_set_pack_schema( schema, *p ); bli_obj_set_pack_schema( schema, p );
// Compute the dimensions padded by the dimension multiples. // Compute the dimensions padded by the dimension multiples.
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult ); m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( p ), bmult );
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
size_p = m_p_pad * 1 * bli_obj_elem_size( *p ); size_p = m_p_pad * 1 * bli_obj_elem_size( p );
#if 0 #if 0
// Extract the address of the mem_t object within p that will track // Extract the address of the mem_t object within p that will track
@@ -179,11 +179,11 @@ siz_t bli_packv_init_pack
// copied when the value is already up-to-date, because it persists // copied when the value is already up-to-date, because it persists
// in the main object buffer field across loop iterations.) // in the main object buffer field across loop iterations.)
buf = bli_mem_buffer( mem_p ); buf = bli_mem_buffer( mem_p );
bli_obj_set_buffer( buf, *p ); bli_obj_set_buffer( buf, p );
#endif #endif
// Save the padded (packed) dimensions into the packed object. // Save the padded (packed) dimensions into the packed object.
bli_obj_set_padded_dims( m_p_pad, 1, *p ); bli_obj_set_padded_dims( m_p_pad, 1, p );
// Set the row and column strides of p based on the pack schema. // Set the row and column strides of p based on the pack schema.
if ( schema == BLIS_PACKED_VECTOR ) if ( schema == BLIS_PACKED_VECTOR )
@@ -193,9 +193,9 @@ siz_t bli_packv_init_pack
// how much space beyond the vector would need to be zero-padded, if // how much space beyond the vector would need to be zero-padded, if
// zero-padding was needed. // zero-padding was needed.
rs_p = 1; rs_p = 1;
cs_p = bli_obj_padded_length( *p ); cs_p = bli_obj_padded_length( p );
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, p );
} }
return size_p; return size_p;

View File

@@ -71,7 +71,7 @@ void bli_packv_int
// Sanity check; A should never have a zero dimension. If we must support // Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block. // it, then we should fold it into the next alias-and-early-exit block.
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort(); //if ( bli_obj_has_zero_dim( a ) ) bli_abort();
// First check if we are to skip this operation because the control tree // First check if we are to skip this operation because the control tree
// is NULL. We return without taking any action because a was already // is NULL. We return without taking any action because a was already
@@ -91,7 +91,7 @@ void bli_packv_int
// not important, as long as it is packed into contiguous rows or // not important, as long as it is packed into contiguous rows or
// contiguous columns. A good example of this is packing for matrix // contiguous columns. A good example of this is packing for matrix
// operands in the level-2 operations. // operands in the level-2 operations.
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC ) if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
{ {
return; return;
} }
@@ -104,7 +104,7 @@ void bli_packv_int
// already taken place, or does not need to take place, and so that will // already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of // be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid. // current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) ) if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
{ {
return; return;
} }

View File

@@ -51,15 +51,15 @@ void bli_packv_unb_var1( obj_t* c,
cntx_t* cntx, cntx_t* cntx,
packv_t* cntl ) packv_t* cntl )
{ {
num_t dt_cp = bli_obj_dt( *c ); num_t dt_cp = bli_obj_dt( c );
dim_t dim_p = bli_obj_vector_dim( *p ); dim_t dim_p = bli_obj_vector_dim( p );
void* buf_c = bli_obj_buffer_at_off( *c ); void* buf_c = bli_obj_buffer_at_off( c );
inc_t incc = bli_obj_vector_inc( *c ); inc_t incc = bli_obj_vector_inc( c );
void* buf_p = bli_obj_buffer_at_off( *p ); void* buf_p = bli_obj_buffer_at_off( p );
inc_t incp = bli_obj_vector_inc( *p ); inc_t incp = bli_obj_vector_inc( p );
FUNCPTR_T f; FUNCPTR_T f;

View File

@@ -54,7 +54,7 @@ void bli_scalv_int( obj_t* alpha,
FUNCPTR_T f; FUNCPTR_T f;
// Return early if one of the matrix operands has a zero dimension. // Return early if one of the matrix operands has a zero dimension.
if ( bli_obj_has_zero_dim( *x ) ) return; if ( bli_obj_has_zero_dim( x ) ) return;
// Check parameters. // Check parameters.
if ( bli_error_checking_is_enabled() ) if ( bli_error_checking_is_enabled() )

View File

@@ -75,7 +75,7 @@ void bli_unpackv_int( obj_t* p,
// Sanity check; A should never have a zero dimension. If we must support // Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block. // it, then we should fold it into the next alias-and-early-exit block.
if ( bli_obj_has_zero_dim( *a ) ) bli_abort(); if ( bli_obj_has_zero_dim( a ) ) bli_abort();
// First check if we are to skip this operation because the control tree // First check if we are to skip this operation because the control tree
// is NULL, and if so, simply return. // is NULL, and if so, simply return.
@@ -87,17 +87,17 @@ void bli_unpackv_int( obj_t* p,
// If p was aliased to a during the pack stage (because it was already // If p was aliased to a during the pack stage (because it was already
// in an acceptable packed/contiguous format), then no unpack is actually // in an acceptable packed/contiguous format), then no unpack is actually
// necessary, so we return. // necessary, so we return.
if ( bli_obj_is_alias_of( *p, *a ) ) if ( bli_obj_is_alias_of( p, a ) )
{ {
return; return;
} }
// Now, if we are not skipping the unpack operation, then the only // Now, if we are not skipping the unpack operation, then the only
// question left is whether we are to typecast vector a after unpacking. // question left is whether we are to typecast vector a after unpacking.
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) ) if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
bli_abort(); bli_abort();
/* /*
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) ) if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
{ {
// Initialize an object c for the intermediate typecast vector. // Initialize an object c for the intermediate typecast vector.
bli_unpackv_init_cast( p, bli_unpackv_init_cast( p,
@@ -110,7 +110,7 @@ void bli_unpackv_int( obj_t* p,
// If no cast is needed, then aliasing object c to the original // If no cast is needed, then aliasing object c to the original
// vector serves as a minor optimization. This causes the unpackv // vector serves as a minor optimization. This causes the unpackv
// implementation to unpack directly into vector a. // implementation to unpack directly into vector a.
bli_obj_alias_to( *a, c ); bli_obj_alias_to( a, &c );
} }
// Now we are ready to proceed with the unpacking. // Now we are ready to proceed with the unpacking.
@@ -132,7 +132,7 @@ void bli_unpackv_int( obj_t* p,
// was not necessary, then we are done because the call to the unpackv // was not necessary, then we are done because the call to the unpackv
// implementation would have unpacked directly to vector a. // implementation would have unpacked directly to vector a.
/* /*
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) ) if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
{ {
// Copy/typecast vector c to vector a. // Copy/typecast vector c to vector a.
// NOTE: Here, we use copynzv instead of copym because, in the cases // NOTE: Here, we use copynzv instead of copym because, in the cases
@@ -179,26 +179,26 @@ void bli_unpackv_init_cast( obj_t* p,
// already available. (After acquiring a mem entry from the memory // already available. (After acquiring a mem entry from the memory
// manager, it is cached within p for quick access later on.) // manager, it is cached within p for quick access later on.)
num_t dt_targ_a = bli_obj_target_dt( *a ); num_t dt_targ_a = bli_obj_target_dt( a );
dim_t dim_a = bli_obj_vector_dim( *a ); dim_t dim_a = bli_obj_vector_dim( a );
siz_t elem_size_c = bli_dt_size( dt_targ_a ); siz_t elem_size_c = bli_dt_size( dt_targ_a );
// We begin by copying the basic fields of a. // We begin by copying the basic fields of a.
bli_obj_alias_to( *a, *c ); bli_obj_alias_to( a, c );
// Update datatype and element size fields. // Update datatype and element size fields.
bli_obj_set_dt( dt_targ_a, *c ); bli_obj_set_dt( dt_targ_a, c );
bli_obj_set_elem_size( elem_size_c, *c ); bli_obj_set_elem_size( elem_size_c, c );
// Update the strides and dimensions. We set the increments to reflect a // Update the strides and dimensions. We set the increments to reflect a
// column-stored vector. Note that the column stride is set to dim(a), // column-stored vector. Note that the column stride is set to dim(a),
// though it should never be used because there is no second column to // though it should never be used because there is no second column to
// index into (and therefore it also does not need to be aligned). // index into (and therefore it also does not need to be aligned).
bli_obj_set_dims( dim_a, 1, *c ); bli_obj_set_dims( dim_a, 1, c );
bli_obj_set_strides( 1, dim_a, *c ); bli_obj_set_strides( 1, dim_a, c );
// Reset the view offsets to (0,0). // Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *c ); bli_obj_set_offs( 0, 0, c );
// Check the mem_t entry of p associated with the cast buffer. If it is // Check the mem_t entry of p associated with the cast buffer. If it is
// NULL, then acquire memory sufficient to hold the object data and cache // NULL, then acquire memory sufficient to hold the object data and cache

View File

@@ -51,15 +51,15 @@ void bli_unpackv_unb_var1( obj_t* p,
cntx_t* cntx, cntx_t* cntx,
unpackv_t* cntl ) unpackv_t* cntl )
{ {
num_t dt_pc = bli_obj_dt( *p ); num_t dt_pc = bli_obj_dt( p );
dim_t dim_c = bli_obj_vector_dim( *c ); dim_t dim_c = bli_obj_vector_dim( c );
void* buf_p = bli_obj_buffer_at_off( *p ); void* buf_p = bli_obj_buffer_at_off( p );
inc_t incp = bli_obj_vector_inc( *p ); inc_t incp = bli_obj_vector_inc( p );
void* buf_c = bli_obj_buffer_at_off( *c ); void* buf_c = bli_obj_buffer_at_off( c );
inc_t incc = bli_obj_vector_inc( *c ); inc_t incc = bli_obj_vector_inc( c );
FUNCPTR_T f; FUNCPTR_T f;

View File

@@ -54,19 +54,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( *x ); \ diag_t diagx = bli_obj_diag( x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( *y ); \ dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( *y ); \ dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \ inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \ inc_t cs_y = bli_obj_col_stride( y ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \ PASTEMAC(opname,_check)( x, y ); \
@@ -107,19 +107,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( *x ); \ diag_t diagx = bli_obj_diag( x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( *y ); \ dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( *y ); \ dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \ inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \ inc_t cs_y = bli_obj_col_stride( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -132,7 +132,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_13 \ bli_call_ft_13 \
@@ -168,14 +168,14 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
dim_t m = bli_obj_length( *x ); \ dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( *x ); \ dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x ); \ PASTEMAC(opname,_check)( x ); \
@@ -210,15 +210,15 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \ /* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
dim_t m = bli_obj_length( *x ); \ dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( *x ); \ dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -231,7 +231,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_9 \ bli_call_ft_9 \
@@ -266,16 +266,16 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
dim_t m = bli_obj_length( *x ); \ dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( *x ); \ dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( alpha, x ); \ PASTEMAC(opname,_check)( alpha, x ); \

View File

@@ -70,9 +70,12 @@ void PASTEMAC(ch,opname) \
\ \
/* Determine the distance to the diagonals, the number of diagonal /* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \ elements, and the diagonal increments. */ \
bli_set_dims_incs_2d( diagoffx, transx, \ bli_set_dims_incs_2d \
m, n, rs_x, cs_x, rs_y, cs_y, \ ( \
offx, offy, n_elem, incx, incy ); \ diagoffx, transx, \
m, n, rs_x, cs_x, rs_y, cs_y, \
&offx, &offy, &n_elem, &incx, &incy \
); \
\ \
conjx = bli_extract_conj( transx ); \ conjx = bli_extract_conj( transx ); \
\ \
@@ -144,9 +147,12 @@ void PASTEMAC(ch,opname) \
\ \
/* Determine the distance to the diagonals, the number of diagonal /* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \ elements, and the diagonal increments. */ \
bli_set_dims_incs_2d( diagoffx, transx, \ bli_set_dims_incs_2d \
m, n, rs_x, cs_x, rs_y, cs_y, \ ( \
offx, offy, n_elem, incx, incy ); \ diagoffx, transx, \
m, n, rs_x, cs_x, rs_y, cs_y, \
&offx, &offy, &n_elem, &incx, &incy \
); \
\ \
conjx = bli_extract_conj( transx ); \ conjx = bli_extract_conj( transx ); \
\ \
@@ -212,9 +218,12 @@ void PASTEMAC(ch,opname) \
\ \
/* Determine the distance to the diagonals, the number of diagonal /* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \ elements, and the diagonal increments. */ \
bli_set_dims_incs_1d( diagoffx, \ bli_set_dims_incs_1d \
m, n, rs_x, cs_x, \ ( \
offx, n_elem, incx ); \ diagoffx, \
m, n, rs_x, cs_x, \
&offx, &n_elem, &incx \
); \
\ \
x1 = x + offx; \ x1 = x + offx; \
\ \
@@ -264,9 +273,12 @@ void PASTEMAC(ch,opname) \
\ \
/* Determine the distance to the diagonals, the number of diagonal /* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \ elements, and the diagonal increments. */ \
bli_set_dims_incs_1d( diagoffx, \ bli_set_dims_incs_1d \
m, n, rs_x, cs_x, \ ( \
offx, n_elem, incx ); \ diagoffx, \
m, n, rs_x, cs_x, \
&offx, &n_elem, &incx \
); \
\ \
x1 = x + offx; \ x1 = x + offx; \
\ \
@@ -322,9 +334,12 @@ void PASTEMAC(ch,opname) \
\ \
/* Determine the distance to the diagonals, the number of diagonal /* Determine the distance to the diagonals, the number of diagonal
elements, and the diagonal increments. */ \ elements, and the diagonal increments. */ \
bli_set_dims_incs_1d( diagoffx, \ bli_set_dims_incs_1d \
m, n, rs_x, cs_x, \ ( \
offx, n_elem, incx ); \ diagoffx, \
m, n, rs_x, cs_x, \
&offx, &n_elem, &incx \
); \
\ \
/* Alternate implementation. (Substitute for remainder of function). */ \ /* Alternate implementation. (Substitute for remainder of function). */ \
/* for ( i = 0; i < n_elem; ++i ) \ /* for ( i = 0; i < n_elem; ++i ) \

View File

@@ -146,10 +146,10 @@ void bli_axpyf_check
e_val = bli_check_vector_object( y ); e_val = bli_check_vector_object( y );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
// Check object buffers (for non-NULLness). // Check object buffers (for non-NULLness).
@@ -334,16 +334,16 @@ void bli_dotxaxpyf_check
e_val = bli_check_conformal_dims( at, a ); e_val = bli_check_conformal_dims( at, a );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_object_length_equals( at, bli_obj_vector_dim( *w ) ); e_val = bli_check_object_length_equals( at, bli_obj_vector_dim( w ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_object_width_equals( at, bli_obj_vector_dim( *y ) ); e_val = bli_check_object_width_equals( at, bli_obj_vector_dim( y ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_object_length_equals( a, bli_obj_vector_dim( *z ) ); e_val = bli_check_object_length_equals( a, bli_obj_vector_dim( z ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_object_width_equals( a, bli_obj_vector_dim( *x ) ); e_val = bli_check_object_width_equals( a, bli_obj_vector_dim( x ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
// Check object aliases. // Check object aliases.
@@ -424,10 +424,10 @@ void bli_dotxf_check
e_val = bli_check_vector_object( y ); e_val = bli_check_vector_object( y );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
// Check object buffers (for non-NULLness). // Check object buffers (for non-NULLness).

View File

@@ -57,17 +57,17 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_z = bli_obj_buffer_at_off( *z ); \ void* buf_z = bli_obj_buffer_at_off( z ); \
inc_t inc_z = bli_obj_vector_inc( *z ); \ inc_t inc_z = bli_obj_vector_inc( z ); \
\ \
void* buf_alphax; \ void* buf_alphax; \
void* buf_alphay; \ void* buf_alphay; \
@@ -84,8 +84,8 @@ void PASTEMAC(opname,EX_SUF) \
alphax, &alphax_local ); \ alphax, &alphax_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alphay, &alphay_local ); \ alphay, &alphay_local ); \
buf_alphax = bli_obj_buffer_for_1x1( dt, alphax_local ); \ buf_alphax = bli_obj_buffer_for_1x1( dt, &alphax_local ); \
buf_alphay = bli_obj_buffer_for_1x1( dt, alphay_local ); \ buf_alphay = bli_obj_buffer_for_1x1( dt, &alphay_local ); \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_12 \ bli_call_ft_12 \
@@ -123,19 +123,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conja = bli_obj_conj_status( *a ); \ conj_t conja = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_vector_dim( *y ); \ dim_t m = bli_obj_vector_dim( y ); \
dim_t b_n = bli_obj_vector_dim( *x ); \ dim_t b_n = bli_obj_vector_dim( x ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -148,10 +148,10 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Support cases where matrix A requires a transposition. */ \ /* Support cases where matrix A requires a transposition. */ \
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \ if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_13 \ bli_call_ft_13 \
@@ -191,19 +191,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjxt = bli_obj_conj_status( *xt ); \ conj_t conjxt = bli_obj_conj_status( xt ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
dim_t n = bli_obj_vector_dim( *x ); \ dim_t n = bli_obj_vector_dim( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_z = bli_obj_buffer_at_off( *z ); \ void* buf_z = bli_obj_buffer_at_off( z ); \
inc_t inc_z = bli_obj_vector_inc( *z ); \ inc_t inc_z = bli_obj_vector_inc( z ); \
void* buf_rho = bli_obj_buffer_at_off( *rho ); \ void* buf_rho = bli_obj_buffer_at_off( rho ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -216,7 +216,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_13 \ bli_call_ft_13 \
@@ -259,25 +259,25 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjat = bli_obj_conj_status( *at ); \ conj_t conjat = bli_obj_conj_status( at ); \
conj_t conja = bli_obj_conj_status( *a ); \ conj_t conja = bli_obj_conj_status( a ); \
conj_t conjw = bli_obj_conj_status( *w ); \ conj_t conjw = bli_obj_conj_status( w ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_vector_dim( *z ); \ dim_t m = bli_obj_vector_dim( z ); \
dim_t b_n = bli_obj_vector_dim( *y ); \ dim_t b_n = bli_obj_vector_dim( y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_w = bli_obj_buffer_at_off( *w ); \ void* buf_w = bli_obj_buffer_at_off( w ); \
inc_t inc_w = bli_obj_vector_inc( *w ); \ inc_t inc_w = bli_obj_vector_inc( w ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
void* buf_z = bli_obj_buffer_at_off( *z ); \ void* buf_z = bli_obj_buffer_at_off( z ); \
inc_t inc_z = bli_obj_vector_inc( *z ); \ inc_t inc_z = bli_obj_vector_inc( z ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
void* buf_beta; \ void* buf_beta; \
@@ -294,11 +294,11 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \ beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\ \
/* Support cases where matrix A requires a transposition. */ \ /* Support cases where matrix A requires a transposition. */ \
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \ if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_20 \ bli_call_ft_20 \
@@ -342,19 +342,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
conj_t conjat = bli_obj_conj_status( *a ); \ conj_t conjat = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_vector_dim( *x ); \ dim_t m = bli_obj_vector_dim( x ); \
dim_t b_n = bli_obj_vector_dim( *y ); \ dim_t b_n = bli_obj_vector_dim( y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t inc_x = bli_obj_vector_inc( *x ); \ inc_t inc_x = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t inc_y = bli_obj_vector_inc( *y ); \ inc_t inc_y = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
void* buf_beta; \ void* buf_beta; \
@@ -371,11 +371,11 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \ beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\ \
/* Support cases where matrix A requires a transposition. */ \ /* Support cases where matrix A requires a transposition. */ \
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \ if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
\ \
/* Invoke the void pointer-based function. */ \ /* Invoke the void pointer-based function. */ \
bli_call_ft_14 \ bli_call_ft_14 \

View File

@@ -54,20 +54,20 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( *x ); \ diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( *x ); \ uplo_t uplox = bli_obj_uplo( x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( *y ); \ dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( *y ); \ dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \ inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \ inc_t cs_y = bli_obj_col_stride( y ); \
\ \
if ( bli_error_checking_is_enabled() ) \ if ( bli_error_checking_is_enabled() ) \
PASTEMAC(opname,_check)( x, y ); \ PASTEMAC(opname,_check)( x, y ); \
@@ -109,20 +109,20 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( *x ); \ diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( *x ); \ uplo_t uplox = bli_obj_uplo( x ); \
trans_t transx = bli_obj_conjtrans_status( *x ); \ trans_t transx = bli_obj_conjtrans_status( x ); \
dim_t m = bli_obj_length( *y ); \ dim_t m = bli_obj_length( y ); \
dim_t n = bli_obj_width( *y ); \ dim_t n = bli_obj_width( y ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t rs_y = bli_obj_row_stride( *y ); \ inc_t rs_y = bli_obj_row_stride( y ); \
inc_t cs_y = bli_obj_col_stride( *y ); \ inc_t cs_y = bli_obj_col_stride( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -135,7 +135,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_14 \ bli_call_ft_14 \
@@ -173,17 +173,17 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \ /* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( *x ); \ diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( *x ); \ uplo_t uplox = bli_obj_uplo( x ); \
dim_t m = bli_obj_length( *x ); \ dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( *x ); \ dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -194,7 +194,7 @@ void PASTEMAC(opname,EX_SUF) \
PASTEMAC(opname,_check)( alpha, x ); \ PASTEMAC(opname,_check)( alpha, x ); \
\ \
/* Alias x to x_local so we can apply alpha if it is non-unit. */ \ /* Alias x to x_local so we can apply alpha if it is non-unit. */ \
bli_obj_alias_to( *x, x_local ); \ bli_obj_alias_to( x, &x_local ); \
\ \
/* If alpha is non-unit, apply it to the scalar attached to x. */ \ /* If alpha is non-unit, apply it to the scalar attached to x. */ \
if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) \ if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) \
@@ -209,7 +209,7 @@ void PASTEMAC(opname,EX_SUF) \
\ \
/* Grab the address of the internal scalar buffer for the scalar /* Grab the address of the internal scalar buffer for the scalar
attached to x. */ \ attached to x. */ \
buf_alpha = bli_obj_internal_scalar_buffer( x_local ); \ buf_alpha = bli_obj_internal_scalar_buffer( &x_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_11 \ bli_call_ft_11 \
@@ -245,17 +245,17 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *x ); \ num_t dt = bli_obj_dt( x ); \
\ \
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \ /* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
doff_t diagoffx = bli_obj_diag_offset( *x ); \ doff_t diagoffx = bli_obj_diag_offset( x ); \
diag_t diagx = bli_obj_diag( *x ); \ diag_t diagx = bli_obj_diag( x ); \
uplo_t uplox = bli_obj_uplo( *x ); \ uplo_t uplox = bli_obj_uplo( x ); \
dim_t m = bli_obj_length( *x ); \ dim_t m = bli_obj_length( x ); \
dim_t n = bli_obj_width( *x ); \ dim_t n = bli_obj_width( x ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t rs_x = bli_obj_row_stride( *x ); \ inc_t rs_x = bli_obj_row_stride( x ); \
inc_t cs_x = bli_obj_col_stride( *x ); \ inc_t cs_x = bli_obj_col_stride( x ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -268,7 +268,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_11 \ bli_call_ft_11 \

View File

@@ -146,7 +146,7 @@ void PASTEMAC(ch,opname) \
ctype* one = PASTEMAC(ch,1); \ ctype* one = PASTEMAC(ch,1); \
\ \
if ( bli_does_trans( transx ) ) \ if ( bli_does_trans( transx ) ) \
bli_negate_diag_offset( diagoffy ); \ bli_negate_diag_offset( &diagoffy ); \
\ \
PASTEMAC(ch,setd) \ PASTEMAC(ch,setd) \
( \ ( \
@@ -299,7 +299,7 @@ void PASTEMAC(ch,opname) \
doff_t diagoffy = diagoffx; \ doff_t diagoffy = diagoffx; \
\ \
if ( bli_does_trans( transx ) ) \ if ( bli_does_trans( transx ) ) \
bli_negate_diag_offset( diagoffy ); \ bli_negate_diag_offset( &diagoffy ); \
\ \
PASTEMAC(ch,setd) \ PASTEMAC(ch,setd) \
( \ ( \

View File

@@ -68,10 +68,13 @@ void PASTEMAC(ch,opname) \
dim_t ij0, n_shift; \ dim_t ij0, n_shift; \
\ \
/* Set various loop parameters. */ \ /* Set various loop parameters. */ \
bli_set_dims_incs_uplo_2m( diagoffx, diagx, transx, \ bli_set_dims_incs_uplo_2m \
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \ ( \
uplox_eff, n_elem_max, n_iter, incx, ldx, incy, ldy, \ diagoffx, diagx, transx, \
ij0, n_shift ); \ uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
&ij0, &n_shift \
); \
\ \
if ( bli_is_zeros( uplox_eff ) ) return; \ if ( bli_is_zeros( uplox_eff ) ) return; \
\ \
@@ -181,10 +184,13 @@ void PASTEMAC(ch,opname) \
dim_t ij0, n_shift; \ dim_t ij0, n_shift; \
\ \
/* Set various loop parameters. */ \ /* Set various loop parameters. */ \
bli_set_dims_incs_uplo_2m( diagoffx, diagx, transx, \ bli_set_dims_incs_uplo_2m \
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \ ( \
uplox_eff, n_elem_max, n_iter, incx, ldx, incy, ldy, \ diagoffx, diagx, transx, \
ij0, n_shift ); \ uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
&ij0, &n_shift \
); \
\ \
if ( bli_is_zeros( uplox_eff ) ) return; \ if ( bli_is_zeros( uplox_eff ) ) return; \
\ \
@@ -292,10 +298,13 @@ void PASTEMAC(ch,opname) \
dim_t ij0, n_shift; \ dim_t ij0, n_shift; \
\ \
/* Set various loop parameters. */ \ /* Set various loop parameters. */ \
bli_set_dims_incs_uplo_1m( diagoffx, diagx, \ bli_set_dims_incs_uplo_1m \
uplox, m, n, rs_x, cs_x, \ ( \
uplox_eff, n_elem_max, n_iter, incx, ldx, \ diagoffx, diagx, \
ij0, n_shift ); \ uplox, m, n, rs_x, cs_x, \
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, \
&ij0, &n_shift \
); \
\ \
if ( bli_is_zeros( uplox_eff ) ) return; \ if ( bli_is_zeros( uplox_eff ) ) return; \
\ \

View File

@@ -108,33 +108,33 @@ void bli_packm_blk_var1
thrinfo_t* t thrinfo_t* t
) )
{ {
num_t dt_cp = bli_obj_dt( *c ); num_t dt_cp = bli_obj_dt( c );
struc_t strucc = bli_obj_struc( *c ); struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( *c ); doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( *c ); diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( *c ); uplo_t uploc = bli_obj_uplo( c );
trans_t transc = bli_obj_conjtrans_status( *c ); trans_t transc = bli_obj_conjtrans_status( c );
pack_t schema = bli_obj_pack_schema( *p ); pack_t schema = bli_obj_pack_schema( p );
bool_t invdiag = bli_obj_has_inverted_diag( *p ); bool_t invdiag = bli_obj_has_inverted_diag( p );
bool_t revifup = bli_obj_is_pack_rev_if_upper( *p ); bool_t revifup = bli_obj_is_pack_rev_if_upper( p );
bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p ); bool_t reviflo = bli_obj_is_pack_rev_if_lower( p );
dim_t m_p = bli_obj_length( *p ); dim_t m_p = bli_obj_length( p );
dim_t n_p = bli_obj_width( *p ); dim_t n_p = bli_obj_width( p );
dim_t m_max_p = bli_obj_padded_length( *p ); dim_t m_max_p = bli_obj_padded_length( p );
dim_t n_max_p = bli_obj_padded_width( *p ); dim_t n_max_p = bli_obj_padded_width( p );
void* buf_c = bli_obj_buffer_at_off( *c ); void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( *c ); inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( *c ); inc_t cs_c = bli_obj_col_stride( c );
void* buf_p = bli_obj_buffer_at_off( *p ); void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( *p ); inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( *p ); inc_t cs_p = bli_obj_col_stride( p );
inc_t is_p = bli_obj_imag_stride( *p ); inc_t is_p = bli_obj_imag_stride( p );
dim_t pd_p = bli_obj_panel_dim( *p ); dim_t pd_p = bli_obj_panel_dim( p );
inc_t ps_p = bli_obj_panel_stride( *p ); inc_t ps_p = bli_obj_panel_stride( p );
obj_t kappa; obj_t kappa;
obj_t* kappa_p; obj_t* kappa_p;
@@ -155,7 +155,7 @@ void bli_packm_blk_var1
// higher-level operation. Thus, we use BLIS_ONE for kappa so // higher-level operation. Thus, we use BLIS_ONE for kappa so
// that the underlying packm implementation does not perform // that the underlying packm implementation does not perform
// any scaling during packing. // any scaling during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
} }
else // if ( bli_is_ind_packed( schema ) ) else // if ( bli_is_ind_packed( schema ) )
{ {
@@ -187,7 +187,7 @@ void bli_packm_blk_var1
} }
// Acquire the buffer to the kappa chosen above. // Acquire the buffer to the kappa chosen above.
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p ); buf_kappa = bli_obj_buffer_for_1x1( dt_cp, kappa_p );
} }
@@ -344,10 +344,10 @@ void PASTEMAC(ch,varname) \
express the remaining parameters and code. */ \ express the remaining parameters and code. */ \
if ( bli_does_trans( transc ) ) \ if ( bli_does_trans( transc ) ) \
{ \ { \
bli_swap_incs( rs_c, cs_c ); \ bli_swap_incs( &rs_c, &cs_c ); \
bli_negate_diag_offset( diagoffc ); \ bli_negate_diag_offset( &diagoffc ); \
bli_toggle_uplo( uploc ); \ bli_toggle_uplo( &uploc ); \
bli_toggle_trans( transc ); \ bli_toggle_trans( &transc ); \
} \ } \
\ \
/* Create flags to indicate row or column storage. Note that the /* Create flags to indicate row or column storage. Note that the

View File

@@ -68,33 +68,33 @@ void bli_packm_blk_var1( obj_t* c,
obj_t* p, obj_t* p,
packm_thrinfo_t* t ) packm_thrinfo_t* t )
{ {
num_t dt_cp = bli_obj_dt( *c ); num_t dt_cp = bli_obj_dt( c );
struc_t strucc = bli_obj_struc( *c ); struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( *c ); doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( *c ); diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( *c ); uplo_t uploc = bli_obj_uplo( c );
trans_t transc = bli_obj_conjtrans_status( *c ); trans_t transc = bli_obj_conjtrans_status( c );
pack_t schema = bli_obj_pack_schema( *p ); pack_t schema = bli_obj_pack_schema( p );
bool_t invdiag = bli_obj_has_inverted_diag( *p ); bool_t invdiag = bli_obj_has_inverted_diag( p );
bool_t revifup = bli_obj_is_pack_rev_if_upper( *p ); bool_t revifup = bli_obj_is_pack_rev_if_upper( p );
bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p ); bool_t reviflo = bli_obj_is_pack_rev_if_lower( p );
dim_t m_p = bli_obj_length( *p ); dim_t m_p = bli_obj_length( p );
dim_t n_p = bli_obj_width( *p ); dim_t n_p = bli_obj_width( p );
dim_t m_max_p = bli_obj_padded_length( *p ); dim_t m_max_p = bli_obj_padded_length( p );
dim_t n_max_p = bli_obj_padded_width( *p ); dim_t n_max_p = bli_obj_padded_width( p );
void* buf_c = bli_obj_buffer_at_off( *c ); void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( *c ); inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( *c ); inc_t cs_c = bli_obj_col_stride( c );
void* buf_p = bli_obj_buffer_at_off( *p ); void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( *p ); inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( *p ); inc_t cs_p = bli_obj_col_stride( p );
inc_t is_p = bli_obj_imag_stride( *p ); inc_t is_p = bli_obj_imag_stride( p );
dim_t pd_p = bli_obj_panel_dim( *p ); dim_t pd_p = bli_obj_panel_dim( p );
inc_t ps_p = bli_obj_panel_stride( *p ); inc_t ps_p = bli_obj_panel_stride( p );
void* buf_kappa; void* buf_kappa;
@@ -107,7 +107,7 @@ void bli_packm_blk_var1( obj_t* c,
// alpha scalar of the higher-level operation. Thus, we use BLIS_ONE // alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
// for kappa so that the underlying packm implementation does not // for kappa so that the underlying packm implementation does not
// scale during packing. // scale during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
// Choose the correct func_t object. // Choose the correct func_t object.
packm_kers = packm_struc_cxk_kers; packm_kers = packm_struc_cxk_kers;
@@ -222,10 +222,10 @@ void PASTEMAC(ch,varname) \
express the remaining parameters and code. */ \ express the remaining parameters and code. */ \
if ( bli_does_trans( transc ) ) \ if ( bli_does_trans( transc ) ) \
{ \ { \
bli_swap_incs( rs_c, cs_c ); \ bli_swap_incs( &rs_c, &cs_c ); \
bli_negate_diag_offset( diagoffc ); \ bli_negate_diag_offset( &diagoffc ); \
bli_toggle_uplo( uploc ); \ bli_toggle_uplo( &uploc ); \
bli_toggle_trans( transc ); \ bli_toggle_trans( &transc ); \
} \ } \
\ \
/* Create flags to indicate row or column storage. Note that the /* Create flags to indicate row or column storage. Note that the

View File

@@ -83,9 +83,9 @@ siz_t bli_packm_init
// not important, as long as it's packed into contiguous rows or // not important, as long as it's packed into contiguous rows or
// contiguous columns. A good example of this is packing for matrix // contiguous columns. A good example of this is packing for matrix
// operands in the level-2 operations. // operands in the level-2 operations.
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC ) if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
{ {
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( a, p );
return 0; return 0;
} }
@@ -97,18 +97,18 @@ siz_t bli_packm_init
// already taken place, or does not need to take place, and so that will // already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of // be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid. // current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == pack_schema ) if ( bli_obj_pack_schema( a ) == pack_schema )
{ {
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( a, p );
return 0; return 0;
} }
#endif #endif
// If the object is marked as being filled with zeros, then we can skip // If the object is marked as being filled with zeros, then we can skip
// the packm operation entirely and alias. // the packm operation entirely and alias.
if ( bli_obj_is_zeros( *a ) ) if ( bli_obj_is_zeros( a ) )
{ {
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( a, p );
return 0; return 0;
} }
@@ -189,10 +189,10 @@ siz_t bli_packm_init_pack
{ {
bli_init_once(); bli_init_once();
num_t dt = bli_obj_dt( *a ); num_t dt = bli_obj_dt( a );
trans_t transa = bli_obj_onlytrans_status( *a ); trans_t transa = bli_obj_onlytrans_status( a );
dim_t m_a = bli_obj_length( *a ); dim_t m_a = bli_obj_length( a );
dim_t n_a = bli_obj_width( *a ); dim_t n_a = bli_obj_width( a );
dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx ); dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx );
dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx ); dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx );
dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx ); dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx );
@@ -207,7 +207,7 @@ siz_t bli_packm_init_pack
// We begin by copying the fields of A. // We begin by copying the fields of A.
bli_obj_alias_to( *a, *p ); bli_obj_alias_to( a, p );
// Update the dimension fields to explicitly reflect a transposition, // Update the dimension fields to explicitly reflect a transposition,
// if needed. // if needed.
@@ -219,13 +219,13 @@ siz_t bli_packm_init_pack
// we also toggle the uplo of P. // we also toggle the uplo of P.
// Finally, we mark P as dense since we assume that all matrices, // Finally, we mark P as dense since we assume that all matrices,
// regardless of structure, will be densified. // regardless of structure, will be densified.
bli_obj_set_dims_with_trans( transa, m_a, n_a, *p ); bli_obj_set_dims_with_trans( transa, m_a, n_a, p );
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p ); bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, p );
if ( bli_does_trans( transa ) ) if ( bli_does_trans( transa ) )
{ {
bli_obj_negate_diag_offset( *p ); bli_obj_negate_diag_offset( p );
if ( bli_obj_is_upper_or_lower( *a ) ) if ( bli_obj_is_upper_or_lower( a ) )
bli_obj_toggle_uplo( *p ); bli_obj_toggle_uplo( p );
} }
// If we are packing micro-panels, mark P as dense. Otherwise, we are // If we are packing micro-panels, mark P as dense. Otherwise, we are
@@ -236,22 +236,22 @@ siz_t bli_packm_init_pack
// execute a "lower" or "upper" branch of code. // execute a "lower" or "upper" branch of code.
if ( bli_is_panel_packed( schema ) ) if ( bli_is_panel_packed( schema ) )
{ {
bli_obj_set_uplo( BLIS_DENSE, *p ); bli_obj_set_uplo( BLIS_DENSE, p );
} }
// Reset the view offsets to (0,0). // Reset the view offsets to (0,0).
bli_obj_set_offs( 0, 0, *p ); bli_obj_set_offs( 0, 0, p );
// Set the invert diagonal field. // Set the invert diagonal field.
bli_obj_set_invert_diag( invert_diag, *p ); bli_obj_set_invert_diag( invert_diag, p );
// Set the pack status of P to the pack schema prescribed in the control // Set the pack status of P to the pack schema prescribed in the control
// tree node. // tree node.
bli_obj_set_pack_schema( schema, *p ); bli_obj_set_pack_schema( schema, p );
// Set the packing order bits. // Set the packing order bits.
bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p ); bli_obj_set_pack_order_if_upper( pack_ord_if_up, p );
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p ); bli_obj_set_pack_order_if_lower( pack_ord_if_lo, p );
// Compute the dimensions padded by the dimension multiples. These // Compute the dimensions padded by the dimension multiples. These
// dimensions will be the dimensions of the packed matrices, including // dimensions will be the dimensions of the packed matrices, including
@@ -260,15 +260,15 @@ siz_t bli_packm_init_pack
// in P) and aligning them to the dimension multiples (typically equal // in P) and aligning them to the dimension multiples (typically equal
// to register blocksizes). This does waste a little bit of space for // to register blocksizes). This does waste a little bit of space for
// level-2 operations, but that's okay with us. // level-2 operations, but that's okay with us.
m_p = bli_obj_length( *p ); m_p = bli_obj_length( p );
n_p = bli_obj_width( *p ); n_p = bli_obj_width( p );
m_p_pad = bli_align_dim_to_mult( m_p, bmult_m_def ); m_p_pad = bli_align_dim_to_mult( m_p, bmult_m_def );
n_p_pad = bli_align_dim_to_mult( n_p, bmult_n_def ); n_p_pad = bli_align_dim_to_mult( n_p, bmult_n_def );
// Save the padded dimensions into the packed object. It is important // Save the padded dimensions into the packed object. It is important
// to save these dimensions since they represent the actual dimensions // to save these dimensions since they represent the actual dimensions
// of the zero-padded matrix. // of the zero-padded matrix.
bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p ); bli_obj_set_padded_dims( m_p_pad, n_p_pad, p );
// Now we prepare to compute strides, align them, and compute the // Now we prepare to compute strides, align them, and compute the
// total number of bytes needed for the packed buffer. The caller // total number of bytes needed for the packed buffer. The caller
@@ -276,7 +276,7 @@ siz_t bli_packm_init_pack
// from the memory allocator. // from the memory allocator.
// Extract the element size for the packed object. // Extract the element size for the packed object.
elem_size_p = bli_obj_elem_size( *p ); elem_size_p = bli_obj_elem_size( p );
// Set the row and column strides of p based on the pack schema. // Set the row and column strides of p based on the pack schema.
if ( bli_is_row_packed( schema ) && if ( bli_is_row_packed( schema ) &&
@@ -297,7 +297,7 @@ siz_t bli_packm_init_pack
BLIS_HEAP_STRIDE_ALIGN_SIZE ); BLIS_HEAP_STRIDE_ALIGN_SIZE );
// Store the strides in P. // Store the strides in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, p );
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
size_p = m_p_pad * rs_p * elem_size_p; size_p = m_p_pad * rs_p * elem_size_p;
@@ -320,7 +320,7 @@ siz_t bli_packm_init_pack
BLIS_HEAP_STRIDE_ALIGN_SIZE ); BLIS_HEAP_STRIDE_ALIGN_SIZE );
// Store the strides in P. // Store the strides in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, p );
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
size_p = cs_p * n_p_pad * elem_size_p; size_p = cs_p * n_p_pad * elem_size_p;
@@ -408,12 +408,12 @@ siz_t bli_packm_init_pack
else is_p = 1; else is_p = 1;
// Store the strides and panel dimension in P. // Store the strides and panel dimension in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, p );
bli_obj_set_imag_stride( is_p, *p ); bli_obj_set_imag_stride( is_p, p );
bli_obj_set_panel_dim( m_panel, *p ); bli_obj_set_panel_dim( m_panel, p );
bli_obj_set_panel_stride( ps_p, *p ); bli_obj_set_panel_stride( ps_p, p );
bli_obj_set_panel_length( m_panel, *p ); bli_obj_set_panel_length( m_panel, p );
bli_obj_set_panel_width( n_p, *p ); bli_obj_set_panel_width( n_p, p );
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
size_p = ps_p * ( m_p_pad / m_panel ) * elem_size_p; size_p = ps_p * ( m_p_pad / m_panel ) * elem_size_p;
@@ -501,12 +501,12 @@ siz_t bli_packm_init_pack
else is_p = 1; else is_p = 1;
// Store the strides and panel dimension in P. // Store the strides and panel dimension in P.
bli_obj_set_strides( rs_p, cs_p, *p ); bli_obj_set_strides( rs_p, cs_p, p );
bli_obj_set_imag_stride( is_p, *p ); bli_obj_set_imag_stride( is_p, p );
bli_obj_set_panel_dim( n_panel, *p ); bli_obj_set_panel_dim( n_panel, p );
bli_obj_set_panel_stride( ps_p, *p ); bli_obj_set_panel_stride( ps_p, p );
bli_obj_set_panel_length( m_p, *p ); bli_obj_set_panel_length( m_p, p );
bli_obj_set_panel_width( n_panel, *p ); bli_obj_set_panel_width( n_panel, p );
// Compute the size of the packed buffer. // Compute the size of the packed buffer.
size_p = ps_p * ( n_p_pad / n_panel ) * elem_size_p; size_p = ps_p * ( n_p_pad / n_panel ) * elem_size_p;

View File

@@ -53,7 +53,7 @@ void bli_packm_int
// Sanity check; A should never have a zero dimension. If we must support // Sanity check; A should never have a zero dimension. If we must support
// it, then we should fold it into the next alias-and-early-exit block. // it, then we should fold it into the next alias-and-early-exit block.
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort(); //if ( bli_obj_has_zero_dim( a ) ) bli_abort();
// Let us now check to see if the object has already been packed. First // Let us now check to see if the object has already been packed. First
// we check if it has been packed to an unspecified (row or column) // we check if it has been packed to an unspecified (row or column)
@@ -65,7 +65,7 @@ void bli_packm_int
// not important, as long as it's packed into contiguous rows or // not important, as long as it's packed into contiguous rows or
// contiguous columns. A good example of this is packing for matrix // contiguous columns. A good example of this is packing for matrix
// operands in the level-2 operations. // operands in the level-2 operations.
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC ) if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
{ {
return; return;
} }
@@ -78,14 +78,14 @@ void bli_packm_int
// already taken place, or does not need to take place, and so that will // already taken place, or does not need to take place, and so that will
// be indicated by the pack status). Also, not all combinations of // be indicated by the pack status). Also, not all combinations of
// current pack status and desired pack schema are valid. // current pack status and desired pack schema are valid.
if ( bli_obj_pack_schema( *a ) == bli_cntl_packm_params_pack_schema( cntl ) ) if ( bli_obj_pack_schema( a ) == bli_cntl_packm_params_pack_schema( cntl ) )
{ {
return; return;
} }
// If the object is marked as being filled with zeros, then we can skip // If the object is marked as being filled with zeros, then we can skip
// the packm operation entirely. // the packm operation entirely.
if ( bli_obj_is_zeros( *a ) ) if ( bli_obj_is_zeros( a ) )
{ {
return; return;
} }

View File

@@ -54,14 +54,14 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
// Partitioning top-to-bottom through packed column panels (which are // Partitioning top-to-bottom through packed column panels (which are
// row-stored) is not yet supported. // row-stored) is not yet supported.
if ( bli_obj_is_col_packed( *obj ) ) if ( bli_obj_is_col_packed( obj ) )
{ {
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
} }
// Query the dimensions of the parent object. // Query the dimensions of the parent object.
m = bli_obj_length( *obj ); m = bli_obj_length( obj );
n = bli_obj_width( *obj ); n = bli_obj_width( obj );
// Foolproofing: do not let b exceed what's left of the m dimension at // Foolproofing: do not let b exceed what's left of the m dimension at
// row offset i. // row offset i.
@@ -71,10 +71,10 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
// stride fields of the parent object. Note that this omits copying view // stride fields of the parent object. Note that this omits copying view
// information because the new partition will have its own dimensions // information because the new partition will have its own dimensions
// and offsets. // and offsets.
bli_obj_init_subpart_from( *obj, *sub_obj ); bli_obj_init_subpart_from( obj, sub_obj );
// Modify offsets and dimensions of requested partition. // Modify offsets and dimensions of requested partition.
bli_obj_set_dims( b, n, *sub_obj ); bli_obj_set_dims( b, n, sub_obj );
// Tweak the padded length of the subpartition to trick the underlying // Tweak the padded length of the subpartition to trick the underlying
// implementation into only zero-padding for the narrow submatrix of // implementation into only zero-padding for the narrow submatrix of
@@ -86,25 +86,25 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
// b for the edge iteration). In these cases, we arrive at the new // b for the edge iteration). In these cases, we arrive at the new
// packed length by simply subtracting off i. // packed length by simply subtracting off i.
{ {
dim_t m_pack_max = bli_obj_padded_length( *sub_obj ); dim_t m_pack_max = bli_obj_padded_length( sub_obj );
dim_t m_pack_cur; dim_t m_pack_cur;
if ( i + b == m ) m_pack_cur = m_pack_max - i; if ( i + b == m ) m_pack_cur = m_pack_max - i;
else m_pack_cur = b; else m_pack_cur = b;
bli_obj_set_padded_length( m_pack_cur, *sub_obj ); bli_obj_set_padded_length( m_pack_cur, sub_obj );
} }
// Translate the desired offsets to a panel offset and adjust the // Translate the desired offsets to a panel offset and adjust the
// buffer pointer of the subpartition object. // buffer pointer of the subpartition object.
{ {
char* buf_p = bli_obj_buffer( *sub_obj ); char* buf_p = bli_obj_buffer( sub_obj );
siz_t elem_size = bli_obj_elem_size( *sub_obj ); siz_t elem_size = bli_obj_elem_size( sub_obj );
dim_t off_to_panel = bli_packm_offset_to_panel_for( i, sub_obj ); dim_t off_to_panel = bli_packm_offset_to_panel_for( i, sub_obj );
buf_p = buf_p + elem_size * off_to_panel; buf_p = buf_p + elem_size * off_to_panel;
bli_obj_set_buffer( ( void* )buf_p, *sub_obj ); bli_obj_set_buffer( buf_p, sub_obj );
} }
} }
@@ -130,14 +130,14 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
// Partitioning left-to-right through packed row panels (which are // Partitioning left-to-right through packed row panels (which are
// column-stored) is not yet supported. // column-stored) is not yet supported.
if ( bli_obj_is_row_packed( *obj ) ) if ( bli_obj_is_row_packed( obj ) )
{ {
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
} }
// Query the dimensions of the parent object. // Query the dimensions of the parent object.
m = bli_obj_length( *obj ); m = bli_obj_length( obj );
n = bli_obj_width( *obj ); n = bli_obj_width( obj );
// Foolproofing: do not let b exceed what's left of the n dimension at // Foolproofing: do not let b exceed what's left of the n dimension at
// column offset j. // column offset j.
@@ -147,10 +147,10 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
// stride fields of the parent object. Note that this omits copying view // stride fields of the parent object. Note that this omits copying view
// information because the new partition will have its own dimensions // information because the new partition will have its own dimensions
// and offsets. // and offsets.
bli_obj_init_subpart_from( *obj, *sub_obj ); bli_obj_init_subpart_from( obj, sub_obj );
// Modify offsets and dimensions of requested partition. // Modify offsets and dimensions of requested partition.
bli_obj_set_dims( m, b, *sub_obj ); bli_obj_set_dims( m, b, sub_obj );
// Tweak the padded width of the subpartition to trick the underlying // Tweak the padded width of the subpartition to trick the underlying
// implementation into only zero-padding for the narrow submatrix of // implementation into only zero-padding for the narrow submatrix of
@@ -162,25 +162,25 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
// b for the edge iteration). In these cases, we arrive at the new // b for the edge iteration). In these cases, we arrive at the new
// packed width by simply subtracting off j. // packed width by simply subtracting off j.
{ {
dim_t n_pack_max = bli_obj_padded_width( *sub_obj ); dim_t n_pack_max = bli_obj_padded_width( sub_obj );
dim_t n_pack_cur; dim_t n_pack_cur;
if ( j + b == n ) n_pack_cur = n_pack_max - j; if ( j + b == n ) n_pack_cur = n_pack_max - j;
else n_pack_cur = b; else n_pack_cur = b;
bli_obj_set_padded_width( n_pack_cur, *sub_obj ); bli_obj_set_padded_width( n_pack_cur, sub_obj );
} }
// Translate the desired offsets to a panel offset and adjust the // Translate the desired offsets to a panel offset and adjust the
// buffer pointer of the subpartition object. // buffer pointer of the subpartition object.
{ {
char* buf_p = bli_obj_buffer( *sub_obj ); char* buf_p = bli_obj_buffer( sub_obj );
siz_t elem_size = bli_obj_elem_size( *sub_obj ); siz_t elem_size = bli_obj_elem_size( sub_obj );
dim_t off_to_panel = bli_packm_offset_to_panel_for( j, sub_obj ); dim_t off_to_panel = bli_packm_offset_to_panel_for( j, sub_obj );
buf_p = buf_p + elem_size * off_to_panel; buf_p = buf_p + elem_size * off_to_panel;
bli_obj_set_buffer( ( void* )buf_p, *sub_obj ); bli_obj_set_buffer( buf_p, sub_obj );
} }
} }
@@ -201,47 +201,47 @@ dim_t bli_packm_offset_to_panel_for( dim_t offmn, obj_t* p )
{ {
dim_t panel_off; dim_t panel_off;
if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROWS ) if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROWS )
{ {
// For the "packed rows" schema, a single row is effectively one // For the "packed rows" schema, a single row is effectively one
// row panel, and so we use the row offset as the panel offset. // row panel, and so we use the row offset as the panel offset.
// Then we multiply this offset by the effective panel stride // Then we multiply this offset by the effective panel stride
// (ie: the row stride) to arrive at the desired offset. // (ie: the row stride) to arrive at the desired offset.
panel_off = offmn * bli_obj_row_stride( *p ); panel_off = offmn * bli_obj_row_stride( p );
} }
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COLUMNS ) else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COLUMNS )
{ {
// For the "packed columns" schema, a single column is effectively one // For the "packed columns" schema, a single column is effectively one
// column panel, and so we use the column offset as the panel offset. // column panel, and so we use the column offset as the panel offset.
// Then we multiply this offset by the effective panel stride // Then we multiply this offset by the effective panel stride
// (ie: the column stride) to arrive at the desired offset. // (ie: the column stride) to arrive at the desired offset.
panel_off = offmn * bli_obj_col_stride( *p ); panel_off = offmn * bli_obj_col_stride( p );
} }
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROW_PANELS ) else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROW_PANELS )
{ {
// For the "packed row panels" schema, the column stride is equal to // For the "packed row panels" schema, the column stride is equal to
// the panel dimension (length). So we can divide it into offmn // the panel dimension (length). So we can divide it into offmn
// (interpreted as a row offset) to arrive at a panel offset. Then // (interpreted as a row offset) to arrive at a panel offset. Then
// we multiply this offset by the panel stride to arrive at the total // we multiply this offset by the panel stride to arrive at the total
// offset to the panel (in units of elements). // offset to the panel (in units of elements).
panel_off = offmn / bli_obj_col_stride( *p ); panel_off = offmn / bli_obj_col_stride( p );
panel_off = panel_off * bli_obj_panel_stride( *p ); panel_off = panel_off * bli_obj_panel_stride( p );
// Sanity check. // Sanity check.
if ( offmn % bli_obj_col_stride( *p ) > 0 ) bli_abort(); if ( offmn % bli_obj_col_stride( p ) > 0 ) bli_abort();
} }
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COL_PANELS ) else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COL_PANELS )
{ {
// For the "packed column panels" schema, the row stride is equal to // For the "packed column panels" schema, the row stride is equal to
// the panel dimension (width). So we can divide it into offmn // the panel dimension (width). So we can divide it into offmn
// (interpreted as a column offset) to arrive at a panel offset. Then // (interpreted as a column offset) to arrive at a panel offset. Then
// we multiply this offset by the panel stride to arrive at the total // we multiply this offset by the panel stride to arrive at the total
// offset to the panel (in units of elements). // offset to the panel (in units of elements).
panel_off = offmn / bli_obj_row_stride( *p ); panel_off = offmn / bli_obj_row_stride( p );
panel_off = panel_off * bli_obj_panel_stride( *p ); panel_off = panel_off * bli_obj_panel_stride( p );
// Sanity check. // Sanity check.
if ( offmn % bli_obj_row_stride( *p ) > 0 ) bli_abort(); if ( offmn % bli_obj_row_stride( p ) > 0 ) bli_abort();
} }
else else
{ {

View File

@@ -308,10 +308,10 @@ void PASTEMAC(ch,varname) \
{ \ { \
c = c + diagoffc * ( doff_t )cs_c + \ c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \ -diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \ bli_swap_incs( &incc, &ldc ); \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \ bli_toggle_conj( &conjc ); \
} \ } \
\ \
/* Pack the full panel. */ \ /* Pack the full panel. */ \
@@ -376,7 +376,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \ bli_toggle_conj( &conjc12 ); \
} \ } \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \ ( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -402,7 +402,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \ bli_toggle_conj( &conjc10 ); \
} \ } \
\ \
/* Pack to p10. For upper storage, this includes the unstored /* Pack to p10. For upper storage, this includes the unstored
@@ -573,8 +573,8 @@ void PASTEMAC(ch,varname) \
ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict zero = PASTEMAC(ch,0); \
uplo_t uplop = uploc; \ uplo_t uplop = uploc; \
\ \
bli_toggle_uplo( uplop ); \ bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\ \
PASTEMAC(ch,setm) \ PASTEMAC(ch,setm) \
( \ ( \

View File

@@ -310,10 +310,10 @@ void PASTEMAC(ch,varname) \
{ \ { \
c = c + diagoffc * ( doff_t )cs_c + \ c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \ -diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \ bli_swap_incs( &incc, &ldc ); \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \ bli_toggle_conj( &conjc ); \
} \ } \
\ \
/* Pack the full panel. */ \ /* Pack the full panel. */ \
@@ -380,7 +380,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \ bli_toggle_conj( &conjc12 ); \
} \ } \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \ ( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -406,7 +406,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \ bli_toggle_conj( &conjc10 ); \
} \ } \
\ \
/* Pack to p10. For upper storage, this includes the unstored /* Pack to p10. For upper storage, this includes the unstored
@@ -581,8 +581,8 @@ void PASTEMAC(ch,varname) \
doff_t diagoffp11_0 = 0; \ doff_t diagoffp11_0 = 0; \
dim_t p11_0_dim = panel_dim - 1; \ dim_t p11_0_dim = panel_dim - 1; \
\ \
bli_toggle_uplo( uplop ); \ bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp11_0 ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp11_0 ); \
\ \
/* Note that this macro works a little differently than the setm /* Note that this macro works a little differently than the setm
operation. Here, we pass in the dimensions of only p11, rather operation. Here, we pass in the dimensions of only p11, rather

View File

@@ -363,10 +363,10 @@ void PASTEMAC(ch,varname) \
{ \ { \
c = c + diagoffc * ( doff_t )cs_c + \ c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \ -diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \ bli_swap_incs( &incc, &ldc ); \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \ bli_toggle_conj( &conjc ); \
} \ } \
\ \
/* Pack the full panel. */ \ /* Pack the full panel. */ \
@@ -436,7 +436,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \ bli_toggle_conj( &conjc12 ); \
} \ } \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \ ( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -462,7 +462,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \ bli_toggle_conj( &conjc10 ); \
} \ } \
\ \
/* Pack to p10. For upper storage, this includes the unstored /* Pack to p10. For upper storage, this includes the unstored
@@ -744,8 +744,8 @@ void PASTEMAC(ch,varname) \
ctype_r* restrict zero_r = PASTEMAC(chr,0); \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \
uplo_t uplop = uploc; \ uplo_t uplop = uploc; \
\ \
bli_toggle_uplo( uplop ); \ bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\ \
PASTEMAC(chr,setm) \ PASTEMAC(chr,setm) \
( \ ( \

View File

@@ -337,10 +337,10 @@ void PASTEMAC(ch,varname) \
{ \ { \
c = c + diagoffc * ( doff_t )cs_c + \ c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \ -diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \ bli_swap_incs( &incc, &ldc ); \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \ bli_toggle_conj( &conjc ); \
} \ } \
\ \
/* Pack the full panel. */ \ /* Pack the full panel. */ \
@@ -410,7 +410,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \ bli_toggle_conj( &conjc12 ); \
} \ } \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \ ( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -436,7 +436,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \ bli_toggle_conj( &conjc10 ); \
} \ } \
\ \
/* Pack to p10. For upper storage, this includes the unstored /* Pack to p10. For upper storage, this includes the unstored
@@ -676,8 +676,8 @@ void PASTEMAC(ch,varname) \
ctype_r* restrict zero_r = PASTEMAC(chr,0); \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \
uplo_t uplop = uploc; \ uplo_t uplop = uploc; \
\ \
bli_toggle_uplo( uplop ); \ bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\ \
PASTEMAC(chr,setm) \ PASTEMAC(chr,setm) \
( \ ( \

View File

@@ -305,10 +305,10 @@ void PASTEMAC(ch,varname) \
{ \ { \
c = c + diagoffc * ( doff_t )cs_c + \ c = c + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \ -diagoffc * ( doff_t )rs_c; \
bli_swap_incs( incc, ldc ); \ bli_swap_incs( &incc, &ldc ); \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc ); \ bli_toggle_conj( &conjc ); \
} \ } \
\ \
/* Pack the full panel. */ \ /* Pack the full panel. */ \
@@ -376,7 +376,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc12 ); \ bli_toggle_conj( &conjc12 ); \
} \ } \
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \ else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
( col_stored && bli_is_upper( uploc ) ) ) */ \ ( col_stored && bli_is_upper( uploc ) ) ) */ \
@@ -402,7 +402,7 @@ void PASTEMAC(ch,varname) \
conjc12 = conjc; \ conjc12 = conjc; \
\ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( conjc10 ); \ bli_toggle_conj( &conjc10 ); \
} \ } \
\ \
/* Pack to p10. For upper storage, this includes the unstored /* Pack to p10. For upper storage, this includes the unstored
@@ -568,8 +568,8 @@ void PASTEMAC(ch,varname) \
ctype_r* restrict zero_r = PASTEMAC(chr,0); \ ctype_r* restrict zero_r = PASTEMAC(chr,0); \
uplo_t uplop = uploc; \ uplo_t uplop = uploc; \
\ \
bli_toggle_uplo( uplop ); \ bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\ \
PASTEMAC(chr,setm) \ PASTEMAC(chr,setm) \
( \ ( \

View File

@@ -64,26 +64,26 @@ void bli_packm_unb_var1
thrinfo_t* thread thrinfo_t* thread
) )
{ {
num_t dt_cp = bli_obj_dt( *c ); num_t dt_cp = bli_obj_dt( c );
struc_t strucc = bli_obj_struc( *c ); struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( *c ); doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( *c ); diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( *c ); uplo_t uploc = bli_obj_uplo( c );
trans_t transc = bli_obj_conjtrans_status( *c ); trans_t transc = bli_obj_conjtrans_status( c );
dim_t m_p = bli_obj_length( *p ); dim_t m_p = bli_obj_length( p );
dim_t n_p = bli_obj_width( *p ); dim_t n_p = bli_obj_width( p );
dim_t m_max_p = bli_obj_padded_length( *p ); dim_t m_max_p = bli_obj_padded_length( p );
dim_t n_max_p = bli_obj_padded_width( *p ); dim_t n_max_p = bli_obj_padded_width( p );
void* buf_c = bli_obj_buffer_at_off( *c ); void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( *c ); inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( *c ); inc_t cs_c = bli_obj_col_stride( c );
void* buf_p = bli_obj_buffer_at_off( *p ); void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( *p ); inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( *p ); inc_t cs_p = bli_obj_col_stride( p );
void* buf_kappa; void* buf_kappa;
@@ -94,7 +94,7 @@ void bli_packm_unb_var1
// the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE // the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
// for kappa so that the underlying packm implementation does not scale // for kappa so that the underlying packm implementation does not scale
// during packing. // during packing.
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
// Index into the type combination array to extract the correct // Index into the type combination array to extract the correct
// function pointer. // function pointer.
@@ -180,15 +180,15 @@ void PASTEMAC(ch,varname) \
side of the diagonal. */ \ side of the diagonal. */ \
c_cast = c_cast + diagoffc * ( doff_t )cs_c + \ c_cast = c_cast + diagoffc * ( doff_t )cs_c + \
-diagoffc * ( doff_t )rs_c; \ -diagoffc * ( doff_t )rs_c; \
bli_negate_diag_offset( diagoffc ); \ bli_negate_diag_offset( &diagoffc ); \
bli_toggle_trans( transc ); \ bli_toggle_trans( &transc ); \
if ( bli_is_upper( uploc ) ) diagoffc += 1; \ if ( bli_is_upper( uploc ) ) diagoffc += 1; \
else if ( bli_is_lower( uploc ) ) diagoffc -= 1; \ else if ( bli_is_lower( uploc ) ) diagoffc -= 1; \
\ \
/* If c is Hermitian, we need to apply a conjugation when /* If c is Hermitian, we need to apply a conjugation when
copying the region opposite the diagonal. */ \ copying the region opposite the diagonal. */ \
if ( bli_is_hermitian( strucc ) ) \ if ( bli_is_hermitian( strucc ) ) \
bli_toggle_conj( transc ); \ transc = bli_trans_toggled_conj( transc ); \
\ \
/* Copy the data from the region opposite the diagonal of c /* Copy the data from the region opposite the diagonal of c
(as specified by the original value of diagoffc). Notice (as specified by the original value of diagoffc). Notice
@@ -217,16 +217,16 @@ void PASTEMAC(ch,varname) \
we can derive from the parameters given. */ \ we can derive from the parameters given. */ \
if ( bli_does_trans( transc ) ) \ if ( bli_does_trans( transc ) ) \
{ \ { \
bli_negate_diag_offset( diagoffp ); \ bli_negate_diag_offset( &diagoffp ); \
bli_toggle_uplo( uplop ); \ bli_toggle_uplo( &uplop ); \
} \ } \
\ \
/* For triangular matrices, we wish to reference the region /* For triangular matrices, we wish to reference the region
strictly opposite the diagonal of C. This amounts to strictly opposite the diagonal of C. This amounts to
toggling uploc and then shifting the diagonal offset to toggling uploc and then shifting the diagonal offset to
shrink the stored region (by one diagonal). */ \ shrink the stored region (by one diagonal). */ \
bli_toggle_uplo( uplop ); \ bli_toggle_uplo( &uplop ); \
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \ bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
\ \
/* Set the region opposite the diagonal of p to zero. */ \ /* Set the region opposite the diagonal of p to zero. */ \
PASTEMAC(ch,setm) \ PASTEMAC(ch,setm) \

View File

@@ -57,7 +57,7 @@ void bli_scalm_int( obj_t* alpha,
FUNCPTR_T f; FUNCPTR_T f;
// Return early if one of the matrix operands has a zero dimension. // Return early if one of the matrix operands has a zero dimension.
if ( bli_obj_has_zero_dim( *x ) ) return; if ( bli_obj_has_zero_dim( x ) ) return;
// Check parameters. // Check parameters.
if ( bli_error_checking_is_enabled() ) if ( bli_error_checking_is_enabled() )

View File

@@ -64,17 +64,17 @@ void bli_unpackm_blk_var1
thrinfo_t* thread thrinfo_t* thread
) )
{ {
num_t dt_cp = bli_obj_dt( *c ); num_t dt_cp = bli_obj_dt( c );
// Normally we take the parameters from the source argument. But here, // Normally we take the parameters from the source argument. But here,
// the packm/unpackm framework is not yet solidified enough for us to // the packm/unpackm framework is not yet solidified enough for us to
// assume that at this point struc(P) == struc(C), (ie: since // assume that at this point struc(P) == struc(C), (ie: since
// densification may have marked P's structure as dense when the root // densification may have marked P's structure as dense when the root
// is upper or lower). So, we take the struc field from C, not P. // is upper or lower). So, we take the struc field from C, not P.
struc_t strucc = bli_obj_struc( *c ); struc_t strucc = bli_obj_struc( c );
doff_t diagoffc = bli_obj_diag_offset( *c ); doff_t diagoffc = bli_obj_diag_offset( c );
diag_t diagc = bli_obj_diag( *c ); diag_t diagc = bli_obj_diag( c );
uplo_t uploc = bli_obj_uplo( *c ); uplo_t uploc = bli_obj_uplo( c );
// Again, normally the trans argument is on the source matrix. But we // Again, normally the trans argument is on the source matrix. But we
// know that the packed matrix is not transposed. If there is to be a // know that the packed matrix is not transposed. If there is to be a
@@ -83,22 +83,22 @@ void bli_unpackm_blk_var1
// the trans status (not the conjugation status), since we probably // the trans status (not the conjugation status), since we probably
// don't want to un-conjugate if the original matrix was conjugated // don't want to un-conjugate if the original matrix was conjugated
// when packed. // when packed.
trans_t transc = bli_obj_onlytrans_status( *c ); trans_t transc = bli_obj_onlytrans_status( c );
dim_t m_c = bli_obj_length( *c ); dim_t m_c = bli_obj_length( c );
dim_t n_c = bli_obj_width( *c ); dim_t n_c = bli_obj_width( c );
dim_t m_panel = bli_obj_panel_length( *c ); dim_t m_panel = bli_obj_panel_length( c );
dim_t n_panel = bli_obj_panel_width( *c ); dim_t n_panel = bli_obj_panel_width( c );
void* buf_p = bli_obj_buffer_at_off( *p ); void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( *p ); inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( *p ); inc_t cs_p = bli_obj_col_stride( p );
dim_t pd_p = bli_obj_panel_dim( *p ); dim_t pd_p = bli_obj_panel_dim( p );
inc_t ps_p = bli_obj_panel_stride( *p ); inc_t ps_p = bli_obj_panel_stride( p );
void* buf_c = bli_obj_buffer_at_off( *c ); void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( *c ); inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( *c ); inc_t cs_c = bli_obj_col_stride( c );
FUNCPTR_T f; FUNCPTR_T f;
@@ -170,10 +170,10 @@ void PASTEMAC(ch,varname) \
express the remaining parameters and code. */ \ express the remaining parameters and code. */ \
if ( bli_does_trans( transc ) ) \ if ( bli_does_trans( transc ) ) \
{ \ { \
bli_swap_incs( rs_c, cs_c ); \ bli_swap_incs( &rs_c, &cs_c ); \
bli_negate_diag_offset( diagoffc ); \ bli_negate_diag_offset( &diagoffc ); \
bli_toggle_uplo( uploc ); \ bli_toggle_uplo( &uploc ); \
bli_toggle_trans( transc ); \ bli_toggle_trans( &transc ); \
} \ } \
\ \
/* If the strides of p indicate row storage, then we are packing to /* If the strides of p indicate row storage, then we are packing to

View File

@@ -54,7 +54,7 @@ void bli_unpackm_int
// If p was aliased to a during the pack stage (because it was already // If p was aliased to a during the pack stage (because it was already
// in an acceptable packed/contiguous format), then no unpack is actually // in an acceptable packed/contiguous format), then no unpack is actually
// necessary, so we return. // necessary, so we return.
if ( bli_obj_is_alias_of( *p, *a ) ) return; if ( bli_obj_is_alias_of( p, a ) ) return;
// Extract the function pointer from the current control tree node. // Extract the function pointer from the current control tree node.
f = bli_cntl_unpackm_params_var_func( cntl ); f = bli_cntl_unpackm_params_var_func( cntl );

View File

@@ -59,22 +59,22 @@ void bli_unpackm_unb_var1
thrinfo_t* thread thrinfo_t* thread
) )
{ {
num_t dt_pc = bli_obj_dt( *p ); num_t dt_pc = bli_obj_dt( p );
doff_t diagoffp = bli_obj_diag_offset( *p ); doff_t diagoffp = bli_obj_diag_offset( p );
uplo_t uplop = bli_obj_uplo( *p ); uplo_t uplop = bli_obj_uplo( p );
trans_t transc = bli_obj_onlytrans_status( *c ); trans_t transc = bli_obj_onlytrans_status( c );
dim_t m_c = bli_obj_length( *c ); dim_t m_c = bli_obj_length( c );
dim_t n_c = bli_obj_width( *c ); dim_t n_c = bli_obj_width( c );
void* buf_p = bli_obj_buffer_at_off( *p ); void* buf_p = bli_obj_buffer_at_off( p );
inc_t rs_p = bli_obj_row_stride( *p ); inc_t rs_p = bli_obj_row_stride( p );
inc_t cs_p = bli_obj_col_stride( *p ); inc_t cs_p = bli_obj_col_stride( p );
void* buf_c = bli_obj_buffer_at_off( *c ); void* buf_c = bli_obj_buffer_at_off( c );
inc_t rs_c = bli_obj_row_stride( *c ); inc_t rs_c = bli_obj_row_stride( c );
inc_t cs_c = bli_obj_col_stride( *c ); inc_t cs_c = bli_obj_col_stride( c );
FUNCPTR_T f; FUNCPTR_T f;

View File

@@ -330,10 +330,10 @@ void bli_xxmv_check
e_val = bli_check_vector_object( y ); e_val = bli_check_vector_object( y );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
// Check object buffers (for non-NULLness). // Check object buffers (for non-NULLness).
@@ -392,10 +392,10 @@ void bli_xxr_check
e_val = bli_check_matrix_object( a ); e_val = bli_check_matrix_object( a );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( *a ) ); e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) );
bli_check_error_code( e_val ); bli_check_error_code( e_val );
// Check object buffers (for non-NULLness). // Check object buffers (for non-NULLness).

View File

@@ -57,19 +57,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
trans_t transa = bli_obj_conjtrans_status( *a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( *a ); \ dim_t n = bli_obj_width( a ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
void* buf_beta; \ void* buf_beta; \
@@ -86,8 +86,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \ beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_14 \ bli_call_ft_14 \
@@ -126,19 +126,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( *a ); \ dim_t n = bli_obj_width( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -151,7 +151,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_13 \ bli_call_ft_13 \
@@ -190,19 +190,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
uplo_t uploa = bli_obj_uplo( *a ); \ uplo_t uploa = bli_obj_uplo( a ); \
conj_t conja = bli_obj_conj_status( *a ); \ conj_t conja = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
void* buf_beta; \ void* buf_beta; \
@@ -219,8 +219,8 @@ void PASTEMAC(opname,EX_SUF) \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
beta, &beta_local ); \ beta, &beta_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_14 \ bli_call_ft_14 \
@@ -259,16 +259,16 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
uplo_t uploa = bli_obj_uplo( *a ); \ uplo_t uploa = bli_obj_uplo( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -281,7 +281,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_10 \ bli_call_ft_10 \
@@ -318,19 +318,19 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
uplo_t uploa = bli_obj_uplo( *a ); \ uplo_t uploa = bli_obj_uplo( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -343,7 +343,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_13 \ bli_call_ft_13 \
@@ -381,17 +381,17 @@ void PASTEMAC(opname,EX_SUF) \
\ \
BLIS_OAPI_CNTX_DECL \ BLIS_OAPI_CNTX_DECL \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
uplo_t uploa = bli_obj_uplo( *a ); \ uplo_t uploa = bli_obj_uplo( a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \
diag_t diaga = bli_obj_diag( *a ); \ diag_t diaga = bli_obj_diag( a ); \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_alpha; \ void* buf_alpha; \
\ \
@@ -404,7 +404,7 @@ void PASTEMAC(opname,EX_SUF) \
as needed). */ \ as needed). */ \
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
alpha, &alpha_local ); \ alpha, &alpha_local ); \
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
\ \
/* Invoke the typed function. */ \ /* Invoke the typed function. */ \
bli_call_ft_11 \ bli_call_ft_11 \

View File

@@ -60,7 +60,7 @@ void PASTEMAC(ch,opname) \
dim_t m_y, n_x; \ dim_t m_y, n_x; \
\ \
/* Determine the dimensions of y and x. */ \ /* Determine the dimensions of y and x. */ \
bli_set_dims_with_trans( transa, m, n, m_y, n_x ); \ bli_set_dims_with_trans( transa, m, n, &m_y, &n_x ); \
\ \
/* If y has zero elements, return early. */ \ /* If y has zero elements, return early. */ \
if ( bli_zero_dim1( m_y ) ) return; \ if ( bli_zero_dim1( m_y ) ) return; \

View File

@@ -63,7 +63,7 @@ void PASTEMAC(ch,varname) \
\ \
bli_set_dims_incs_with_trans( transa, \ bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \ m, n, rs_a, cs_a, \
n_iter, n_elem, rs_at, cs_at ); \ &n_iter, &n_elem, &rs_at, &cs_at ); \
\ \
conja = bli_extract_conj( transa ); \ conja = bli_extract_conj( transa ); \
\ \

View File

@@ -65,7 +65,7 @@ void PASTEMAC(ch,varname) \
\ \
bli_set_dims_incs_with_trans( transa, \ bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \ m, n, rs_a, cs_a, \
n_elem, n_iter, rs_at, cs_at ); \ &n_elem, &n_iter, &rs_at, &cs_at ); \
\ \
conja = bli_extract_conj( transa ); \ conja = bli_extract_conj( transa ); \
\ \

View File

@@ -64,7 +64,7 @@ void PASTEMAC(ch,varname) \
\ \
bli_set_dims_incs_with_trans( transa, \ bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \ m, n, rs_a, cs_a, \
n_iter, n_elem, rs_at, cs_at ); \ &n_iter, &n_elem, &rs_at, &cs_at ); \
\ \
conja = bli_extract_conj( transa ); \ conja = bli_extract_conj( transa ); \
\ \

View File

@@ -65,7 +65,7 @@ void PASTEMAC(ch,varname) \
\ \
bli_set_dims_incs_with_trans( transa, \ bli_set_dims_incs_with_trans( transa, \
m, n, rs_a, cs_a, \ m, n, rs_a, cs_a, \
n_elem, n_iter, rs_at, cs_at ); \ &n_elem, &n_iter, &rs_at, &cs_at ); \
\ \
conja = bli_extract_conj( transa ); \ conja = bli_extract_conj( transa ); \
\ \

View File

@@ -50,26 +50,26 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
trans_t transa = bli_obj_conjtrans_status( *a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
\ \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( *a ); \ dim_t n = bli_obj_width( a ); \
\ \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \ void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
\ \
/* Invoke the void pointer-based function for the given datatype. */ \ /* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_14 \ bli_call_ft_14 \

View File

@@ -51,26 +51,26 @@ void PASTEMAC0(opname) \
gemv_t* cntl \ gemv_t* cntl \
) \ ) \
{ \ { \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
trans_t transa = bli_obj_conjtrans_status( *a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
\ \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( *a ); \ dim_t n = bli_obj_width( a ); \
\ \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \ void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
\ \
PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \ PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \
\ \

View File

@@ -54,7 +54,7 @@ void bli_gemv_blk_var1( obj_t* alpha,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension in partitioning direction. // Query dimension in partitioning direction.
m_trans = bli_obj_length_after_trans( *a ); m_trans = bli_obj_length_after_trans( a );
// Partition along the m dimension. // Partition along the m dimension.
for ( i = 0; i < m_trans; i += b_alg ) for ( i = 0; i < m_trans; i += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_gemv_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension in partitioning direction. // Query dimension in partitioning direction.
n_trans = bli_obj_width_after_trans( *a ); n_trans = bli_obj_width_after_trans( a );
// y = beta * y; // y = beta * y;
bli_scalv_int( beta, bli_scalv_int( beta,

View File

@@ -67,15 +67,15 @@ void bli_gemv_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a ); dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( *y ); dt_targ_y = bli_obj_target_dt( y );
// Determine whether each operand is stored with unit stride. // Determine whether each operand is stored with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( *a ) ); bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -112,14 +112,14 @@ void bli_gemv_front
// row-major cases with a transpose and column-major without a // row-major cases with a transpose and column-major without a
// transpose. For the general stride case, we mimic that of column- // transpose. For the general stride case, we mimic that of column-
// major storage since that is the format into which we copy/pack. // major storage since that is the format into which we copy/pack.
if ( bli_obj_has_notrans( *a ) ) if ( bli_obj_has_notrans( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) gemv_cntl = gemv_cntl_bs_ke_dot; if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_dot;
else gemv_cntl = gemv_cntl_bs_ke_axpy; else gemv_cntl = gemv_cntl_bs_ke_axpy;
} }
else // if ( bli_obj_has_trans( *a ) ) else // if ( bli_obj_has_trans( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) gemv_cntl = gemv_cntl_bs_ke_axpy; if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_axpy;
else gemv_cntl = gemv_cntl_bs_ke_dot; else gemv_cntl = gemv_cntl_bs_ke_dot;
} }
} }
@@ -127,20 +127,20 @@ void bli_gemv_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_has_notrans( *a ) ) if ( bli_obj_has_notrans( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) gemv_cntl = gemv_cntl_ge_dot; if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_dot;
else gemv_cntl = gemv_cntl_ge_axpy; else gemv_cntl = gemv_cntl_ge_axpy;
} }
else // if ( bli_obj_has_trans( *a ) ) else // if ( bli_obj_has_trans( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) gemv_cntl = gemv_cntl_ge_axpy; if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_axpy;
else gemv_cntl = gemv_cntl_ge_dot; else gemv_cntl = gemv_cntl_ge_dot;
} }
} }
@@ -189,8 +189,8 @@ void PASTEMAC(ch,opname) \
inc_t rs_x, cs_x; \ inc_t rs_x, cs_x; \
inc_t rs_y, cs_y; \ inc_t rs_y, cs_y; \
\ \
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, m_a, n_a ); \ bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_a, &n_a ); \
bli_set_dims_with_trans( transa, m, n, m_y, m_x ); \ bli_set_dims_with_trans( transa, m, n, &m_y, &m_x ); \
\ \
rs_x = incx; cs_x = m_x * incx; \ rs_x = incx; cs_x = m_x * incx; \
rs_y = incy; cs_y = m_y * incy; \ rs_y = incy; cs_y = m_y * incy; \
@@ -202,8 +202,8 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \
\ \
bli_obj_set_conjtrans( transa, ao ); \ bli_obj_set_conjtrans( transa, &ao ); \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&ao, \ &ao, \

View File

@@ -69,8 +69,8 @@ void bli_gemv_int( trans_t transa,
obj_t x_local; obj_t x_local;
// Apply the trans and/or conj parameters to aliases of the objects. // Apply the trans and/or conj parameters to aliases of the objects.
bli_obj_alias_with_trans( transa, *a, a_local ); bli_obj_alias_with_trans( transa, a, &a_local );
bli_obj_alias_with_conj( conjx, *x, x_local ); bli_obj_alias_with_conj( conjx, x, &x_local );
// Check parameters. We use the aliased copy of A so the transa parameter // Check parameters. We use the aliased copy of A so the transa parameter
// is taken into account for dimension checking. // is taken into account for dimension checking.
@@ -78,10 +78,10 @@ void bli_gemv_int( trans_t transa,
bli_gemv_check( alpha, &a_local, &x_local, beta, y ); bli_gemv_check( alpha, &a_local, &x_local, beta, y );
// If y has a zero dimension, return early. // If y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *y ) ) return; if ( bli_obj_has_zero_dim( y ) ) return;
// If x has a zero dimension, scale y by beta and return early. // If x has a zero dimension, scale y by beta and return early.
if ( bli_obj_has_zero_dim( *x ) ) if ( bli_obj_has_zero_dim( x ) )
{ {
bli_scalm( beta, y ); bli_scalm( beta, y );
return; return;

View File

@@ -49,25 +49,25 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
\ \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
dim_t n = bli_obj_width( *a ); \ dim_t n = bli_obj_width( a ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
\ \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\ \
/* Invoke the void pointer-based function for the given datatype. */ \ /* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_13 \ bli_call_ft_13 \

View File

@@ -53,7 +53,7 @@ void bli_ger_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension in partitioning direction. // Query dimension in partitioning direction.
m_trans = bli_obj_length_after_trans( *a ); m_trans = bli_obj_length_after_trans( a );
// Partition along the m dimension. // Partition along the m dimension.
for ( i = 0; i < m_trans; i += b_alg ) for ( i = 0; i < m_trans; i += b_alg )

View File

@@ -53,7 +53,7 @@ void bli_ger_blk_var2( obj_t* alpha,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension in partitioning direction. // Query dimension in partitioning direction.
n_trans = bli_obj_width_after_trans( *a ); n_trans = bli_obj_width_after_trans( a );
// Partition along the n dimension. // Partition along the n dimension.
for ( i = 0; i < n_trans; i += b_alg ) for ( i = 0; i < n_trans; i += b_alg )

View File

@@ -64,15 +64,15 @@ void bli_ger_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( *y ); dt_targ_y = bli_obj_target_dt( y );
//dt_targ_a = bli_obj_target_dt( *a ); //dt_targ_a = bli_obj_target_dt( a );
// Determine whether each operand with unit stride. // Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( *a ) ); bli_obj_is_col_stored( a ) );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -93,20 +93,20 @@ void bli_ger_front
{ {
// Use different control trees depending on storage of the matrix // Use different control trees depending on storage of the matrix
// operand. // operand.
if ( bli_obj_is_row_stored( *a ) ) ger_cntl = ger_cntl_bs_ke_row; if ( bli_obj_is_row_stored( a ) ) ger_cntl = ger_cntl_bs_ke_row;
else ger_cntl = ger_cntl_bs_ke_col; else ger_cntl = ger_cntl_bs_ke_col;
} }
else else
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_row_tilted( *a ) ) ger_cntl = ger_cntl_ge_row; if ( bli_obj_is_row_tilted( a ) ) ger_cntl = ger_cntl_ge_row;
else ger_cntl = ger_cntl_ge_col; else ger_cntl = ger_cntl_ge_col;
} }
@@ -151,7 +151,7 @@ void PASTEMAC(ch,opname) \
inc_t rs_x, cs_x; \ inc_t rs_x, cs_x; \
inc_t rs_y, cs_y; \ inc_t rs_y, cs_y; \
\ \
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, m_x, m_y ); \ bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_x, &m_y ); \
\ \
rs_x = incx; cs_x = m_x * incx; \ rs_x = incx; cs_x = m_x * incx; \
rs_y = incy; cs_y = m_y * incy; \ rs_y = incy; cs_y = m_y * incy; \
@@ -162,8 +162,8 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \
bli_obj_create_with_attached_buffer( dt, m, n, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, n, a, rs_a, cs_a, &ao ); \
\ \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_conj( conjy, yo ); \ bli_obj_set_conj( conjy, &yo ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&xo, \ &xo, \

View File

@@ -74,27 +74,27 @@ void bli_ger_int( conj_t conjx,
bli_ger_check( alpha, x, y, a ); bli_ger_check( alpha, x, y, a );
// If A has a zero dimension, return early. // If A has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *a ) ) return; if ( bli_obj_has_zero_dim( a ) ) return;
// If x or y has a zero dimension, return early. // If x or y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *x ) || if ( bli_obj_has_zero_dim( x ) ||
bli_obj_has_zero_dim( *y ) ) return; bli_obj_has_zero_dim( y ) ) return;
// Alias the objects, applying conjx and conjy to x and y, respectively. // Alias the objects, applying conjx and conjy to x and y, respectively.
bli_obj_alias_with_conj( conjx, *x, x_local ); bli_obj_alias_with_conj( conjx, x, &x_local );
bli_obj_alias_with_conj( conjy, *y, y_local ); bli_obj_alias_with_conj( conjy, y, &y_local );
bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( a, &a_local );
// If matrix A is marked for conjugation, we interpret this as a request // If matrix A is marked for conjugation, we interpret this as a request
// to apply a conjugation to the other operands. // to apply a conjugation to the other operands.
if ( bli_obj_has_conj( a_local ) ) if ( bli_obj_has_conj( &a_local ) )
{ {
bli_obj_toggle_conj( a_local ); bli_obj_toggle_conj( &a_local );
bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( &x_local );
bli_obj_toggle_conj( y_local ); bli_obj_toggle_conj( &y_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ), bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
BLIS_CONJUGATE, BLIS_CONJUGATE,
alpha, alpha,
&alpha_local ); &alpha_local );
@@ -107,10 +107,10 @@ void bli_ger_int( conj_t conjx,
// If we are about the call a leaf-level implementation, and matrix A // If we are about the call a leaf-level implementation, and matrix A
// still needs a transposition, then we must induce one by swapping the // still needs a transposition, then we must induce one by swapping the
// strides and dimensions. // strides and dimensions.
if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( a_local ) ) if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) )
{ {
bli_obj_induce_trans( a_local ); bli_obj_induce_trans( &a_local );
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local ); bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
} }
// Extract the variant number and implementation type. // Extract the variant number and implementation type.

View File

@@ -51,26 +51,26 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
uplo_t uplo = bli_obj_uplo( *a ); \ uplo_t uplo = bli_obj_uplo( a ); \
conj_t conja = bli_obj_conj_status( *a ); \ conj_t conja = bli_obj_conj_status( a ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
\ \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
\ \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \ void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
\ \
/* Invoke the void pointer-based function for the given datatype. */ \ /* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_15 \ bli_call_ft_15 \

View File

@@ -69,7 +69,7 @@ void bli_hemv_blk_var1( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// y = beta * y; // y = beta * y;
bli_scalv_int( beta, bli_scalv_int( beta,

View File

@@ -70,7 +70,7 @@ void bli_hemv_blk_var2( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// y = beta * y; // y = beta * y;
bli_scalv_int( beta, bli_scalv_int( beta,

View File

@@ -69,7 +69,7 @@ void bli_hemv_blk_var3( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// y = beta * y; // y = beta * y;
bli_scalv_int( beta, bli_scalv_int( beta,

View File

@@ -70,7 +70,7 @@ void bli_hemv_blk_var4( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// y = beta * y; // y = beta * y;
bli_scalv_int( beta, bli_scalv_int( beta,

View File

@@ -67,15 +67,15 @@ void bli_hemv_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a ); dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( *y ); dt_targ_y = bli_obj_target_dt( y );
// Determine whether each operand with unit stride. // Determine whether each operand with unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( *a ) ); bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -109,14 +109,14 @@ void bli_hemv_front
// combinations of upper/lower triangular storage and row/column-storage. // combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular // The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees. // trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *a ) ) if ( bli_obj_is_lower( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
else hemv_cntl = hemv_cntl_bs_ke_lcol_urow; else hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
} }
else // if ( bli_obj_is_upper( *a ) ) else // if ( bli_obj_is_upper( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow; if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
} }
} }
@@ -124,20 +124,20 @@ void bli_hemv_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *a ) ) if ( bli_obj_is_lower( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol; if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
else hemv_cntl = hemv_cntl_ge_lcol_urow; else hemv_cntl = hemv_cntl_ge_lcol_urow;
} }
else // if ( bli_obj_is_upper( *a ) ) else // if ( bli_obj_is_upper( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow; if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
else hemv_cntl = hemv_cntl_ge_lrow_ucol; else hemv_cntl = hemv_cntl_ge_lrow_ucol;
} }
} }
@@ -193,11 +193,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
\ \
bli_obj_set_uplo( uploa, ao ); \ bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conj( conja, ao ); \ bli_obj_set_conj( conja, &ao ); \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
\ \
bli_obj_set_struc( BLIS_HERMITIAN, ao ); \ bli_obj_set_struc( BLIS_HERMITIAN, &ao ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&ao, \ &ao, \

View File

@@ -76,17 +76,17 @@ void bli_hemv_int( conj_t conjh,
} }
// If y has a zero dimension, return early. // If y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *y ) ) return; if ( bli_obj_has_zero_dim( y ) ) return;
// If x has a zero dimension, scale y by beta and return early. // If x has a zero dimension, scale y by beta and return early.
if ( bli_obj_has_zero_dim( *x ) ) if ( bli_obj_has_zero_dim( x ) )
{ {
bli_scalm( beta, y ); bli_scalm( beta, y );
return; return;
} }
// Alias A in case we need to induce the upper triangular case. // Alias A in case we need to induce the upper triangular case.
bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( a, &a_local );
/* /*
// Our blocked algorithms only [explicitly] implement the lower triangular // Our blocked algorithms only [explicitly] implement the lower triangular
@@ -96,10 +96,10 @@ void bli_hemv_int( conj_t conjh,
// triangular case. But we only need to do this for blocked algorithms, // triangular case. But we only need to do this for blocked algorithms,
// since unblocked algorithms are responsible for handling the upper case // since unblocked algorithms are responsible for handling the upper case
// explicitly (and they should not be inspecting the transposition bit anyway). // explicitly (and they should not be inspecting the transposition bit anyway).
if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( *a ) ) if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( a ) )
{ {
bli_obj_toggle_conj( a_local ); bli_obj_toggle_conj( &a_local );
bli_obj_toggle_trans( a_local ); bli_obj_toggle_trans( &a_local );
} }
*/ */

View File

@@ -49,21 +49,21 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *c ); \ num_t dt = bli_obj_dt( c ); \
\ \
uplo_t uplo = bli_obj_uplo( *c ); \ uplo_t uplo = bli_obj_uplo( c ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
\ \
dim_t m = bli_obj_length( *c ); \ dim_t m = bli_obj_length( c ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_c = bli_obj_buffer_at_off( *c ); \ void* buf_c = bli_obj_buffer_at_off( c ); \
inc_t rs_c = bli_obj_row_stride( *c ); \ inc_t rs_c = bli_obj_row_stride( c ); \
inc_t cs_c = bli_obj_col_stride( *c ); \ inc_t cs_c = bli_obj_col_stride( c ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\ \
/* Invoke the void pointer-based function for the given datatype. */ \ /* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_11 \ bli_call_ft_11 \

View File

@@ -64,7 +64,7 @@ void bli_her_blk_var1( conj_t conjh,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *c ); mn = bli_obj_length( c );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -64,7 +64,7 @@ void bli_her_blk_var2( conj_t conjh,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *c ); mn = bli_obj_length( c );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -61,13 +61,13 @@ void bli_her_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
//dt_targ_c = bli_obj_target_dt( *c ); //dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand with unit stride. // Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( *c ) ); bli_obj_is_col_stored( c ) );
// Create object to hold a copy-cast of alpha. // Create object to hold a copy-cast of alpha.
@@ -87,14 +87,14 @@ void bli_her_front
// combinations of upper/lower triangular storage and row/column-storage. // combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular // The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees. // trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
else her_cntl = her_cntl_bs_ke_lcol_urow; else her_cntl = her_cntl_bs_ke_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
else her_cntl = her_cntl_bs_ke_lrow_ucol; else her_cntl = her_cntl_bs_ke_lrow_ucol;
} }
} }
@@ -102,19 +102,19 @@ void bli_her_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol;
else her_cntl = her_cntl_ge_lcol_urow; else her_cntl = her_cntl_ge_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow;
else her_cntl = her_cntl_ge_lrow_ucol; else her_cntl = her_cntl_ge_lrow_ucol;
} }
} }
@@ -162,10 +162,10 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\ \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_uplo( uploc, co ); \ bli_obj_set_uplo( uploc, &co ); \
\ \
bli_obj_set_struc( BLIS_HERMITIAN, co ); \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&xo, \ &xo, \

View File

@@ -73,22 +73,22 @@ void bli_her_int( conj_t conjh,
} }
// If C or x has a zero dimension, return early. // If C or x has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *c ) ) return; if ( bli_obj_has_zero_dim( c ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return; if ( bli_obj_has_zero_dim( x ) ) return;
// Alias the operands in case we need to apply conjugations. // Alias the operands in case we need to apply conjugations.
bli_obj_alias_to( *x, x_local ); bli_obj_alias_to( x, &x_local );
bli_obj_alias_to( *c, c_local ); bli_obj_alias_to( c, &c_local );
// If matrix C is marked for conjugation, we interpret this as a request // If matrix C is marked for conjugation, we interpret this as a request
// to apply a conjugation to the other operands. // to apply a conjugation to the other operands.
if ( bli_obj_has_conj( c_local ) ) if ( bli_obj_has_conj( &c_local ) )
{ {
bli_obj_toggle_conj( c_local ); bli_obj_toggle_conj( &c_local );
// Notice that we don't need to conjugate alpha since it is guaranteed // Notice that we don't need to conjugate alpha since it is guaranteed
// to be real. // to be real.
bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( &x_local );
} }
// Extract the variant number and implementation type. // Extract the variant number and implementation type.

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \ { \
const num_t dt = PASTEMAC(ch,type); \ const num_t dt = PASTEMAC(ch,type); \
\ \
ctype* two = PASTEMAC(ch,2); \
ctype* x0; \ ctype* x0; \
ctype* chi1; \ ctype* chi1; \
ctype* y0; \ ctype* y0; \
@@ -156,7 +155,8 @@ void PASTEMAC(ch,varname) \
\ \
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \ + conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\ \
/* For her2, explicitly set the imaginary component of gamma11 to /* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \ zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \ { \
const num_t dt = PASTEMAC(ch,type); \ const num_t dt = PASTEMAC(ch,type); \
\ \
ctype* two = PASTEMAC(ch,2); \
ctype* x0; \ ctype* x0; \
ctype* chi1; \ ctype* chi1; \
ctype* x2; \ ctype* x2; \
@@ -165,7 +164,8 @@ void PASTEMAC(ch,varname) \
\ \
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \ + conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\ \
/* For her2, explicitly set the imaginary component of gamma11 to /* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \ zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \ { \
const num_t dt = PASTEMAC(ch,type); \ const num_t dt = PASTEMAC(ch,type); \
\ \
ctype* two = PASTEMAC(ch,2); \
ctype* chi1; \ ctype* chi1; \
ctype* y0; \ ctype* y0; \
ctype* psi1; \ ctype* psi1; \
@@ -165,7 +164,8 @@ void PASTEMAC(ch,varname) \
\ \
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \ + conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\ \
/* For her2, explicitly set the imaginary component of gamma11 to /* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \ zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \ { \
const num_t dt = PASTEMAC(ch,type); \ const num_t dt = PASTEMAC(ch,type); \
\ \
ctype* two = PASTEMAC(ch,2); \
ctype* chi1; \ ctype* chi1; \
ctype* x2; \ ctype* x2; \
ctype* psi1; \ ctype* psi1; \
@@ -164,7 +163,8 @@ void PASTEMAC(ch,varname) \
\ \
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \ + conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\ \
/* For her2, explicitly set the imaginary component of gamma11 to /* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \ zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \ { \
const num_t dt = PASTEMAC(ch,type); \ const num_t dt = PASTEMAC(ch,type); \
\ \
ctype* two = PASTEMAC(ch,2); \
ctype* x0; \ ctype* x0; \
ctype* chi1; \ ctype* chi1; \
ctype* y0; \ ctype* y0; \
@@ -149,7 +148,8 @@ void PASTEMAC(ch,varname) \
\ \
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \ + conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\ \
/* For her2, explicitly set the imaginary component of gamma11 to /* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \ zero. */ \

View File

@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
{ \ { \
const num_t dt = PASTEMAC(ch,type); \ const num_t dt = PASTEMAC(ch,type); \
\ \
ctype* two = PASTEMAC(ch,2); \
ctype* chi1; \ ctype* chi1; \
ctype* x2; \ ctype* x2; \
ctype* psi1; \ ctype* psi1; \
@@ -157,7 +156,8 @@ void PASTEMAC(ch,varname) \
\ \
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \ /* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
+ conj(alpha) * psi1 * conj(chi1); */ \ + conj(alpha) * psi1 * conj(chi1); */ \
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \ PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
\ \
/* For her2, explicitly set the imaginary component of gamma11 to /* For her2, explicitly set the imaginary component of gamma11 to
zero. */ \ zero. */ \

View File

@@ -51,25 +51,25 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *c ); \ num_t dt = bli_obj_dt( c ); \
\ \
uplo_t uplo = bli_obj_uplo( *c ); \ uplo_t uplo = bli_obj_uplo( c ); \
conj_t conjx = bli_obj_conj_status( *x ); \ conj_t conjx = bli_obj_conj_status( x ); \
conj_t conjy = bli_obj_conj_status( *y ); \ conj_t conjy = bli_obj_conj_status( y ); \
\ \
dim_t m = bli_obj_length( *c ); \ dim_t m = bli_obj_length( c ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_y = bli_obj_buffer_at_off( *y ); \ void* buf_y = bli_obj_buffer_at_off( y ); \
inc_t incy = bli_obj_vector_inc( *y ); \ inc_t incy = bli_obj_vector_inc( y ); \
\ \
void* buf_c = bli_obj_buffer_at_off( *c ); \ void* buf_c = bli_obj_buffer_at_off( c ); \
inc_t rs_c = bli_obj_row_stride( *c ); \ inc_t rs_c = bli_obj_row_stride( c ); \
inc_t cs_c = bli_obj_col_stride( *c ); \ inc_t cs_c = bli_obj_col_stride( c ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\ \
/* Invoke the void pointer-based function for the given datatype. */ \ /* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_14 \ bli_call_ft_14 \

View File

@@ -69,7 +69,7 @@ void bli_her2_blk_var1( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *c ); mn = bli_obj_length( c );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -70,7 +70,7 @@ void bli_her2_blk_var2( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *c ); mn = bli_obj_length( c );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -70,7 +70,7 @@ void bli_her2_blk_var3( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *c ); mn = bli_obj_length( c );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -69,7 +69,7 @@ void bli_her2_blk_var4( conj_t conjh,
bli_obj_init_pack( &y1_pack ); bli_obj_init_pack( &y1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *c ); mn = bli_obj_length( c );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -65,15 +65,15 @@ void bli_her2_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( *y ); dt_targ_y = bli_obj_target_dt( y );
//dt_targ_c = bli_obj_target_dt( *c ); //dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand with unit stride. // Determine whether each operand with unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( *c ) ); bli_obj_is_col_stored( c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -101,14 +101,14 @@ void bli_her2_front
// combinations of upper/lower triangular storage and row/column-storage. // combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular // The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees. // trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
else her2_cntl = her2_cntl_bs_ke_lcol_urow; else her2_cntl = her2_cntl_bs_ke_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
else her2_cntl = her2_cntl_bs_ke_lrow_ucol; else her2_cntl = her2_cntl_bs_ke_lrow_ucol;
} }
} }
@@ -116,20 +116,20 @@ void bli_her2_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
else her2_cntl = her2_cntl_ge_lcol_urow; else her2_cntl = her2_cntl_ge_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
else her2_cntl = her2_cntl_ge_lrow_ucol; else her2_cntl = her2_cntl_ge_lrow_ucol;
} }
} }
@@ -183,11 +183,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\ \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_conj( conjy, yo ); \ bli_obj_set_conj( conjy, &yo ); \
bli_obj_set_uplo( uploc, co ); \ bli_obj_set_uplo( uploc, &co ); \
\ \
bli_obj_set_struc( BLIS_HERMITIAN, co ); \ bli_obj_set_struc( BLIS_HERMITIAN, &co ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&xo, \ &xo, \

View File

@@ -80,29 +80,29 @@ void bli_her2_int( conj_t conjh,
} }
// If C, x, or y has a zero dimension, return early. // If C, x, or y has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *c ) ) return; if ( bli_obj_has_zero_dim( c ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return; if ( bli_obj_has_zero_dim( x ) ) return;
if ( bli_obj_has_zero_dim( *y ) ) return; if ( bli_obj_has_zero_dim( y ) ) return;
// Alias the operands in case we need to apply conjugations. // Alias the operands in case we need to apply conjugations.
bli_obj_alias_to( *x, x_local ); bli_obj_alias_to( x, &x_local );
bli_obj_alias_to( *y, y_local ); bli_obj_alias_to( y, &y_local );
bli_obj_alias_to( *c, c_local ); bli_obj_alias_to( c, &c_local );
// If matrix C is marked for conjugation, we interpret this as a request // If matrix C is marked for conjugation, we interpret this as a request
// to apply a conjugation to the other operands. // to apply a conjugation to the other operands.
if ( bli_obj_has_conj( c_local ) ) if ( bli_obj_has_conj( &c_local ) )
{ {
bli_obj_toggle_conj( c_local ); bli_obj_toggle_conj( &c_local );
bli_obj_toggle_conj( x_local ); bli_obj_toggle_conj( &x_local );
bli_obj_toggle_conj( y_local ); bli_obj_toggle_conj( &y_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ), bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
BLIS_CONJUGATE, BLIS_CONJUGATE,
alpha, alpha,
&alpha_local ); &alpha_local );
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha_conj ), bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha_conj ),
BLIS_CONJUGATE, BLIS_CONJUGATE,
alpha_conj, alpha_conj,
&alpha_conj_local ); &alpha_conj_local );

View File

@@ -67,15 +67,15 @@ void bli_symv_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a ); dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( *y ); dt_targ_y = bli_obj_target_dt( y );
// Determine whether each operand has unit stride. // Determine whether each operand has unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( *a ) ); bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -109,14 +109,14 @@ void bli_symv_front
// combinations of upper/lower triangular storage and row/column-storage. // combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular // The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees. // trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *a ) ) if ( bli_obj_is_lower( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
else hemv_cntl = hemv_cntl_bs_ke_lcol_urow; else hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
} }
else // if ( bli_obj_is_upper( *a ) ) else // if ( bli_obj_is_upper( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow; if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol; else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
} }
} }
@@ -124,20 +124,20 @@ void bli_symv_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *a ) ) if ( bli_obj_is_lower( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol; if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
else hemv_cntl = hemv_cntl_ge_lcol_urow; else hemv_cntl = hemv_cntl_ge_lcol_urow;
} }
else // if ( bli_obj_is_upper( *a ) ) else // if ( bli_obj_is_upper( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow; if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
else hemv_cntl = hemv_cntl_ge_lrow_ucol; else hemv_cntl = hemv_cntl_ge_lrow_ucol;
} }
} }
@@ -194,11 +194,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
\ \
bli_obj_set_uplo( uploa, ao ); \ bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conj( conja, ao ); \ bli_obj_set_conj( conja, &ao ); \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
\ \
bli_obj_set_struc( BLIS_SYMMETRIC, ao ); \ bli_obj_set_struc( BLIS_SYMMETRIC, &ao ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&ao, \ &ao, \

View File

@@ -61,13 +61,13 @@ void bli_syr_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
dt_targ_c = bli_obj_target_dt( *c ); dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand has unit stride. // Determine whether each operand has unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( *c ) ); bli_obj_is_col_stored( c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -89,14 +89,14 @@ void bli_syr_front
// combinations of upper/lower triangular storage and row/column-storage. // combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular // The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees. // trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
else her_cntl = her_cntl_bs_ke_lcol_urow; else her_cntl = her_cntl_bs_ke_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
else her_cntl = her_cntl_bs_ke_lrow_ucol; else her_cntl = her_cntl_bs_ke_lrow_ucol;
} }
} }
@@ -104,19 +104,19 @@ void bli_syr_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol;
else her_cntl = her_cntl_ge_lcol_urow; else her_cntl = her_cntl_ge_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow;
else her_cntl = her_cntl_ge_lrow_ucol; else her_cntl = her_cntl_ge_lrow_ucol;
} }
} }
@@ -163,10 +163,10 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\ \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_uplo( uploc, co ); \ bli_obj_set_uplo( uploc, &co ); \
\ \
bli_obj_set_struc( BLIS_SYMMETRIC, co ); \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&xo, \ &xo, \

View File

@@ -64,15 +64,15 @@ void bli_syr2_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
dt_targ_y = bli_obj_target_dt( *y ); dt_targ_y = bli_obj_target_dt( y );
//dt_targ_c = bli_obj_target_dt( *c ); //dt_targ_c = bli_obj_target_dt( c );
// Determine whether each operand has unit stride. // Determine whether each operand has unit stride.
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 ); y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) || c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
bli_obj_is_col_stored( *c ) ); bli_obj_is_col_stored( c ) );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -94,14 +94,14 @@ void bli_syr2_front
// combinations of upper/lower triangular storage and row/column-storage. // combinations of upper/lower triangular storage and row/column-storage.
// The row-stored lower triangular and column-stored upper triangular // The row-stored lower triangular and column-stored upper triangular
// trees are identical. Same for the remaining two trees. // trees are identical. Same for the remaining two trees.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
else her2_cntl = her2_cntl_bs_ke_lcol_urow; else her2_cntl = her2_cntl_bs_ke_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
else her2_cntl = her2_cntl_bs_ke_lrow_ucol; else her2_cntl = her2_cntl_bs_ke_lrow_ucol;
} }
} }
@@ -109,20 +109,20 @@ void bli_syr2_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y ); if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c ); if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_is_lower( *c ) ) if ( bli_obj_is_lower( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lrow_ucol; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
else her2_cntl = her2_cntl_ge_lcol_urow; else her2_cntl = her2_cntl_ge_lcol_urow;
} }
else // if ( bli_obj_is_upper( *c ) ) else // if ( bli_obj_is_upper( c ) )
{ {
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lcol_urow; if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
else her2_cntl = her2_cntl_ge_lrow_ucol; else her2_cntl = her2_cntl_ge_lrow_ucol;
} }
} }
@@ -176,11 +176,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \ bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
\ \
bli_obj_set_conj( conjx, xo ); \ bli_obj_set_conj( conjx, &xo ); \
bli_obj_set_conj( conjy, yo ); \ bli_obj_set_conj( conjy, &yo ); \
bli_obj_set_uplo( uploc, co ); \ bli_obj_set_uplo( uploc, &co ); \
\ \
bli_obj_set_struc( BLIS_SYMMETRIC, co ); \ bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&xo, \ &xo, \

View File

@@ -48,22 +48,22 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
uplo_t uploa = bli_obj_uplo( *a ); \ uplo_t uploa = bli_obj_uplo( a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \
diag_t diaga = bli_obj_diag( *a ); \ diag_t diaga = bli_obj_diag( a ); \
\ \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
\ \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\ \
/* Invoke the void pointer-based function for the given datatype. */ \ /* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_11 \ bli_call_ft_11 \

View File

@@ -61,13 +61,13 @@ void bli_trmv_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_a = bli_obj_target_dt( *a ); dt_targ_a = bli_obj_target_dt( a );
dt_targ_x = bli_obj_target_dt( *x ); dt_targ_x = bli_obj_target_dt( x );
// Determine whether each operand has unit stride. // Determine whether each operand has unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( *a ) ); bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -89,14 +89,14 @@ void bli_trmv_front
// combinations of transposition and row/column-storage. // combinations of transposition and row/column-storage.
// The row-stored without transpose and column-stored with transpose // The row-stored without transpose and column-stored with transpose
// trees are identical. Same for the remaining two trees. // trees are identical. Same for the remaining two trees.
if ( bli_obj_has_notrans( *a ) ) if ( bli_obj_has_notrans( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol; if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
else trmv_cntl = trmv_cntl_bs_ke_ncol_trow; else trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
} }
else // if ( bli_obj_has_trans( *a ) ) else // if ( bli_obj_has_trans( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow; if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
else trmv_cntl = trmv_cntl_bs_ke_nrow_tcol; else trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
} }
} }
@@ -104,19 +104,19 @@ void bli_trmv_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_has_notrans( *a ) ) if ( bli_obj_has_notrans( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol; if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol;
else trmv_cntl = trmv_cntl_ge_ncol_trow; else trmv_cntl = trmv_cntl_ge_ncol_trow;
} }
else // if ( bli_obj_has_trans( *a ) ) else // if ( bli_obj_has_trans( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow; if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow;
else trmv_cntl = trmv_cntl_ge_nrow_tcol; else trmv_cntl = trmv_cntl_ge_nrow_tcol;
} }
} }
@@ -162,11 +162,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
\ \
bli_obj_set_uplo( uploa, ao ); \ bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conjtrans( transa, ao ); \ bli_obj_set_conjtrans( transa, &ao ); \
bli_obj_set_diag( diaga, ao ); \ bli_obj_set_diag( diaga, &ao ); \
\ \
bli_obj_set_struc( BLIS_TRIANGULAR, ao ); \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&ao, \ &ao, \

View File

@@ -77,18 +77,18 @@ void bli_trmv_int( obj_t* alpha,
bli_trmv_check( alpha, a, x ); bli_trmv_check( alpha, a, x );
// If A or x has a zero dimension, return early. // If A or x has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *a ) ) return; if ( bli_obj_has_zero_dim( a ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return; if ( bli_obj_has_zero_dim( x ) ) return;
// Alias A in case we need to induce a transformation (ie: transposition). // Alias A in case we need to induce a transformation (ie: transposition).
bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( a, &a_local );
// NOTE: to support cases where B is complex and A is real, we will // NOTE: to support cases where B is complex and A is real, we will
// need to have the default side case be BLIS_RIGHT and then express // need to have the default side case be BLIS_RIGHT and then express
// the left case in terms of it, rather than the other way around. // the left case in terms of it, rather than the other way around.
// Determine uplo (for indexing to the correct function pointer). // Determine uplo (for indexing to the correct function pointer).
if ( bli_obj_is_lower( a_local ) ) uplo = 0; if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
else uplo = 1; else uplo = 1;
// We do not explicitly implement the cases where A is transposed. // We do not explicitly implement the cases where A is transposed.
@@ -107,11 +107,12 @@ void bli_trmv_int( obj_t* alpha,
// affect the optimal choice of kernel (ie: a column-major column panel // affect the optimal choice of kernel (ie: a column-major column panel
// matrix with transpose times a vector would use the same kernel as a // matrix with transpose times a vector would use the same kernel as a
// row-major row panel matrix with no transpose times a vector). // row-major row panel matrix with no transpose times a vector).
if ( bli_obj_has_trans( a_local ) ) if ( bli_obj_has_trans( &a_local ) )
{ {
//bli_obj_induce_trans( a_local ); //bli_obj_induce_trans( &a_local );
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local ); //bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
bli_toggle_bool( uplo ); if ( uplo == 1 ) uplo = 0;
else uplo = 1;
} }
// Extract the variant number and implementation type. // Extract the variant number and implementation type.

View File

@@ -54,7 +54,7 @@ void bli_trmv_l_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_trmv_l_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_trmv_u_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -54,7 +54,7 @@ void bli_trmv_u_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// Partition diagonally. // Partition diagonally.
for ( ij = 0; ij < mn; ij += b_alg ) for ( ij = 0; ij < mn; ij += b_alg )

View File

@@ -48,22 +48,22 @@ void PASTEMAC0(opname) \
{ \ { \
bli_init_once(); \ bli_init_once(); \
\ \
num_t dt = bli_obj_dt( *a ); \ num_t dt = bli_obj_dt( a ); \
\ \
uplo_t uploa = bli_obj_uplo( *a ); \ uplo_t uploa = bli_obj_uplo( a ); \
trans_t transa = bli_obj_conjtrans_status( *a ); \ trans_t transa = bli_obj_conjtrans_status( a ); \
diag_t diaga = bli_obj_diag( *a ); \ diag_t diaga = bli_obj_diag( a ); \
\ \
dim_t m = bli_obj_length( *a ); \ dim_t m = bli_obj_length( a ); \
\ \
void* buf_a = bli_obj_buffer_at_off( *a ); \ void* buf_a = bli_obj_buffer_at_off( a ); \
inc_t rs_a = bli_obj_row_stride( *a ); \ inc_t rs_a = bli_obj_row_stride( a ); \
inc_t cs_a = bli_obj_col_stride( *a ); \ inc_t cs_a = bli_obj_col_stride( a ); \
\ \
void* buf_x = bli_obj_buffer_at_off( *x ); \ void* buf_x = bli_obj_buffer_at_off( x ); \
inc_t incx = bli_obj_vector_inc( *x ); \ inc_t incx = bli_obj_vector_inc( x ); \
\ \
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \ void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
\ \
/* Invoke the void pointer-based function for the given datatype. */ \ /* Invoke the void pointer-based function for the given datatype. */ \
bli_call_ft_11 \ bli_call_ft_11 \

View File

@@ -61,13 +61,13 @@ void bli_trsv_front
// Query the target datatypes of each object. // Query the target datatypes of each object.
dt_targ_a = bli_obj_dt( *a ); dt_targ_a = bli_obj_dt( a );
dt_targ_x = bli_obj_dt( *x ); dt_targ_x = bli_obj_dt( x );
// Determine whether each operand has unit stride. // Determine whether each operand has unit stride.
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) || a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
bli_obj_is_col_stored( *a ) ); bli_obj_is_col_stored( a ) );
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 ); x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
// Create an object to hold a copy-cast of alpha. Notice that we use // Create an object to hold a copy-cast of alpha. Notice that we use
@@ -85,14 +85,14 @@ void bli_trsv_front
if ( a_has_unit_inc && if ( a_has_unit_inc &&
x_has_unit_inc ) x_has_unit_inc )
{ {
if ( bli_obj_has_notrans( *a ) ) if ( bli_obj_has_notrans( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) trsv_cntl = trsv_cntl_bs_ke_nrow_tcol; if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
else trsv_cntl = trsv_cntl_bs_ke_ncol_trow; else trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
} }
else // if ( bli_obj_has_trans( *a ) ) else // if ( bli_obj_has_trans( a ) )
{ {
if ( bli_obj_is_row_stored( *a ) ) trsv_cntl = trsv_cntl_bs_ke_ncol_trow; if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
else trsv_cntl = trsv_cntl_bs_ke_nrow_tcol; else trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
} }
} }
@@ -100,19 +100,19 @@ void bli_trsv_front
{ {
// Mark objects with unit stride as already being packed. This prevents // Mark objects with unit stride as already being packed. This prevents
// unnecessary packing from happening within the blocked algorithm. // unnecessary packing from happening within the blocked algorithm.
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a ); if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x ); if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
// Here, we make a similar choice as above, except that (1) we look // Here, we make a similar choice as above, except that (1) we look
// at storage tilt, and (2) we choose a tree that performs blocking. // at storage tilt, and (2) we choose a tree that performs blocking.
if ( bli_obj_has_notrans( *a ) ) if ( bli_obj_has_notrans( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) trsv_cntl = trsv_cntl_ge_nrow_tcol; if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_nrow_tcol;
else trsv_cntl = trsv_cntl_ge_ncol_trow; else trsv_cntl = trsv_cntl_ge_ncol_trow;
} }
else // if ( bli_obj_has_trans( *a ) ) else // if ( bli_obj_has_trans( a ) )
{ {
if ( bli_obj_is_row_tilted( *a ) ) trsv_cntl = trsv_cntl_ge_ncol_trow; if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_ncol_trow;
else trsv_cntl = trsv_cntl_ge_nrow_tcol; else trsv_cntl = trsv_cntl_ge_nrow_tcol;
} }
} }
@@ -158,11 +158,11 @@ void PASTEMAC(ch,opname) \
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \ bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \ bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
\ \
bli_obj_set_uplo( uploa, ao ); \ bli_obj_set_uplo( uploa, &ao ); \
bli_obj_set_conjtrans( transa, ao ); \ bli_obj_set_conjtrans( transa, &ao ); \
bli_obj_set_diag( diaga, ao ); \ bli_obj_set_diag( diaga, &ao ); \
\ \
bli_obj_set_struc( BLIS_TRIANGULAR, ao ); \ bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \
\ \
PASTEMAC0(opname)( &alphao, \ PASTEMAC0(opname)( &alphao, \
&ao, \ &ao, \

View File

@@ -77,19 +77,19 @@ void bli_trsv_int( obj_t* alpha,
bli_trsv_check( alpha, a, x ); bli_trsv_check( alpha, a, x );
// If A or x has a zero dimension, return early. // If A or x has a zero dimension, return early.
if ( bli_obj_has_zero_dim( *a ) ) return; if ( bli_obj_has_zero_dim( a ) ) return;
if ( bli_obj_has_zero_dim( *x ) ) return; if ( bli_obj_has_zero_dim( x ) ) return;
// Alias A in case we need to induce a transformation (ie: transposition). // Alias A in case we need to induce a transformation (ie: transposition).
bli_obj_alias_to( *a, a_local ); bli_obj_alias_to( a, &a_local );
// NOTE: to support cases where B is complex and A is real, we will // NOTE: to support cases where B is complex and A is real, we will
// need to have the default side case be BLIS_RIGHT and then express // need to have the default side case be BLIS_RIGHT and then express
// the left case in terms of it, rather than the other way around. // the left case in terms of it, rather than the other way around.
// Determine uplo (for indexing to the correct function pointer). // Determine uplo (for indexing to the correct function pointer).
if ( bli_obj_is_lower( a_local ) ) uplo = 0; if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
else uplo = 1; else uplo = 1;
// We do not explicitly implement the cases where A is transposed. // We do not explicitly implement the cases where A is transposed.
// However, we can still handle them. Specifically, if A is marked as // However, we can still handle them. Specifically, if A is marked as
@@ -107,11 +107,12 @@ void bli_trsv_int( obj_t* alpha,
// affect the optimal choice of kernel (ie: a column-major column panel // affect the optimal choice of kernel (ie: a column-major column panel
// matrix with transpose times a vector would use the same kernel as a // matrix with transpose times a vector would use the same kernel as a
// row-major row panel matrix with no transpose times a vector). // row-major row panel matrix with no transpose times a vector).
if ( bli_obj_has_trans( a_local ) ) if ( bli_obj_has_trans( &a_local ) )
{ {
//bli_obj_induce_trans( a_local ); //bli_obj_induce_trans( &a_local );
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local ); //bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
bli_toggle_bool( uplo ); if ( uplo == 1 ) uplo = 0;
else uplo = 1;
} }
// Extract the variant number and implementation type. // Extract the variant number and implementation type.

View File

@@ -54,7 +54,7 @@ void bli_trsv_l_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// x = alpha * x; // x = alpha * x;
bli_scalv_int( alpha, bli_scalv_int( alpha,

View File

@@ -54,7 +54,7 @@ void bli_trsv_l_blk_var2( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// x = alpha * x; // x = alpha * x;
bli_scalv_int( alpha, bli_scalv_int( alpha,

View File

@@ -54,7 +54,7 @@ void bli_trsv_u_blk_var1( obj_t* alpha,
bli_obj_init_pack( &x1_pack ); bli_obj_init_pack( &x1_pack );
// Query dimension. // Query dimension.
mn = bli_obj_length( *a ); mn = bli_obj_length( a );
// x = alpha * x; // x = alpha * x;
bli_scalv_int( alpha, bli_scalv_int( alpha,

Some files were not shown because too many files have changed in this diff Show More