mirror of
https://github.com/amd/blis.git
synced 2026-03-24 03:07:22 +00:00
Converted function-like macros to static functions.
Details:
- Converted most C preprocessor macros in bli_param_macro_defs.h and bli_obj_macro_defs.h to static functions.
- Moved some functions/macros into bli_misc_macro_defs.h, and reshuffled others between bli_param_macro_defs.h and bli_obj_macro_defs.h.
- Changed the obj_t-initializing macros in bli_type_defs.h to static functions.
- Removed some old references to BLIS_TWO and BLIS_MINUS_TWO from bli_constants.h.
- Whitespace changes in select files (four spaces to a single tab).
This commit is contained in:
@@ -194,7 +194,7 @@ void bli_zdotv_template_noopt
|
||||
// toggling the effective conjugation of x and then conjugating the
|
||||
// resulting dot product.
|
||||
if ( bli_is_conj( conjy ) )
|
||||
bli_toggle_conj( conjx_use );
|
||||
bli_toggle_conj( &conjx_use );
|
||||
|
||||
|
||||
// Iterate over elements of x and y to compute:
|
||||
|
||||
@@ -211,7 +211,7 @@ void bli_zdotaxpyv_template_noopt
|
||||
// toggling the effective conjugation of xt and then conjugating the
|
||||
// resulting dot product.
|
||||
if ( bli_is_conj( conjy ) )
|
||||
bli_toggle_conj( conjxt_use );
|
||||
bli_toggle_conj( &conjxt_use );
|
||||
|
||||
|
||||
// Iterate over elements of x, y, and z to compute:
|
||||
|
||||
@@ -264,7 +264,7 @@ void bli_zdotxaxpyf_template_noopt
|
||||
// toggling the effective conjugation of At and then conjugating the
|
||||
// resulting dot products.
|
||||
if ( bli_is_conj( conjw ) )
|
||||
bli_toggle_conj( conjat_use );
|
||||
bli_toggle_conj( &conjat_use );
|
||||
|
||||
|
||||
// Iterate over the columns of A and elements of w and z to compute:
|
||||
|
||||
@@ -237,7 +237,7 @@ void bli_zdotxf_template_noopt
|
||||
// toggling the effective conjugation of A and then conjugating the
|
||||
// resulting product A^T*x.
|
||||
if ( bli_is_conj( conjx ) )
|
||||
bli_toggle_conj( conjat_use );
|
||||
bli_toggle_conj( &conjat_use );
|
||||
|
||||
|
||||
// Iterate over columns of A and rows of x to compute:
|
||||
|
||||
@@ -121,12 +121,12 @@ int main( int argc, char** argv )
|
||||
|
||||
// Let's inspect the amount of padding inserted for alignment. Note
|
||||
// the difference between the m dimension and the column stride.
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a8 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a8 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a8 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a8 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a8 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a8 ) );
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a8 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a8 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a8 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a8 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a8 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a8 ) );
|
||||
|
||||
//
|
||||
// Example 6: Inspect object fields after creation of other floating-
|
||||
@@ -139,28 +139,28 @@ int main( int argc, char** argv )
|
||||
bli_obj_create( BLIS_SCOMPLEX, 3, 5, 0, 0, &a10);
|
||||
bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 );
|
||||
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a9 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a9 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a9 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a9 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a9 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a9 ) );
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a9 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a9 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a9 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a9 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a9 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a9 ) );
|
||||
|
||||
printf( "\n" );
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a10 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a10 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a10 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a10 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a10 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a10 ) );
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a10 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a10 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a10 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a10 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a10 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a10 ) );
|
||||
|
||||
printf( "\n" );
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( a11 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( a11 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( a11 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( a11 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( a11 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( a11 ) );
|
||||
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a11 ) ) );
|
||||
printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a11 ) ) );
|
||||
printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a11 ) );
|
||||
printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a11 ) );
|
||||
printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a11 ) );
|
||||
printf( "col stride: %d\n", ( int )bli_obj_col_stride( &a11 ) );
|
||||
|
||||
//
|
||||
// Example 7: Initialize an object's elements to random values and then
|
||||
|
||||
@@ -148,7 +148,7 @@ int main( int argc, char** argv )
|
||||
// on 'e', the input operand. Transposition can be indicated by setting a
|
||||
// bit in the object. Since it always starts out as "no transpose", we can
|
||||
// simply toggle the bit.
|
||||
bli_obj_toggle_trans( e );
|
||||
bli_obj_toggle_trans( &e );
|
||||
|
||||
// Another way to mark an object for transposition is to set it directly.
|
||||
//bli_obj_set_onlytrans( BLIS_TRANSPOSE, &e );
|
||||
@@ -192,8 +192,8 @@ int main( int argc, char** argv )
|
||||
bli_printm( "h (initial value):", &h, "%4.1f", "" );
|
||||
|
||||
// Set both the transpose and conjugation bits.
|
||||
bli_obj_toggle_trans( g );
|
||||
bli_obj_toggle_conj( g );
|
||||
bli_obj_toggle_trans( &g );
|
||||
bli_obj_toggle_conj( &g );
|
||||
|
||||
// Copy 'g' to 'h', conjugating and transposing 'g' in the process.
|
||||
// Once again, notice that it's the source operand that we've marked for
|
||||
|
||||
@@ -59,14 +59,14 @@ int main( int argc, char** argv )
|
||||
bli_obj_create( dt, m, n, rs, cs, &a );
|
||||
|
||||
// First, we mark the matrix structure as triangular.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, a )
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
|
||||
|
||||
// Next, we specify whether the lower part or the upper part is to be
|
||||
// recognized as the "stored" region (which we call the uplo field). The
|
||||
// strictly opposite part (in this case, the strictly lower region) will
|
||||
// be *assumed* to be zero during computation. However, when printed out,
|
||||
// the strictly lower part may contain junk values.
|
||||
bli_obj_set_uplo( BLIS_UPPER, a );
|
||||
bli_obj_set_uplo( BLIS_UPPER, &a );
|
||||
|
||||
// Now set the upper triangle to random values.
|
||||
bli_randm( &a );
|
||||
@@ -89,8 +89,8 @@ int main( int argc, char** argv )
|
||||
bli_obj_create( dt, m, n, rs, cs, &b );
|
||||
|
||||
// Set structure and uplo.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, b )
|
||||
bli_obj_set_uplo( BLIS_UPPER, b );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &b )
|
||||
bli_obj_set_uplo( BLIS_UPPER, &b );
|
||||
|
||||
// Create an alias, 'bl', of the original object 'b'. Both objects will
|
||||
// refer to the same underlying matrix elements, but now we will have two
|
||||
@@ -98,7 +98,7 @@ int main( int argc, char** argv )
|
||||
// of the objects, meaning no additional memory allocation takes place.
|
||||
// Therefore it is up to the API user (you) to make sure that you only
|
||||
// free the original object (or exactly one of the aliases).
|
||||
bli_obj_alias_to( b, bl );
|
||||
bli_obj_alias_to( &b, &bl );
|
||||
|
||||
// Digression: Each object contains a diagonal offset (even vectors),
|
||||
// even if it is never needed. The diagonal offset for a newly-created
|
||||
@@ -111,10 +111,10 @@ int main( int argc, char** argv )
|
||||
// x-axis value.
|
||||
|
||||
// Set the diagonal offset of 'bl' to -1.
|
||||
bli_obj_set_diag_offset( -1, bl );
|
||||
bli_obj_set_diag_offset( -1, &bl );
|
||||
|
||||
// Set the uplo field of 'bl' to "lower".
|
||||
bli_obj_set_uplo( BLIS_LOWER, bl );
|
||||
bli_obj_set_uplo( BLIS_LOWER, &bl );
|
||||
|
||||
// Set the upper triangle of 'b' to random values.
|
||||
bli_randm( &b );
|
||||
@@ -148,7 +148,7 @@ int main( int argc, char** argv )
|
||||
bli_obj_create( dt, m, n, rs, cs, &c );
|
||||
|
||||
// Reset the diagonal offset of 'bl' to 0.
|
||||
bli_obj_set_diag_offset( 0, bl );
|
||||
bli_obj_set_diag_offset( 0, &bl );
|
||||
|
||||
// Copy the lower triangle of matrix 'b' from Example 2 to object 'c'.
|
||||
// This should give us -1.0 in the strictly lower part and some non-zero
|
||||
@@ -212,7 +212,7 @@ int main( int argc, char** argv )
|
||||
|
||||
// We want to pluck out the lower triangle and transpose it into the upper
|
||||
// triangle of 'd'.
|
||||
bli_obj_toggle_trans( bl );
|
||||
bli_obj_toggle_trans( &bl );
|
||||
|
||||
// Now we copy the transpose of the lower part of 'bl' into the upper
|
||||
// part of 'd'. (Again, notice that we haven't modified any properties of
|
||||
@@ -242,11 +242,11 @@ int main( int argc, char** argv )
|
||||
bli_printm( "e: initial value (all -1.0)", &e, "%4.1f", "" );
|
||||
|
||||
// Create an alias to work with.
|
||||
bli_obj_alias_to( e, el );
|
||||
bli_obj_alias_to( &e, &el );
|
||||
|
||||
// Set structure and uplo of 'el'.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, el )
|
||||
bli_obj_set_uplo( BLIS_LOWER, el );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &el )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &el );
|
||||
|
||||
// Digression: Notice that "triangular" structure does not require that
|
||||
// the matrix be square. Rather, it simply means that either the part above
|
||||
@@ -259,8 +259,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Move the diagonal offset of 'el' to 1 and flip the uplo field to
|
||||
// "upper".
|
||||
bli_obj_set_diag_offset( 1, el );
|
||||
bli_obj_set_uplo( BLIS_UPPER, el );
|
||||
bli_obj_set_diag_offset( 1, &el );
|
||||
bli_obj_set_uplo( BLIS_UPPER, &el );
|
||||
|
||||
// Set the upper triangle to zero.
|
||||
bli_setm( &BLIS_ZERO, &el );
|
||||
@@ -287,11 +287,11 @@ int main( int argc, char** argv )
|
||||
bli_printm( "h: initial value (all -1.0)", &h, "%4.1f", "" );
|
||||
|
||||
// Set the diagonal offset of 'h' to -1.
|
||||
bli_obj_set_diag_offset( -1, h );
|
||||
bli_obj_set_diag_offset( -1, &h );
|
||||
|
||||
// Set the structure and uplo of 'h'.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, h )
|
||||
bli_obj_set_uplo( BLIS_UPPER, h );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &h )
|
||||
bli_obj_set_uplo( BLIS_UPPER, &h );
|
||||
|
||||
// Randomize the elements on and above the first subdiagonal.
|
||||
bli_randm( &h );
|
||||
@@ -299,11 +299,11 @@ int main( int argc, char** argv )
|
||||
bli_printm( "h: after randomizing above first subdiagonal", &h, "%4.1f", "" );
|
||||
|
||||
// Create an alias to work with.
|
||||
bli_obj_alias_to( h, hl );
|
||||
bli_obj_alias_to( &h, &hl );
|
||||
|
||||
// Flip the uplo of 'hl' and move the diagonal down by one.
|
||||
bli_obj_set_uplo( BLIS_LOWER, hl );
|
||||
bli_obj_set_diag_offset( -2, hl );
|
||||
bli_obj_set_uplo( BLIS_LOWER, &hl );
|
||||
bli_obj_set_diag_offset( -2, &hl );
|
||||
|
||||
// Set the region strictly below the first subdiagonal (on or below
|
||||
// the second subdiagonal) to zero.
|
||||
|
||||
@@ -157,8 +157,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'a' as symmetric and stored in the lower triangle, and
|
||||
// then randomize that lower triangle.
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, a );
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &a );
|
||||
bli_randm( &a );
|
||||
|
||||
bli_printm( "x: set to random values", &x, "%4.1f", "" );
|
||||
@@ -200,8 +200,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'a' as symmetric and stored in the upper triangle, and
|
||||
// then randomize that upper triangle.
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, a )
|
||||
bli_obj_set_uplo( BLIS_UPPER, a );
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &a )
|
||||
bli_obj_set_uplo( BLIS_UPPER, &a );
|
||||
bli_randm( &a );
|
||||
|
||||
bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" );
|
||||
@@ -242,8 +242,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'a' as triangular and stored in the lower triangle, and
|
||||
// then randomize that lower triangle.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, a );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &a );
|
||||
bli_randm( &a );
|
||||
|
||||
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
|
||||
@@ -283,8 +283,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'a' as triangular and stored in the lower triangle, and
|
||||
// then randomize that lower triangle.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, a );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &a );
|
||||
bli_randm( &a );
|
||||
|
||||
// Load the diagonal. By setting the diagonal to something of greater
|
||||
|
||||
@@ -111,7 +111,7 @@ int main( int argc, char** argv )
|
||||
bli_setm( &BLIS_ZERO, &cc );
|
||||
|
||||
// Set the transpose bit in 'aa'.
|
||||
bli_obj_toggle_trans( aa );
|
||||
bli_obj_toggle_trans( &aa );
|
||||
|
||||
bli_printm( "a: randomized", &aa, "%4.1f", "" );
|
||||
bli_printm( "b: set to 1.0", &bb, "%4.1f", "" );
|
||||
@@ -148,8 +148,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'c' as symmetric and stored in the lower triangle, and
|
||||
// then randomize that lower triangle.
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, c )
|
||||
bli_obj_set_uplo( BLIS_LOWER, c );
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &c )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &c );
|
||||
bli_randm( &c );
|
||||
|
||||
bli_printm( "a: set to random values", &a, "%4.1f", "" );
|
||||
@@ -194,8 +194,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'a' as symmetric and stored in the upper triangle, and
|
||||
// then randomize that upper triangle.
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, a )
|
||||
bli_obj_set_uplo( BLIS_UPPER, a );
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &a )
|
||||
bli_obj_set_uplo( BLIS_UPPER, &a );
|
||||
bli_randm( &a );
|
||||
|
||||
bli_printm( "a: randomized (zeros in lower triangle)", &a, "%4.1f", "" );
|
||||
@@ -241,8 +241,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'a' as triangular and stored in the lower triangle, and
|
||||
// then randomize that lower triangle.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, a );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &a );
|
||||
bli_randm( &a );
|
||||
|
||||
bli_printm( "a: randomized (zeros in upper triangle)", &a, "%4.1f", "" );
|
||||
@@ -286,8 +286,8 @@ int main( int argc, char** argv )
|
||||
|
||||
// Mark matrix 'a' as triangular and stored in the lower triangle, and
|
||||
// then randomize that lower triangle.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, a );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &a )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &a );
|
||||
bli_randm( &a );
|
||||
|
||||
// Load the diagonal. By setting the diagonal to something of greater
|
||||
|
||||
@@ -147,8 +147,8 @@ int main( int argc, char** argv )
|
||||
bli_setm( &BLIS_MINUS_ONE, &c );
|
||||
|
||||
// Set the structure and uplo of 'c'.
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, c )
|
||||
bli_obj_set_uplo( BLIS_LOWER, c );
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &c )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &c );
|
||||
|
||||
// Randomize the lower triangle of 'c'.
|
||||
bli_randm( &c );
|
||||
@@ -170,8 +170,8 @@ int main( int argc, char** argv )
|
||||
// Initialize all of 'd' to -1.0 to simulate junk values.
|
||||
bli_setm( &BLIS_MINUS_ONE, &d );
|
||||
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, d )
|
||||
bli_obj_set_uplo( BLIS_LOWER, d );
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, &d )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &d );
|
||||
|
||||
// Randomize the lower triangle of 'd'.
|
||||
bli_randm( &d );
|
||||
@@ -185,8 +185,8 @@ int main( int argc, char** argv )
|
||||
bli_printm( "d (after mkherm):", &d, "%4.1f", "" );
|
||||
|
||||
// Set the structure and uplo of 'd'.
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, d )
|
||||
bli_obj_set_uplo( BLIS_LOWER, d );
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, &d )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &d );
|
||||
|
||||
//
|
||||
// Example 4: Make a complex matrix explicitly symmetric or Hermitian.
|
||||
@@ -203,8 +203,8 @@ int main( int argc, char** argv )
|
||||
bli_setm( &BLIS_MINUS_ONE, &e );
|
||||
|
||||
// Set the structure and uplo of 'e'.
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, e )
|
||||
bli_obj_set_uplo( BLIS_UPPER, e );
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &e )
|
||||
bli_obj_set_uplo( BLIS_UPPER, &e );
|
||||
|
||||
// Randomize the upper triangle of 'e'.
|
||||
bli_randm( &e );
|
||||
@@ -221,8 +221,8 @@ int main( int argc, char** argv )
|
||||
bli_setm( &BLIS_MINUS_ONE, &f );
|
||||
|
||||
// Set the structure and uplo of 'f'.
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, f )
|
||||
bli_obj_set_uplo( BLIS_UPPER, f );
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, &f )
|
||||
bli_obj_set_uplo( BLIS_UPPER, &f );
|
||||
|
||||
// Randomize the upper triangle of 'f'.
|
||||
bli_randm( &f );
|
||||
@@ -249,8 +249,8 @@ int main( int argc, char** argv )
|
||||
bli_setm( &BLIS_MINUS_ONE, &g );
|
||||
|
||||
// Set the structure and uplo of 'g'.
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, g )
|
||||
bli_obj_set_uplo( BLIS_LOWER, g );
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &g )
|
||||
bli_obj_set_uplo( BLIS_LOWER, &g );
|
||||
|
||||
// Randomize the lower triangle of 'g'.
|
||||
bli_randm( &g );
|
||||
|
||||
@@ -50,10 +50,10 @@ void PASTEMAC0(opname) \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt_chi; \
|
||||
num_t dt_absq_c = bli_obj_dt_proj_to_complex( *absq ); \
|
||||
num_t dt_absq_c = bli_obj_dt_proj_to_complex( absq ); \
|
||||
\
|
||||
void* buf_chi; \
|
||||
void* buf_absq = bli_obj_buffer_at_off( *absq ); \
|
||||
void* buf_chi; \
|
||||
void* buf_absq = bli_obj_buffer_at_off( absq ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi, absq ); \
|
||||
@@ -61,7 +61,7 @@ void PASTEMAC0(opname) \
|
||||
/* If chi is a scalar constant, use dt_absq_c to extract the address of the
|
||||
corresponding constant value; otherwise, use the datatype encoded
|
||||
within the chi object and extract the buffer at the chi offset. */ \
|
||||
bli_set_scalar_dt_buffer( chi, dt_absq_c, dt_chi, buf_chi ); \
|
||||
bli_obj_scalar_set_dt_buffer( chi, dt_absq_c, &dt_chi, &buf_chi ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_2 \
|
||||
@@ -88,12 +88,12 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *psi ); \
|
||||
num_t dt = bli_obj_dt( psi ); \
|
||||
\
|
||||
conj_t conjchi = bli_obj_conj_status( *chi ); \
|
||||
conj_t conjchi = bli_obj_conj_status( chi ); \
|
||||
\
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \
|
||||
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
|
||||
void* buf_psi = bli_obj_buffer_at_off( psi ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi, psi ); \
|
||||
@@ -125,11 +125,11 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *chi ); \
|
||||
num_t dt = bli_obj_dt( chi ); \
|
||||
\
|
||||
conj_t conjchi = bli_obj_conj_status( *chi ); \
|
||||
conj_t conjchi = bli_obj_conj_status( chi ); \
|
||||
\
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi ); \
|
||||
@@ -158,10 +158,10 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *psi ); \
|
||||
num_t dt = bli_obj_dt( psi ); \
|
||||
\
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt, *chi ); \
|
||||
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt, chi ); \
|
||||
void* buf_psi = bli_obj_buffer_at_off( psi ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi, psi ); \
|
||||
@@ -191,14 +191,14 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt_chi = bli_obj_dt( *chi ); \
|
||||
num_t dt_chi = bli_obj_dt( chi ); \
|
||||
num_t dt_def = BLIS_DCOMPLEX; \
|
||||
num_t dt_use; \
|
||||
\
|
||||
/* If chi is a constant object, default to using the dcomplex
|
||||
value to maximize precision, and since we don't know if the
|
||||
caller needs just the real or the real and imaginary parts. */ \
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt_def, *chi ); \
|
||||
void* buf_chi = bli_obj_buffer_for_1x1( dt_def, chi ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
|
||||
@@ -234,9 +234,9 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt_chi = bli_obj_dt( *chi ); \
|
||||
num_t dt_chi = bli_obj_dt( chi ); \
|
||||
\
|
||||
void* buf_chi = bli_obj_buffer_at_off( *chi ); \
|
||||
void* buf_chi = bli_obj_buffer_at_off( chi ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( zeta_r, zeta_i, chi ); \
|
||||
@@ -268,12 +268,12 @@ void PASTEMAC0(opname) \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt_chi; \
|
||||
num_t dt_zeta_c = bli_obj_dt_proj_to_complex( *zeta_r ); \
|
||||
num_t dt_zeta_c = bli_obj_dt_proj_to_complex( zeta_r ); \
|
||||
\
|
||||
void* buf_chi; \
|
||||
void* buf_chi; \
|
||||
\
|
||||
void* buf_zeta_r = bli_obj_buffer_at_off( *zeta_r ); \
|
||||
void* buf_zeta_i = bli_obj_buffer_at_off( *zeta_i ); \
|
||||
void* buf_zeta_r = bli_obj_buffer_at_off( zeta_r ); \
|
||||
void* buf_zeta_i = bli_obj_buffer_at_off( zeta_i ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
|
||||
@@ -281,7 +281,7 @@ void PASTEMAC0(opname) \
|
||||
/* If chi is a scalar constant, use dt_zeta_c to extract the address of the
|
||||
corresponding constant value; otherwise, use the datatype encoded
|
||||
within the chi object and extract the buffer at the chi offset. */ \
|
||||
bli_set_scalar_dt_buffer( chi, dt_zeta_c, dt_chi, buf_chi ); \
|
||||
bli_obj_scalar_set_dt_buffer( chi, dt_zeta_c, &dt_chi, &buf_chi ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_3 \
|
||||
@@ -309,12 +309,12 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt_chi = bli_obj_dt( *chi ); \
|
||||
num_t dt_chi = bli_obj_dt( chi ); \
|
||||
\
|
||||
void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, *zeta_r ); \
|
||||
void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, *zeta_i ); \
|
||||
void* buf_zeta_r = bli_obj_buffer_for_1x1( dt_chi, zeta_r ); \
|
||||
void* buf_zeta_i = bli_obj_buffer_for_1x1( dt_chi, zeta_i ); \
|
||||
\
|
||||
void* buf_chi = bli_obj_buffer_at_off( *chi ); \
|
||||
void* buf_chi = bli_obj_buffer_at_off( chi ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( chi, zeta_r, zeta_i ); \
|
||||
|
||||
@@ -63,10 +63,10 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
conj_t conjchi = bli_obj_conj_status( *chi ); \
|
||||
conj_t conjchi = bli_obj_conj_status( chi ); \
|
||||
\
|
||||
num_t dt_psi = bli_obj_dt( *psi ); \
|
||||
void* buf_psi = bli_obj_buffer_at_off( *psi ); \
|
||||
num_t dt_psi = bli_obj_dt( psi ); \
|
||||
void* buf_psi = bli_obj_buffer_at_off( psi ); \
|
||||
\
|
||||
num_t dt_chi; \
|
||||
void* buf_chi; \
|
||||
@@ -79,7 +79,7 @@ void PASTEMAC0(opname) \
|
||||
/* If chi is a scalar constant, use dt_psi to extract the address of the
|
||||
corresponding constant value; otherwise, use the datatype encoded
|
||||
within the chi object and extract the buffer at the chi offset. */ \
|
||||
bli_set_scalar_dt_buffer( chi, dt_psi, dt_chi, buf_chi ); \
|
||||
bli_obj_scalar_set_dt_buffer( chi, dt_psi, &dt_chi, &buf_chi ); \
|
||||
\
|
||||
/* Index into the type combination array to extract the correct
|
||||
function pointer. */ \
|
||||
|
||||
@@ -54,14 +54,14 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, y ); \
|
||||
@@ -98,13 +98,13 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_index = bli_obj_buffer_at_off( *index ); \
|
||||
void* buf_index = bli_obj_buffer_at_off( index ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, index ); \
|
||||
@@ -140,14 +140,14 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
void* buf_beta; \
|
||||
@@ -164,8 +164,8 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
alpha, &alpha_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_9 \
|
||||
@@ -200,14 +200,14 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -220,7 +220,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_8 \
|
||||
@@ -255,16 +255,16 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
void* buf_rho = bli_obj_buffer_at_off( *rho ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
void* buf_rho = bli_obj_buffer_at_off( rho ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, y, rho ); \
|
||||
@@ -304,16 +304,16 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
void* buf_rho = bli_obj_buffer_at_off( *rho ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
void* buf_rho = bli_obj_buffer_at_off( rho ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
void* buf_beta; \
|
||||
@@ -330,8 +330,8 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
alpha, &alpha_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_11 \
|
||||
@@ -366,11 +366,11 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x ); \
|
||||
@@ -403,12 +403,12 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -421,7 +421,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_6 \
|
||||
@@ -454,13 +454,13 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, y ); \
|
||||
@@ -495,14 +495,14 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_beta; \
|
||||
\
|
||||
@@ -515,7 +515,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_8 \
|
||||
|
||||
@@ -61,7 +61,7 @@ void bli_packv_init
|
||||
// is NULL, and if so, simply alias the object to its packed counterpart.
|
||||
if ( bli_cntl_is_noop( cntl ) )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
bli_obj_alias_to( a, p );
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -73,15 +73,15 @@ void bli_packv_init
|
||||
// BLIS_NOT_PACKED and thus packing will be called for (but in some
|
||||
// cases packing has already taken place). Also, not all combinations
|
||||
// of current pack status and desired pack schema are valid.
|
||||
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
|
||||
if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
bli_obj_alias_to( a, p );
|
||||
return;
|
||||
}
|
||||
|
||||
// Now, if we are not skipping the pack operation, then the only question
|
||||
// left is whether we are to typecast vector a before packing.
|
||||
if ( bli_obj_dt( *a ) != bli_obj_target_dt( *a ) )
|
||||
if ( bli_obj_dt( a ) != bli_obj_target_dt( a ) )
|
||||
bli_abort();
|
||||
|
||||
// Extract various fields from the control tree and pass them in
|
||||
@@ -113,8 +113,8 @@ siz_t bli_packv_init_pack
|
||||
cntx_t* cntx
|
||||
)
|
||||
{
|
||||
num_t dt = bli_obj_dt( *a );
|
||||
dim_t dim_a = bli_obj_vector_dim( *a );
|
||||
num_t dt = bli_obj_dt( a );
|
||||
dim_t dim_a = bli_obj_vector_dim( a );
|
||||
dim_t bmult = bli_cntx_get_blksz_def_dt( dt, bmult_id, cntx );
|
||||
|
||||
membrk_t* membrk = bli_cntx_membrk( cntx );
|
||||
@@ -129,23 +129,23 @@ siz_t bli_packv_init_pack
|
||||
|
||||
|
||||
// We begin by copying the basic fields of a.
|
||||
bli_obj_alias_to( *a, *p );
|
||||
bli_obj_alias_to( a, p );
|
||||
|
||||
// Update the dimensions.
|
||||
bli_obj_set_dims( dim_a, 1, *p );
|
||||
bli_obj_set_dims( dim_a, 1, p );
|
||||
|
||||
// Reset the view offsets to (0,0).
|
||||
bli_obj_set_offs( 0, 0, *p );
|
||||
bli_obj_set_offs( 0, 0, p );
|
||||
|
||||
// Set the pack schema in the p object to the value in the control tree
|
||||
// node.
|
||||
bli_obj_set_pack_schema( schema, *p );
|
||||
bli_obj_set_pack_schema( schema, p );
|
||||
|
||||
// Compute the dimensions padded by the dimension multiples.
|
||||
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( *p ), bmult );
|
||||
m_p_pad = bli_align_dim_to_mult( bli_obj_vector_dim( p ), bmult );
|
||||
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = m_p_pad * 1 * bli_obj_elem_size( *p );
|
||||
size_p = m_p_pad * 1 * bli_obj_elem_size( p );
|
||||
|
||||
#if 0
|
||||
// Extract the address of the mem_t object within p that will track
|
||||
@@ -179,11 +179,11 @@ siz_t bli_packv_init_pack
|
||||
// copied when the value is already up-to-date, because it persists
|
||||
// in the main object buffer field across loop iterations.)
|
||||
buf = bli_mem_buffer( mem_p );
|
||||
bli_obj_set_buffer( buf, *p );
|
||||
bli_obj_set_buffer( buf, p );
|
||||
#endif
|
||||
|
||||
// Save the padded (packed) dimensions into the packed object.
|
||||
bli_obj_set_padded_dims( m_p_pad, 1, *p );
|
||||
bli_obj_set_padded_dims( m_p_pad, 1, p );
|
||||
|
||||
// Set the row and column strides of p based on the pack schema.
|
||||
if ( schema == BLIS_PACKED_VECTOR )
|
||||
@@ -193,9 +193,9 @@ siz_t bli_packv_init_pack
|
||||
// how much space beyond the vector would need to be zero-padded, if
|
||||
// zero-padding was needed.
|
||||
rs_p = 1;
|
||||
cs_p = bli_obj_padded_length( *p );
|
||||
cs_p = bli_obj_padded_length( p );
|
||||
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
bli_obj_set_strides( rs_p, cs_p, p );
|
||||
}
|
||||
|
||||
return size_p;
|
||||
|
||||
@@ -71,7 +71,7 @@ void bli_packv_int
|
||||
|
||||
// Sanity check; A should never have a zero dimension. If we must support
|
||||
// it, then we should fold it into the next alias-and-early-exit block.
|
||||
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
||||
//if ( bli_obj_has_zero_dim( a ) ) bli_abort();
|
||||
|
||||
// First check if we are to skip this operation because the control tree
|
||||
// is NULL. We return without taking any action because a was already
|
||||
@@ -91,7 +91,7 @@ void bli_packv_int
|
||||
// not important, as long as it's packed into contiguous rows or
|
||||
// contiguous columns. A good example of this is packing for matrix
|
||||
// operands in the level-2 operations.
|
||||
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
|
||||
if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -104,7 +104,7 @@ void bli_packv_int
|
||||
// already taken place, or does not need to take place, and so that will
|
||||
// be indicated by the pack status). Also, not all combinations of
|
||||
// current pack status and desired pack schema are valid.
|
||||
if ( bli_obj_pack_schema( *a ) == cntl_pack_schema( cntl ) )
|
||||
if ( bli_obj_pack_schema( a ) == cntl_pack_schema( cntl ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -51,15 +51,15 @@ void bli_packv_unb_var1( obj_t* c,
|
||||
cntx_t* cntx,
|
||||
packv_t* cntl )
|
||||
{
|
||||
num_t dt_cp = bli_obj_dt( *c );
|
||||
num_t dt_cp = bli_obj_dt( c );
|
||||
|
||||
dim_t dim_p = bli_obj_vector_dim( *p );
|
||||
dim_t dim_p = bli_obj_vector_dim( p );
|
||||
|
||||
void* buf_c = bli_obj_buffer_at_off( *c );
|
||||
inc_t incc = bli_obj_vector_inc( *c );
|
||||
void* buf_c = bli_obj_buffer_at_off( c );
|
||||
inc_t incc = bli_obj_vector_inc( c );
|
||||
|
||||
void* buf_p = bli_obj_buffer_at_off( *p );
|
||||
inc_t incp = bli_obj_vector_inc( *p );
|
||||
void* buf_p = bli_obj_buffer_at_off( p );
|
||||
inc_t incp = bli_obj_vector_inc( p );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_scalv_int( obj_t* alpha,
|
||||
FUNCPTR_T f;
|
||||
|
||||
// Return early if one of the matrix operands has a zero dimension.
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
if ( bli_obj_has_zero_dim( x ) ) return;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
|
||||
@@ -75,7 +75,7 @@ void bli_unpackv_int( obj_t* p,
|
||||
|
||||
// Sanity check; A should never have a zero dimension. If we must support
|
||||
// it, then we should fold it into the next alias-and-early-exit block.
|
||||
if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
||||
if ( bli_obj_has_zero_dim( a ) ) bli_abort();
|
||||
|
||||
// First check if we are to skip this operation because the control tree
|
||||
// is NULL, and if so, simply return.
|
||||
@@ -87,17 +87,17 @@ void bli_unpackv_int( obj_t* p,
|
||||
// If p was aliased to a during the pack stage (because it was already
|
||||
// in an acceptable packed/contiguous format), then no unpack is actually
|
||||
// necessary, so we return.
|
||||
if ( bli_obj_is_alias_of( *p, *a ) )
|
||||
if ( bli_obj_is_alias_of( p, a ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Now, if we are not skipping the unpack operation, then the only
|
||||
// question left is whether we are to typecast vector a after unpacking.
|
||||
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
|
||||
if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
|
||||
bli_abort();
|
||||
/*
|
||||
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
|
||||
if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
|
||||
{
|
||||
// Initialize an object c for the intermediate typecast vector.
|
||||
bli_unpackv_init_cast( p,
|
||||
@@ -110,7 +110,7 @@ void bli_unpackv_int( obj_t* p,
|
||||
// If no cast is needed, then aliasing object c to the original
|
||||
// vector serves as a minor optimization. This causes the unpackv
|
||||
// implementation to unpack directly into vector a.
|
||||
bli_obj_alias_to( *a, c );
|
||||
bli_obj_alias_to( a, &c );
|
||||
}
|
||||
|
||||
// Now we are ready to proceed with the unpacking.
|
||||
@@ -132,7 +132,7 @@ void bli_unpackv_int( obj_t* p,
|
||||
// was not necessary, then we are done because the call to the unpackv
|
||||
// implementation would have unpacked directly to vector a.
|
||||
/*
|
||||
if ( bli_obj_dt( *p ) != bli_obj_dt( *a ) )
|
||||
if ( bli_obj_dt( p ) != bli_obj_dt( a ) )
|
||||
{
|
||||
// Copy/typecast vector c to vector a.
|
||||
// NOTE: Here, we use copynzv instead of copym because, in the cases
|
||||
@@ -179,26 +179,26 @@ void bli_unpackv_init_cast( obj_t* p,
|
||||
// already available. (After acquiring a mem entry from the memory
|
||||
// manager, it is cached within p for quick access later on.)
|
||||
|
||||
num_t dt_targ_a = bli_obj_target_dt( *a );
|
||||
dim_t dim_a = bli_obj_vector_dim( *a );
|
||||
num_t dt_targ_a = bli_obj_target_dt( a );
|
||||
dim_t dim_a = bli_obj_vector_dim( a );
|
||||
siz_t elem_size_c = bli_dt_size( dt_targ_a );
|
||||
|
||||
// We begin by copying the basic fields of a.
|
||||
bli_obj_alias_to( *a, *c );
|
||||
bli_obj_alias_to( a, c );
|
||||
|
||||
// Update datatype and element size fields.
|
||||
bli_obj_set_dt( dt_targ_a, *c );
|
||||
bli_obj_set_elem_size( elem_size_c, *c );
|
||||
bli_obj_set_dt( dt_targ_a, c );
|
||||
bli_obj_set_elem_size( elem_size_c, c );
|
||||
|
||||
// Update the strides and dimensions. We set the increments to reflect a
|
||||
// column-stored vector. Note that the column stride is set to dim(a),
|
||||
// though it should never be used because there is no second column to
|
||||
// index into (and therefore it also does not need to be aligned).
|
||||
bli_obj_set_dims( dim_a, 1, *c );
|
||||
bli_obj_set_strides( 1, dim_a, *c );
|
||||
bli_obj_set_dims( dim_a, 1, c );
|
||||
bli_obj_set_strides( 1, dim_a, c );
|
||||
|
||||
// Reset the view offsets to (0,0).
|
||||
bli_obj_set_offs( 0, 0, *c );
|
||||
bli_obj_set_offs( 0, 0, c );
|
||||
|
||||
// Check the mem_t entry of p associated with the cast buffer. If it is
|
||||
// NULL, then acquire memory sufficient to hold the object data and cache
|
||||
|
||||
@@ -51,15 +51,15 @@ void bli_unpackv_unb_var1( obj_t* p,
|
||||
cntx_t* cntx,
|
||||
unpackv_t* cntl )
|
||||
{
|
||||
num_t dt_pc = bli_obj_dt( *p );
|
||||
num_t dt_pc = bli_obj_dt( p );
|
||||
|
||||
dim_t dim_c = bli_obj_vector_dim( *c );
|
||||
dim_t dim_c = bli_obj_vector_dim( c );
|
||||
|
||||
void* buf_p = bli_obj_buffer_at_off( *p );
|
||||
inc_t incp = bli_obj_vector_inc( *p );
|
||||
void* buf_p = bli_obj_buffer_at_off( p );
|
||||
inc_t incp = bli_obj_vector_inc( p );
|
||||
|
||||
void* buf_c = bli_obj_buffer_at_off( *c );
|
||||
inc_t incc = bli_obj_vector_inc( *c );
|
||||
void* buf_c = bli_obj_buffer_at_off( c );
|
||||
inc_t incc = bli_obj_vector_inc( c );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
|
||||
@@ -54,19 +54,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
diag_t diagx = bli_obj_diag( *x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( *x ); \
|
||||
dim_t m = bli_obj_length( *y ); \
|
||||
dim_t n = bli_obj_width( *y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( *y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( *y ); \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
diag_t diagx = bli_obj_diag( x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( x ); \
|
||||
dim_t m = bli_obj_length( y ); \
|
||||
dim_t n = bli_obj_width( y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( y ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, y ); \
|
||||
@@ -107,19 +107,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
diag_t diagx = bli_obj_diag( *x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( *x ); \
|
||||
dim_t m = bli_obj_length( *y ); \
|
||||
dim_t n = bli_obj_width( *y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( *y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( *y ); \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
diag_t diagx = bli_obj_diag( x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( x ); \
|
||||
dim_t m = bli_obj_length( y ); \
|
||||
dim_t n = bli_obj_width( y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -132,7 +132,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_13 \
|
||||
@@ -168,14 +168,14 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
dim_t m = bli_obj_length( *x ); \
|
||||
dim_t n = bli_obj_width( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x ); \
|
||||
@@ -210,15 +210,15 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
dim_t m = bli_obj_length( *x ); \
|
||||
dim_t n = bli_obj_width( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -231,7 +231,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_9 \
|
||||
@@ -266,16 +266,16 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
dim_t m = bli_obj_length( *x ); \
|
||||
dim_t n = bli_obj_width( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( alpha, x ); \
|
||||
|
||||
@@ -70,9 +70,12 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Determine the distance to the diagonals, the number of diagonal
|
||||
elements, and the diagonal increments. */ \
|
||||
bli_set_dims_incs_2d( diagoffx, transx, \
|
||||
m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
offx, offy, n_elem, incx, incy ); \
|
||||
bli_set_dims_incs_2d \
|
||||
( \
|
||||
diagoffx, transx, \
|
||||
m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
&offx, &offy, &n_elem, &incx, &incy \
|
||||
); \
|
||||
\
|
||||
conjx = bli_extract_conj( transx ); \
|
||||
\
|
||||
@@ -144,9 +147,12 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Determine the distance to the diagonals, the number of diagonal
|
||||
elements, and the diagonal increments. */ \
|
||||
bli_set_dims_incs_2d( diagoffx, transx, \
|
||||
m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
offx, offy, n_elem, incx, incy ); \
|
||||
bli_set_dims_incs_2d \
|
||||
( \
|
||||
diagoffx, transx, \
|
||||
m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
&offx, &offy, &n_elem, &incx, &incy \
|
||||
); \
|
||||
\
|
||||
conjx = bli_extract_conj( transx ); \
|
||||
\
|
||||
@@ -212,9 +218,12 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Determine the distance to the diagonals, the number of diagonal
|
||||
elements, and the diagonal increments. */ \
|
||||
bli_set_dims_incs_1d( diagoffx, \
|
||||
m, n, rs_x, cs_x, \
|
||||
offx, n_elem, incx ); \
|
||||
bli_set_dims_incs_1d \
|
||||
( \
|
||||
diagoffx, \
|
||||
m, n, rs_x, cs_x, \
|
||||
&offx, &n_elem, &incx \
|
||||
); \
|
||||
\
|
||||
x1 = x + offx; \
|
||||
\
|
||||
@@ -264,9 +273,12 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Determine the distance to the diagonals, the number of diagonal
|
||||
elements, and the diagonal increments. */ \
|
||||
bli_set_dims_incs_1d( diagoffx, \
|
||||
m, n, rs_x, cs_x, \
|
||||
offx, n_elem, incx ); \
|
||||
bli_set_dims_incs_1d \
|
||||
( \
|
||||
diagoffx, \
|
||||
m, n, rs_x, cs_x, \
|
||||
&offx, &n_elem, &incx \
|
||||
); \
|
||||
\
|
||||
x1 = x + offx; \
|
||||
\
|
||||
@@ -322,9 +334,12 @@ void PASTEMAC(ch,opname) \
|
||||
\
|
||||
/* Determine the distance to the diagonals, the number of diagonal
|
||||
elements, and the diagonal increments. */ \
|
||||
bli_set_dims_incs_1d( diagoffx, \
|
||||
m, n, rs_x, cs_x, \
|
||||
offx, n_elem, incx ); \
|
||||
bli_set_dims_incs_1d \
|
||||
( \
|
||||
diagoffx, \
|
||||
m, n, rs_x, cs_x, \
|
||||
&offx, &n_elem, &incx \
|
||||
); \
|
||||
\
|
||||
/* Alternate implementation. (Substitute for remainder of function). */ \
|
||||
/* for ( i = 0; i < n_elem; ++i ) \
|
||||
|
||||
@@ -146,10 +146,10 @@ void bli_axpyf_check
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
@@ -334,16 +334,16 @@ void bli_dotxaxpyf_check
|
||||
e_val = bli_check_conformal_dims( at, a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_length_equals( at, bli_obj_vector_dim( *w ) );
|
||||
e_val = bli_check_object_length_equals( at, bli_obj_vector_dim( w ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_width_equals( at, bli_obj_vector_dim( *y ) );
|
||||
e_val = bli_check_object_width_equals( at, bli_obj_vector_dim( y ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_length_equals( a, bli_obj_vector_dim( *z ) );
|
||||
e_val = bli_check_object_length_equals( a, bli_obj_vector_dim( z ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_object_width_equals( a, bli_obj_vector_dim( *x ) );
|
||||
e_val = bli_check_object_width_equals( a, bli_obj_vector_dim( x ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object aliases.
|
||||
@@ -424,10 +424,10 @@ void bli_dotxf_check
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
@@ -57,17 +57,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
void* buf_z = bli_obj_buffer_at_off( *z ); \
|
||||
inc_t inc_z = bli_obj_vector_inc( *z ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
void* buf_z = bli_obj_buffer_at_off( z ); \
|
||||
inc_t inc_z = bli_obj_vector_inc( z ); \
|
||||
\
|
||||
void* buf_alphax; \
|
||||
void* buf_alphay; \
|
||||
@@ -84,8 +84,8 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
alphax, &alphax_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alphay, &alphay_local ); \
|
||||
buf_alphax = bli_obj_buffer_for_1x1( dt, alphax_local ); \
|
||||
buf_alphay = bli_obj_buffer_for_1x1( dt, alphay_local ); \
|
||||
buf_alphax = bli_obj_buffer_for_1x1( dt, &alphax_local ); \
|
||||
buf_alphay = bli_obj_buffer_for_1x1( dt, &alphay_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_12 \
|
||||
@@ -123,19 +123,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conja = bli_obj_conj_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t m = bli_obj_vector_dim( *y ); \
|
||||
dim_t b_n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
conj_t conja = bli_obj_conj_status( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t m = bli_obj_vector_dim( y ); \
|
||||
dim_t b_n = bli_obj_vector_dim( x ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -148,10 +148,10 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Support cases where matrix A requires a transposition. */ \
|
||||
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \
|
||||
if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_13 \
|
||||
@@ -191,19 +191,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjxt = bli_obj_conj_status( *xt ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
dim_t n = bli_obj_vector_dim( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
void* buf_z = bli_obj_buffer_at_off( *z ); \
|
||||
inc_t inc_z = bli_obj_vector_inc( *z ); \
|
||||
void* buf_rho = bli_obj_buffer_at_off( *rho ); \
|
||||
conj_t conjxt = bli_obj_conj_status( xt ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
dim_t n = bli_obj_vector_dim( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
void* buf_z = bli_obj_buffer_at_off( z ); \
|
||||
inc_t inc_z = bli_obj_vector_inc( z ); \
|
||||
void* buf_rho = bli_obj_buffer_at_off( rho ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -216,7 +216,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_13 \
|
||||
@@ -259,25 +259,25 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjat = bli_obj_conj_status( *at ); \
|
||||
conj_t conja = bli_obj_conj_status( *a ); \
|
||||
conj_t conjw = bli_obj_conj_status( *w ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t m = bli_obj_vector_dim( *z ); \
|
||||
dim_t b_n = bli_obj_vector_dim( *y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_w = bli_obj_buffer_at_off( *w ); \
|
||||
inc_t inc_w = bli_obj_vector_inc( *w ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
void* buf_z = bli_obj_buffer_at_off( *z ); \
|
||||
inc_t inc_z = bli_obj_vector_inc( *z ); \
|
||||
conj_t conjat = bli_obj_conj_status( at ); \
|
||||
conj_t conja = bli_obj_conj_status( a ); \
|
||||
conj_t conjw = bli_obj_conj_status( w ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t m = bli_obj_vector_dim( z ); \
|
||||
dim_t b_n = bli_obj_vector_dim( y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
void* buf_w = bli_obj_buffer_at_off( w ); \
|
||||
inc_t inc_w = bli_obj_vector_inc( w ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
void* buf_z = bli_obj_buffer_at_off( z ); \
|
||||
inc_t inc_z = bli_obj_vector_inc( z ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
void* buf_beta; \
|
||||
@@ -294,11 +294,11 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
alpha, &alpha_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
|
||||
\
|
||||
/* Support cases where matrix A requires a transposition. */ \
|
||||
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \
|
||||
if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_20 \
|
||||
@@ -342,19 +342,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
conj_t conjat = bli_obj_conj_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t m = bli_obj_vector_dim( *x ); \
|
||||
dim_t b_n = bli_obj_vector_dim( *y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( *y ); \
|
||||
conj_t conjat = bli_obj_conj_status( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t m = bli_obj_vector_dim( x ); \
|
||||
dim_t b_n = bli_obj_vector_dim( y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t inc_x = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t inc_y = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
void* buf_beta; \
|
||||
@@ -371,11 +371,11 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
alpha, &alpha_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
|
||||
\
|
||||
/* Support cases where matrix A requires a transposition. */ \
|
||||
if ( bli_obj_has_trans( *a ) ) { bli_swap_incs( rs_a, cs_a ); } \
|
||||
if ( bli_obj_has_trans( a ) ) { bli_swap_incs( &rs_a, &cs_a ); } \
|
||||
\
|
||||
/* Invoke the void pointer-based function. */ \
|
||||
bli_call_ft_14 \
|
||||
|
||||
@@ -54,20 +54,20 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
diag_t diagx = bli_obj_diag( *x ); \
|
||||
uplo_t uplox = bli_obj_uplo( *x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( *x ); \
|
||||
dim_t m = bli_obj_length( *y ); \
|
||||
dim_t n = bli_obj_width( *y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( *y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( *y ); \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
diag_t diagx = bli_obj_diag( x ); \
|
||||
uplo_t uplox = bli_obj_uplo( x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( x ); \
|
||||
dim_t m = bli_obj_length( y ); \
|
||||
dim_t n = bli_obj_width( y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( y ); \
|
||||
\
|
||||
if ( bli_error_checking_is_enabled() ) \
|
||||
PASTEMAC(opname,_check)( x, y ); \
|
||||
@@ -109,20 +109,20 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
diag_t diagx = bli_obj_diag( *x ); \
|
||||
uplo_t uplox = bli_obj_uplo( *x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( *x ); \
|
||||
dim_t m = bli_obj_length( *y ); \
|
||||
dim_t n = bli_obj_width( *y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( *y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( *y ); \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
diag_t diagx = bli_obj_diag( x ); \
|
||||
uplo_t uplox = bli_obj_uplo( x ); \
|
||||
trans_t transx = bli_obj_conjtrans_status( x ); \
|
||||
dim_t m = bli_obj_length( y ); \
|
||||
dim_t n = bli_obj_width( y ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t rs_y = bli_obj_row_stride( y ); \
|
||||
inc_t cs_y = bli_obj_col_stride( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -135,7 +135,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_14 \
|
||||
@@ -173,17 +173,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
diag_t diagx = bli_obj_diag( *x ); \
|
||||
uplo_t uplox = bli_obj_uplo( *x ); \
|
||||
dim_t m = bli_obj_length( *x ); \
|
||||
dim_t n = bli_obj_width( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
diag_t diagx = bli_obj_diag( x ); \
|
||||
uplo_t uplox = bli_obj_uplo( x ); \
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -194,7 +194,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
PASTEMAC(opname,_check)( alpha, x ); \
|
||||
\
|
||||
/* Alias x to x_local so we can apply alpha if it is non-unit. */ \
|
||||
bli_obj_alias_to( *x, x_local ); \
|
||||
bli_obj_alias_to( x, &x_local ); \
|
||||
\
|
||||
/* If alpha is non-unit, apply it to the scalar attached to x. */ \
|
||||
if ( !bli_obj_equals( alpha, &BLIS_ONE ) ) \
|
||||
@@ -209,7 +209,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
/* Grab the address of the internal scalar buffer for the scalar
|
||||
attached to x. */ \
|
||||
buf_alpha = bli_obj_internal_scalar_buffer( x_local ); \
|
||||
buf_alpha = bli_obj_internal_scalar_buffer( &x_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_11 \
|
||||
@@ -245,17 +245,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *x ); \
|
||||
num_t dt = bli_obj_dt( x ); \
|
||||
\
|
||||
/* conj_t conjalpha = bli_obj_conj_status( *alpha ); */ \
|
||||
doff_t diagoffx = bli_obj_diag_offset( *x ); \
|
||||
diag_t diagx = bli_obj_diag( *x ); \
|
||||
uplo_t uplox = bli_obj_uplo( *x ); \
|
||||
dim_t m = bli_obj_length( *x ); \
|
||||
dim_t n = bli_obj_width( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( *x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( *x ); \
|
||||
/* conj_t conjalpha = bli_obj_conj_status( alpha ); */ \
|
||||
doff_t diagoffx = bli_obj_diag_offset( x ); \
|
||||
diag_t diagx = bli_obj_diag( x ); \
|
||||
uplo_t uplox = bli_obj_uplo( x ); \
|
||||
dim_t m = bli_obj_length( x ); \
|
||||
dim_t n = bli_obj_width( x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t rs_x = bli_obj_row_stride( x ); \
|
||||
inc_t cs_x = bli_obj_col_stride( x ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -268,7 +268,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_11 \
|
||||
|
||||
@@ -146,7 +146,7 @@ void PASTEMAC(ch,opname) \
|
||||
ctype* one = PASTEMAC(ch,1); \
|
||||
\
|
||||
if ( bli_does_trans( transx ) ) \
|
||||
bli_negate_diag_offset( diagoffy ); \
|
||||
bli_negate_diag_offset( &diagoffy ); \
|
||||
\
|
||||
PASTEMAC(ch,setd) \
|
||||
( \
|
||||
@@ -299,7 +299,7 @@ void PASTEMAC(ch,opname) \
|
||||
doff_t diagoffy = diagoffx; \
|
||||
\
|
||||
if ( bli_does_trans( transx ) ) \
|
||||
bli_negate_diag_offset( diagoffy ); \
|
||||
bli_negate_diag_offset( &diagoffy ); \
|
||||
\
|
||||
PASTEMAC(ch,setd) \
|
||||
( \
|
||||
|
||||
@@ -68,10 +68,13 @@ void PASTEMAC(ch,opname) \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
bli_set_dims_incs_uplo_2m( diagoffx, diagx, transx, \
|
||||
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
uplox_eff, n_elem_max, n_iter, incx, ldx, incy, ldy, \
|
||||
ij0, n_shift ); \
|
||||
bli_set_dims_incs_uplo_2m \
|
||||
( \
|
||||
diagoffx, diagx, transx, \
|
||||
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
|
||||
&ij0, &n_shift \
|
||||
); \
|
||||
\
|
||||
if ( bli_is_zeros( uplox_eff ) ) return; \
|
||||
\
|
||||
@@ -181,10 +184,13 @@ void PASTEMAC(ch,opname) \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
bli_set_dims_incs_uplo_2m( diagoffx, diagx, transx, \
|
||||
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
uplox_eff, n_elem_max, n_iter, incx, ldx, incy, ldy, \
|
||||
ij0, n_shift ); \
|
||||
bli_set_dims_incs_uplo_2m \
|
||||
( \
|
||||
diagoffx, diagx, transx, \
|
||||
uplox, m, n, rs_x, cs_x, rs_y, cs_y, \
|
||||
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, &incy, &ldy, \
|
||||
&ij0, &n_shift \
|
||||
); \
|
||||
\
|
||||
if ( bli_is_zeros( uplox_eff ) ) return; \
|
||||
\
|
||||
@@ -292,10 +298,13 @@ void PASTEMAC(ch,opname) \
|
||||
dim_t ij0, n_shift; \
|
||||
\
|
||||
/* Set various loop parameters. */ \
|
||||
bli_set_dims_incs_uplo_1m( diagoffx, diagx, \
|
||||
uplox, m, n, rs_x, cs_x, \
|
||||
uplox_eff, n_elem_max, n_iter, incx, ldx, \
|
||||
ij0, n_shift ); \
|
||||
bli_set_dims_incs_uplo_1m \
|
||||
( \
|
||||
diagoffx, diagx, \
|
||||
uplox, m, n, rs_x, cs_x, \
|
||||
&uplox_eff, &n_elem_max, &n_iter, &incx, &ldx, \
|
||||
&ij0, &n_shift \
|
||||
); \
|
||||
\
|
||||
if ( bli_is_zeros( uplox_eff ) ) return; \
|
||||
\
|
||||
|
||||
@@ -108,33 +108,33 @@ void bli_packm_blk_var1
|
||||
thrinfo_t* t
|
||||
)
|
||||
{
|
||||
num_t dt_cp = bli_obj_dt( *c );
|
||||
num_t dt_cp = bli_obj_dt( c );
|
||||
|
||||
struc_t strucc = bli_obj_struc( *c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( *c );
|
||||
diag_t diagc = bli_obj_diag( *c );
|
||||
uplo_t uploc = bli_obj_uplo( *c );
|
||||
trans_t transc = bli_obj_conjtrans_status( *c );
|
||||
pack_t schema = bli_obj_pack_schema( *p );
|
||||
bool_t invdiag = bli_obj_has_inverted_diag( *p );
|
||||
bool_t revifup = bli_obj_is_pack_rev_if_upper( *p );
|
||||
bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p );
|
||||
struc_t strucc = bli_obj_struc( c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( c );
|
||||
diag_t diagc = bli_obj_diag( c );
|
||||
uplo_t uploc = bli_obj_uplo( c );
|
||||
trans_t transc = bli_obj_conjtrans_status( c );
|
||||
pack_t schema = bli_obj_pack_schema( p );
|
||||
bool_t invdiag = bli_obj_has_inverted_diag( p );
|
||||
bool_t revifup = bli_obj_is_pack_rev_if_upper( p );
|
||||
bool_t reviflo = bli_obj_is_pack_rev_if_lower( p );
|
||||
|
||||
dim_t m_p = bli_obj_length( *p );
|
||||
dim_t n_p = bli_obj_width( *p );
|
||||
dim_t m_max_p = bli_obj_padded_length( *p );
|
||||
dim_t n_max_p = bli_obj_padded_width( *p );
|
||||
dim_t m_p = bli_obj_length( p );
|
||||
dim_t n_p = bli_obj_width( p );
|
||||
dim_t m_max_p = bli_obj_padded_length( p );
|
||||
dim_t n_max_p = bli_obj_padded_width( p );
|
||||
|
||||
void* buf_c = bli_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bli_obj_row_stride( *c );
|
||||
inc_t cs_c = bli_obj_col_stride( *c );
|
||||
void* buf_c = bli_obj_buffer_at_off( c );
|
||||
inc_t rs_c = bli_obj_row_stride( c );
|
||||
inc_t cs_c = bli_obj_col_stride( c );
|
||||
|
||||
void* buf_p = bli_obj_buffer_at_off( *p );
|
||||
inc_t rs_p = bli_obj_row_stride( *p );
|
||||
inc_t cs_p = bli_obj_col_stride( *p );
|
||||
inc_t is_p = bli_obj_imag_stride( *p );
|
||||
dim_t pd_p = bli_obj_panel_dim( *p );
|
||||
inc_t ps_p = bli_obj_panel_stride( *p );
|
||||
void* buf_p = bli_obj_buffer_at_off( p );
|
||||
inc_t rs_p = bli_obj_row_stride( p );
|
||||
inc_t cs_p = bli_obj_col_stride( p );
|
||||
inc_t is_p = bli_obj_imag_stride( p );
|
||||
dim_t pd_p = bli_obj_panel_dim( p );
|
||||
inc_t ps_p = bli_obj_panel_stride( p );
|
||||
|
||||
obj_t kappa;
|
||||
obj_t* kappa_p;
|
||||
@@ -155,7 +155,7 @@ void bli_packm_blk_var1
|
||||
// higher-level operation. Thus, we use BLIS_ONE for kappa so
|
||||
// that the underlying packm implementation does not perform
|
||||
// any scaling during packing.
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
|
||||
}
|
||||
else // if ( bli_is_ind_packed( schema ) )
|
||||
{
|
||||
@@ -187,7 +187,7 @@ void bli_packm_blk_var1
|
||||
}
|
||||
|
||||
// Acquire the buffer to the kappa chosen above.
|
||||
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, *kappa_p );
|
||||
buf_kappa = bli_obj_buffer_for_1x1( dt_cp, kappa_p );
|
||||
}
|
||||
|
||||
|
||||
@@ -344,10 +344,10 @@ void PASTEMAC(ch,varname) \
|
||||
express the remaining parameters and code. */ \
|
||||
if ( bli_does_trans( transc ) ) \
|
||||
{ \
|
||||
bli_swap_incs( rs_c, cs_c ); \
|
||||
bli_negate_diag_offset( diagoffc ); \
|
||||
bli_toggle_uplo( uploc ); \
|
||||
bli_toggle_trans( transc ); \
|
||||
bli_swap_incs( &rs_c, &cs_c ); \
|
||||
bli_negate_diag_offset( &diagoffc ); \
|
||||
bli_toggle_uplo( &uploc ); \
|
||||
bli_toggle_trans( &transc ); \
|
||||
} \
|
||||
\
|
||||
/* Create flags to indicate row or column storage. Note that the
|
||||
|
||||
@@ -68,33 +68,33 @@ void bli_packm_blk_var1( obj_t* c,
|
||||
obj_t* p,
|
||||
packm_thrinfo_t* t )
|
||||
{
|
||||
num_t dt_cp = bli_obj_dt( *c );
|
||||
num_t dt_cp = bli_obj_dt( c );
|
||||
|
||||
struc_t strucc = bli_obj_struc( *c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( *c );
|
||||
diag_t diagc = bli_obj_diag( *c );
|
||||
uplo_t uploc = bli_obj_uplo( *c );
|
||||
trans_t transc = bli_obj_conjtrans_status( *c );
|
||||
pack_t schema = bli_obj_pack_schema( *p );
|
||||
bool_t invdiag = bli_obj_has_inverted_diag( *p );
|
||||
bool_t revifup = bli_obj_is_pack_rev_if_upper( *p );
|
||||
bool_t reviflo = bli_obj_is_pack_rev_if_lower( *p );
|
||||
struc_t strucc = bli_obj_struc( c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( c );
|
||||
diag_t diagc = bli_obj_diag( c );
|
||||
uplo_t uploc = bli_obj_uplo( c );
|
||||
trans_t transc = bli_obj_conjtrans_status( c );
|
||||
pack_t schema = bli_obj_pack_schema( p );
|
||||
bool_t invdiag = bli_obj_has_inverted_diag( p );
|
||||
bool_t revifup = bli_obj_is_pack_rev_if_upper( p );
|
||||
bool_t reviflo = bli_obj_is_pack_rev_if_lower( p );
|
||||
|
||||
dim_t m_p = bli_obj_length( *p );
|
||||
dim_t n_p = bli_obj_width( *p );
|
||||
dim_t m_max_p = bli_obj_padded_length( *p );
|
||||
dim_t n_max_p = bli_obj_padded_width( *p );
|
||||
dim_t m_p = bli_obj_length( p );
|
||||
dim_t n_p = bli_obj_width( p );
|
||||
dim_t m_max_p = bli_obj_padded_length( p );
|
||||
dim_t n_max_p = bli_obj_padded_width( p );
|
||||
|
||||
void* buf_c = bli_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bli_obj_row_stride( *c );
|
||||
inc_t cs_c = bli_obj_col_stride( *c );
|
||||
void* buf_c = bli_obj_buffer_at_off( c );
|
||||
inc_t rs_c = bli_obj_row_stride( c );
|
||||
inc_t cs_c = bli_obj_col_stride( c );
|
||||
|
||||
void* buf_p = bli_obj_buffer_at_off( *p );
|
||||
inc_t rs_p = bli_obj_row_stride( *p );
|
||||
inc_t cs_p = bli_obj_col_stride( *p );
|
||||
inc_t is_p = bli_obj_imag_stride( *p );
|
||||
dim_t pd_p = bli_obj_panel_dim( *p );
|
||||
inc_t ps_p = bli_obj_panel_stride( *p );
|
||||
void* buf_p = bli_obj_buffer_at_off( p );
|
||||
inc_t rs_p = bli_obj_row_stride( p );
|
||||
inc_t cs_p = bli_obj_col_stride( p );
|
||||
inc_t is_p = bli_obj_imag_stride( p );
|
||||
dim_t pd_p = bli_obj_panel_dim( p );
|
||||
inc_t ps_p = bli_obj_panel_stride( p );
|
||||
|
||||
void* buf_kappa;
|
||||
|
||||
@@ -107,7 +107,7 @@ void bli_packm_blk_var1( obj_t* c,
|
||||
// alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
|
||||
// for kappa so that the underlying packm implementation does not
|
||||
// scale during packing.
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
|
||||
|
||||
// Choose the correct func_t object.
|
||||
packm_kers = packm_struc_cxk_kers;
|
||||
@@ -222,10 +222,10 @@ void PASTEMAC(ch,varname) \
|
||||
express the remaining parameters and code. */ \
|
||||
if ( bli_does_trans( transc ) ) \
|
||||
{ \
|
||||
bli_swap_incs( rs_c, cs_c ); \
|
||||
bli_negate_diag_offset( diagoffc ); \
|
||||
bli_toggle_uplo( uploc ); \
|
||||
bli_toggle_trans( transc ); \
|
||||
bli_swap_incs( &rs_c, &cs_c ); \
|
||||
bli_negate_diag_offset( &diagoffc ); \
|
||||
bli_toggle_uplo( &uploc ); \
|
||||
bli_toggle_trans( &transc ); \
|
||||
} \
|
||||
\
|
||||
/* Create flags to indicate row or column storage. Note that the
|
||||
|
||||
@@ -83,9 +83,9 @@ siz_t bli_packm_init
|
||||
// not important, as long as it's packed into contiguous rows or
|
||||
// contiguous columns. A good example of this is packing for matrix
|
||||
// operands in the level-2 operations.
|
||||
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
|
||||
if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
bli_obj_alias_to( a, p );
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -97,18 +97,18 @@ siz_t bli_packm_init
|
||||
// already taken place, or does not need to take place, and so that will
|
||||
// be indicated by the pack status). Also, not all combinations of
|
||||
// current pack status and desired pack schema are valid.
|
||||
if ( bli_obj_pack_schema( *a ) == pack_schema )
|
||||
if ( bli_obj_pack_schema( a ) == pack_schema )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
bli_obj_alias_to( a, p );
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
// If the object is marked as being filled with zeros, then we can skip
|
||||
// the packm operation entirely and alias.
|
||||
if ( bli_obj_is_zeros( *a ) )
|
||||
if ( bli_obj_is_zeros( a ) )
|
||||
{
|
||||
bli_obj_alias_to( *a, *p );
|
||||
bli_obj_alias_to( a, p );
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -189,10 +189,10 @@ siz_t bli_packm_init_pack
|
||||
{
|
||||
bli_init_once();
|
||||
|
||||
num_t dt = bli_obj_dt( *a );
|
||||
trans_t transa = bli_obj_onlytrans_status( *a );
|
||||
dim_t m_a = bli_obj_length( *a );
|
||||
dim_t n_a = bli_obj_width( *a );
|
||||
num_t dt = bli_obj_dt( a );
|
||||
trans_t transa = bli_obj_onlytrans_status( a );
|
||||
dim_t m_a = bli_obj_length( a );
|
||||
dim_t n_a = bli_obj_width( a );
|
||||
dim_t bmult_m_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_m, cntx );
|
||||
dim_t bmult_m_pack = bli_cntx_get_blksz_max_dt( dt, bmult_id_m, cntx );
|
||||
dim_t bmult_n_def = bli_cntx_get_blksz_def_dt( dt, bmult_id_n, cntx );
|
||||
@@ -207,7 +207,7 @@ siz_t bli_packm_init_pack
|
||||
|
||||
|
||||
// We begin by copying the fields of A.
|
||||
bli_obj_alias_to( *a, *p );
|
||||
bli_obj_alias_to( a, p );
|
||||
|
||||
// Update the dimension fields to explicitly reflect a transposition,
|
||||
// if needed.
|
||||
@@ -219,13 +219,13 @@ siz_t bli_packm_init_pack
|
||||
// we either toggle the uplo of P.
|
||||
// Finally, we mark P as dense since we assume that all matrices,
|
||||
// regardless of structure, will be densified.
|
||||
bli_obj_set_dims_with_trans( transa, m_a, n_a, *p );
|
||||
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, *p );
|
||||
bli_obj_set_dims_with_trans( transa, m_a, n_a, p );
|
||||
bli_obj_set_conjtrans( BLIS_NO_TRANSPOSE, p );
|
||||
if ( bli_does_trans( transa ) )
|
||||
{
|
||||
bli_obj_negate_diag_offset( *p );
|
||||
if ( bli_obj_is_upper_or_lower( *a ) )
|
||||
bli_obj_toggle_uplo( *p );
|
||||
bli_obj_negate_diag_offset( p );
|
||||
if ( bli_obj_is_upper_or_lower( a ) )
|
||||
bli_obj_toggle_uplo( p );
|
||||
}
|
||||
|
||||
// If we are packing micro-panels, mark P as dense. Otherwise, we are
|
||||
@@ -236,22 +236,22 @@ siz_t bli_packm_init_pack
|
||||
// execute a "lower" or "upper" branch of code.
|
||||
if ( bli_is_panel_packed( schema ) )
|
||||
{
|
||||
bli_obj_set_uplo( BLIS_DENSE, *p );
|
||||
bli_obj_set_uplo( BLIS_DENSE, p );
|
||||
}
|
||||
|
||||
// Reset the view offsets to (0,0).
|
||||
bli_obj_set_offs( 0, 0, *p );
|
||||
bli_obj_set_offs( 0, 0, p );
|
||||
|
||||
// Set the invert diagonal field.
|
||||
bli_obj_set_invert_diag( invert_diag, *p );
|
||||
bli_obj_set_invert_diag( invert_diag, p );
|
||||
|
||||
// Set the pack status of P to the pack schema prescribed in the control
|
||||
// tree node.
|
||||
bli_obj_set_pack_schema( schema, *p );
|
||||
bli_obj_set_pack_schema( schema, p );
|
||||
|
||||
// Set the packing order bits.
|
||||
bli_obj_set_pack_order_if_upper( pack_ord_if_up, *p );
|
||||
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, *p );
|
||||
bli_obj_set_pack_order_if_upper( pack_ord_if_up, p );
|
||||
bli_obj_set_pack_order_if_lower( pack_ord_if_lo, p );
|
||||
|
||||
// Compute the dimensions padded by the dimension multiples. These
|
||||
// dimensions will be the dimensions of the packed matrices, including
|
||||
@@ -260,15 +260,15 @@ siz_t bli_packm_init_pack
|
||||
// in P) and aligning them to the dimension multiples (typically equal
|
||||
// to register blocksizes). This does waste a little bit of space for
|
||||
// level-2 operations, but that's okay with us.
|
||||
m_p = bli_obj_length( *p );
|
||||
n_p = bli_obj_width( *p );
|
||||
m_p = bli_obj_length( p );
|
||||
n_p = bli_obj_width( p );
|
||||
m_p_pad = bli_align_dim_to_mult( m_p, bmult_m_def );
|
||||
n_p_pad = bli_align_dim_to_mult( n_p, bmult_n_def );
|
||||
|
||||
// Save the padded dimensions into the packed object. It is important
|
||||
// to save these dimensions since they represent the actual dimensions
|
||||
// of the zero-padded matrix.
|
||||
bli_obj_set_padded_dims( m_p_pad, n_p_pad, *p );
|
||||
bli_obj_set_padded_dims( m_p_pad, n_p_pad, p );
|
||||
|
||||
// Now we prepare to compute strides, align them, and compute the
|
||||
// total number of bytes needed for the packed buffer. The caller
|
||||
@@ -276,7 +276,7 @@ siz_t bli_packm_init_pack
|
||||
// from the memory allocator.
|
||||
|
||||
// Extract the element size for the packed object.
|
||||
elem_size_p = bli_obj_elem_size( *p );
|
||||
elem_size_p = bli_obj_elem_size( p );
|
||||
|
||||
// Set the row and column strides of p based on the pack schema.
|
||||
if ( bli_is_row_packed( schema ) &&
|
||||
@@ -297,7 +297,7 @@ siz_t bli_packm_init_pack
|
||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||
|
||||
// Store the strides in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
bli_obj_set_strides( rs_p, cs_p, p );
|
||||
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = m_p_pad * rs_p * elem_size_p;
|
||||
@@ -320,7 +320,7 @@ siz_t bli_packm_init_pack
|
||||
BLIS_HEAP_STRIDE_ALIGN_SIZE );
|
||||
|
||||
// Store the strides in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
bli_obj_set_strides( rs_p, cs_p, p );
|
||||
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = cs_p * n_p_pad * elem_size_p;
|
||||
@@ -408,12 +408,12 @@ siz_t bli_packm_init_pack
|
||||
else is_p = 1;
|
||||
|
||||
// Store the strides and panel dimension in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
bli_obj_set_imag_stride( is_p, *p );
|
||||
bli_obj_set_panel_dim( m_panel, *p );
|
||||
bli_obj_set_panel_stride( ps_p, *p );
|
||||
bli_obj_set_panel_length( m_panel, *p );
|
||||
bli_obj_set_panel_width( n_p, *p );
|
||||
bli_obj_set_strides( rs_p, cs_p, p );
|
||||
bli_obj_set_imag_stride( is_p, p );
|
||||
bli_obj_set_panel_dim( m_panel, p );
|
||||
bli_obj_set_panel_stride( ps_p, p );
|
||||
bli_obj_set_panel_length( m_panel, p );
|
||||
bli_obj_set_panel_width( n_p, p );
|
||||
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = ps_p * ( m_p_pad / m_panel ) * elem_size_p;
|
||||
@@ -501,12 +501,12 @@ siz_t bli_packm_init_pack
|
||||
else is_p = 1;
|
||||
|
||||
// Store the strides and panel dimension in P.
|
||||
bli_obj_set_strides( rs_p, cs_p, *p );
|
||||
bli_obj_set_imag_stride( is_p, *p );
|
||||
bli_obj_set_panel_dim( n_panel, *p );
|
||||
bli_obj_set_panel_stride( ps_p, *p );
|
||||
bli_obj_set_panel_length( m_p, *p );
|
||||
bli_obj_set_panel_width( n_panel, *p );
|
||||
bli_obj_set_strides( rs_p, cs_p, p );
|
||||
bli_obj_set_imag_stride( is_p, p );
|
||||
bli_obj_set_panel_dim( n_panel, p );
|
||||
bli_obj_set_panel_stride( ps_p, p );
|
||||
bli_obj_set_panel_length( m_p, p );
|
||||
bli_obj_set_panel_width( n_panel, p );
|
||||
|
||||
// Compute the size of the packed buffer.
|
||||
size_p = ps_p * ( n_p_pad / n_panel ) * elem_size_p;
|
||||
|
||||
@@ -53,7 +53,7 @@ void bli_packm_int
|
||||
|
||||
// Sanity check; A should never have a zero dimension. If we must support
|
||||
// it, then we should fold it into the next alias-and-early-exit block.
|
||||
//if ( bli_obj_has_zero_dim( *a ) ) bli_abort();
|
||||
//if ( bli_obj_has_zero_dim( a ) ) bli_abort();
|
||||
|
||||
// Let us now check to see if the object has already been packed. First
|
||||
// we check if it has been packed to an unspecified (row or column)
|
||||
@@ -65,7 +65,7 @@ void bli_packm_int
|
||||
// not important, as long as it's packed into contiguous rows or
|
||||
// contiguous columns. A good example of this is packing for matrix
|
||||
// operands in the level-2 operations.
|
||||
if ( bli_obj_pack_schema( *a ) == BLIS_PACKED_UNSPEC )
|
||||
if ( bli_obj_pack_schema( a ) == BLIS_PACKED_UNSPEC )
|
||||
{
|
||||
return;
|
||||
}
|
||||
@@ -78,14 +78,14 @@ void bli_packm_int
|
||||
// already taken place, or does not need to take place, and so that will
|
||||
// be indicated by the pack status). Also, not all combinations of
|
||||
// current pack status and desired pack schema are valid.
|
||||
if ( bli_obj_pack_schema( *a ) == bli_cntl_packm_params_pack_schema( cntl ) )
|
||||
if ( bli_obj_pack_schema( a ) == bli_cntl_packm_params_pack_schema( cntl ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// If the object is marked as being filled with zeros, then we can skip
|
||||
// the packm operation entirely.
|
||||
if ( bli_obj_is_zeros( *a ) )
|
||||
if ( bli_obj_is_zeros( a ) )
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -54,14 +54,14 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
|
||||
|
||||
// Partitioning top-to-bottom through packed column panels (which are
|
||||
// row-stored) is not yet supported.
|
||||
if ( bli_obj_is_col_packed( *obj ) )
|
||||
if ( bli_obj_is_col_packed( obj ) )
|
||||
{
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
|
||||
}
|
||||
|
||||
// Query the dimensions of the parent object.
|
||||
m = bli_obj_length( *obj );
|
||||
n = bli_obj_width( *obj );
|
||||
m = bli_obj_length( obj );
|
||||
n = bli_obj_width( obj );
|
||||
|
||||
// Foolproofing: do not let b exceed what's left of the m dimension at
|
||||
// row offset i.
|
||||
@@ -71,10 +71,10 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
|
||||
// stride fields of the parent object. Note that this omits copying view
|
||||
// information because the new partition will have its own dimensions
|
||||
// and offsets.
|
||||
bli_obj_init_subpart_from( *obj, *sub_obj );
|
||||
bli_obj_init_subpart_from( obj, sub_obj );
|
||||
|
||||
// Modify offsets and dimensions of requested partition.
|
||||
bli_obj_set_dims( b, n, *sub_obj );
|
||||
bli_obj_set_dims( b, n, sub_obj );
|
||||
|
||||
// Tweak the padded length of the subpartition to trick the underlying
|
||||
// implementation into only zero-padding for the narrow submatrix of
|
||||
@@ -86,25 +86,25 @@ void bli_packm_acquire_mpart_t2b( subpart_t requested_part,
|
||||
// b for the edge iteration). In these cases, we arrive at the new
|
||||
// packed length by simply subtracting off i.
|
||||
{
|
||||
dim_t m_pack_max = bli_obj_padded_length( *sub_obj );
|
||||
dim_t m_pack_max = bli_obj_padded_length( sub_obj );
|
||||
dim_t m_pack_cur;
|
||||
|
||||
if ( i + b == m ) m_pack_cur = m_pack_max - i;
|
||||
else m_pack_cur = b;
|
||||
|
||||
bli_obj_set_padded_length( m_pack_cur, *sub_obj );
|
||||
bli_obj_set_padded_length( m_pack_cur, sub_obj );
|
||||
}
|
||||
|
||||
// Translate the desired offsets to a panel offset and adjust the
|
||||
// buffer pointer of the subpartition object.
|
||||
{
|
||||
char* buf_p = bli_obj_buffer( *sub_obj );
|
||||
siz_t elem_size = bli_obj_elem_size( *sub_obj );
|
||||
char* buf_p = bli_obj_buffer( sub_obj );
|
||||
siz_t elem_size = bli_obj_elem_size( sub_obj );
|
||||
dim_t off_to_panel = bli_packm_offset_to_panel_for( i, sub_obj );
|
||||
|
||||
buf_p = buf_p + elem_size * off_to_panel;
|
||||
|
||||
bli_obj_set_buffer( ( void* )buf_p, *sub_obj );
|
||||
bli_obj_set_buffer( buf_p, sub_obj );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -130,14 +130,14 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
|
||||
|
||||
// Partitioning left-to-right through packed row panels (which are
|
||||
// column-stored) is not yet supported.
|
||||
if ( bli_obj_is_row_packed( *obj ) )
|
||||
if ( bli_obj_is_row_packed( obj ) )
|
||||
{
|
||||
bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED );
|
||||
}
|
||||
|
||||
// Query the dimensions of the parent object.
|
||||
m = bli_obj_length( *obj );
|
||||
n = bli_obj_width( *obj );
|
||||
m = bli_obj_length( obj );
|
||||
n = bli_obj_width( obj );
|
||||
|
||||
// Foolproofing: do not let b exceed what's left of the n dimension at
|
||||
// column offset j.
|
||||
@@ -147,10 +147,10 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
|
||||
// stride fields of the parent object. Note that this omits copying view
|
||||
// information because the new partition will have its own dimensions
|
||||
// and offsets.
|
||||
bli_obj_init_subpart_from( *obj, *sub_obj );
|
||||
bli_obj_init_subpart_from( obj, sub_obj );
|
||||
|
||||
// Modify offsets and dimensions of requested partition.
|
||||
bli_obj_set_dims( m, b, *sub_obj );
|
||||
bli_obj_set_dims( m, b, sub_obj );
|
||||
|
||||
// Tweak the padded width of the subpartition to trick the underlying
|
||||
// implementation into only zero-padding for the narrow submatrix of
|
||||
@@ -162,25 +162,25 @@ void bli_packm_acquire_mpart_l2r( subpart_t requested_part,
|
||||
// b for the edge iteration). In these cases, we arrive at the new
|
||||
// packed width by simply subtracting off j.
|
||||
{
|
||||
dim_t n_pack_max = bli_obj_padded_width( *sub_obj );
|
||||
dim_t n_pack_max = bli_obj_padded_width( sub_obj );
|
||||
dim_t n_pack_cur;
|
||||
|
||||
if ( j + b == n ) n_pack_cur = n_pack_max - j;
|
||||
else n_pack_cur = b;
|
||||
|
||||
bli_obj_set_padded_width( n_pack_cur, *sub_obj );
|
||||
bli_obj_set_padded_width( n_pack_cur, sub_obj );
|
||||
}
|
||||
|
||||
// Translate the desired offsets to a panel offset and adjust the
|
||||
// buffer pointer of the subpartition object.
|
||||
{
|
||||
char* buf_p = bli_obj_buffer( *sub_obj );
|
||||
siz_t elem_size = bli_obj_elem_size( *sub_obj );
|
||||
char* buf_p = bli_obj_buffer( sub_obj );
|
||||
siz_t elem_size = bli_obj_elem_size( sub_obj );
|
||||
dim_t off_to_panel = bli_packm_offset_to_panel_for( j, sub_obj );
|
||||
|
||||
buf_p = buf_p + elem_size * off_to_panel;
|
||||
|
||||
bli_obj_set_buffer( ( void* )buf_p, *sub_obj );
|
||||
bli_obj_set_buffer( buf_p, sub_obj );
|
||||
}
|
||||
}
|
||||
|
||||
@@ -201,47 +201,47 @@ dim_t bli_packm_offset_to_panel_for( dim_t offmn, obj_t* p )
|
||||
{
|
||||
dim_t panel_off;
|
||||
|
||||
if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROWS )
|
||||
if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROWS )
|
||||
{
|
||||
// For the "packed rows" schema, a single row is effectively one
|
||||
// row panel, and so we use the row offset as the panel offset.
|
||||
// Then we multiply this offset by the effective panel stride
|
||||
// (ie: the row stride) to arrive at the desired offset.
|
||||
panel_off = offmn * bli_obj_row_stride( *p );
|
||||
panel_off = offmn * bli_obj_row_stride( p );
|
||||
}
|
||||
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COLUMNS )
|
||||
else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COLUMNS )
|
||||
{
|
||||
// For the "packed columns" schema, a single column is effectively one
|
||||
// column panel, and so we use the column offset as the panel offset.
|
||||
// Then we multiply this offset by the effective panel stride
|
||||
// (ie: the column stride) to arrive at the desired offset.
|
||||
panel_off = offmn * bli_obj_col_stride( *p );
|
||||
panel_off = offmn * bli_obj_col_stride( p );
|
||||
}
|
||||
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_ROW_PANELS )
|
||||
else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_ROW_PANELS )
|
||||
{
|
||||
// For the "packed row panels" schema, the column stride is equal to
|
||||
// the panel dimension (length). So we can divide it into offmn
|
||||
// (interpreted as a row offset) to arrive at a panel offset. Then
|
||||
// we multiply this offset by the panel stride to arrive at the total
|
||||
// offset to the panel (in units of elements).
|
||||
panel_off = offmn / bli_obj_col_stride( *p );
|
||||
panel_off = panel_off * bli_obj_panel_stride( *p );
|
||||
panel_off = offmn / bli_obj_col_stride( p );
|
||||
panel_off = panel_off * bli_obj_panel_stride( p );
|
||||
|
||||
// Sanity check.
|
||||
if ( offmn % bli_obj_col_stride( *p ) > 0 ) bli_abort();
|
||||
if ( offmn % bli_obj_col_stride( p ) > 0 ) bli_abort();
|
||||
}
|
||||
else if ( bli_obj_pack_schema( *p ) == BLIS_PACKED_COL_PANELS )
|
||||
else if ( bli_obj_pack_schema( p ) == BLIS_PACKED_COL_PANELS )
|
||||
{
|
||||
// For the "packed column panels" schema, the row stride is equal to
|
||||
// the panel dimension (width). So we can divide it into offmn
|
||||
// (interpreted as a column offset) to arrive at a panel offset. Then
|
||||
// we multiply this offset by the panel stride to arrive at the total
|
||||
// offset to the panel (in units of elements).
|
||||
panel_off = offmn / bli_obj_row_stride( *p );
|
||||
panel_off = panel_off * bli_obj_panel_stride( *p );
|
||||
panel_off = offmn / bli_obj_row_stride( p );
|
||||
panel_off = panel_off * bli_obj_panel_stride( p );
|
||||
|
||||
// Sanity check.
|
||||
if ( offmn % bli_obj_row_stride( *p ) > 0 ) bli_abort();
|
||||
if ( offmn % bli_obj_row_stride( p ) > 0 ) bli_abort();
|
||||
}
|
||||
else
|
||||
{
|
||||
|
||||
@@ -308,10 +308,10 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
c = c + diagoffc * ( doff_t )cs_c + \
|
||||
-diagoffc * ( doff_t )rs_c; \
|
||||
bli_swap_incs( incc, ldc ); \
|
||||
bli_swap_incs( &incc, &ldc ); \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc ); \
|
||||
bli_toggle_conj( &conjc ); \
|
||||
} \
|
||||
\
|
||||
/* Pack the full panel. */ \
|
||||
@@ -376,7 +376,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc12 ); \
|
||||
bli_toggle_conj( &conjc12 ); \
|
||||
} \
|
||||
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
|
||||
( col_stored && bli_is_upper( uploc ) ) ) */ \
|
||||
@@ -402,7 +402,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
bli_toggle_conj( &conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack to p10. For upper storage, this includes the unstored
|
||||
@@ -573,8 +573,8 @@ void PASTEMAC(ch,varname) \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
uplo_t uplop = uploc; \
|
||||
\
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
|
||||
bli_toggle_uplo( &uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
|
||||
\
|
||||
PASTEMAC(ch,setm) \
|
||||
( \
|
||||
|
||||
@@ -310,10 +310,10 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
c = c + diagoffc * ( doff_t )cs_c + \
|
||||
-diagoffc * ( doff_t )rs_c; \
|
||||
bli_swap_incs( incc, ldc ); \
|
||||
bli_swap_incs( &incc, &ldc ); \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc ); \
|
||||
bli_toggle_conj( &conjc ); \
|
||||
} \
|
||||
\
|
||||
/* Pack the full panel. */ \
|
||||
@@ -380,7 +380,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc12 ); \
|
||||
bli_toggle_conj( &conjc12 ); \
|
||||
} \
|
||||
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
|
||||
( col_stored && bli_is_upper( uploc ) ) ) */ \
|
||||
@@ -406,7 +406,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
bli_toggle_conj( &conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack to p10. For upper storage, this includes the unstored
|
||||
@@ -581,8 +581,8 @@ void PASTEMAC(ch,varname) \
|
||||
doff_t diagoffp11_0 = 0; \
|
||||
dim_t p11_0_dim = panel_dim - 1; \
|
||||
\
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp11_0 ); \
|
||||
bli_toggle_uplo( &uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp11_0 ); \
|
||||
\
|
||||
/* Note that this macro works a little differently than the setm
|
||||
operation. Here, we pass in the dimensions of only p11, rather
|
||||
|
||||
@@ -363,10 +363,10 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
c = c + diagoffc * ( doff_t )cs_c + \
|
||||
-diagoffc * ( doff_t )rs_c; \
|
||||
bli_swap_incs( incc, ldc ); \
|
||||
bli_swap_incs( &incc, &ldc ); \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc ); \
|
||||
bli_toggle_conj( &conjc ); \
|
||||
} \
|
||||
\
|
||||
/* Pack the full panel. */ \
|
||||
@@ -436,7 +436,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc12 ); \
|
||||
bli_toggle_conj( &conjc12 ); \
|
||||
} \
|
||||
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
|
||||
( col_stored && bli_is_upper( uploc ) ) ) */ \
|
||||
@@ -462,7 +462,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
bli_toggle_conj( &conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack to p10. For upper storage, this includes the unstored
|
||||
@@ -744,8 +744,8 @@ void PASTEMAC(ch,varname) \
|
||||
ctype_r* restrict zero_r = PASTEMAC(chr,0); \
|
||||
uplo_t uplop = uploc; \
|
||||
\
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
|
||||
bli_toggle_uplo( &uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
|
||||
\
|
||||
PASTEMAC(chr,setm) \
|
||||
( \
|
||||
|
||||
@@ -337,10 +337,10 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
c = c + diagoffc * ( doff_t )cs_c + \
|
||||
-diagoffc * ( doff_t )rs_c; \
|
||||
bli_swap_incs( incc, ldc ); \
|
||||
bli_swap_incs( &incc, &ldc ); \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc ); \
|
||||
bli_toggle_conj( &conjc ); \
|
||||
} \
|
||||
\
|
||||
/* Pack the full panel. */ \
|
||||
@@ -410,7 +410,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc12 ); \
|
||||
bli_toggle_conj( &conjc12 ); \
|
||||
} \
|
||||
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
|
||||
( col_stored && bli_is_upper( uploc ) ) ) */ \
|
||||
@@ -436,7 +436,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
bli_toggle_conj( &conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack to p10. For upper storage, this includes the unstored
|
||||
@@ -676,8 +676,8 @@ void PASTEMAC(ch,varname) \
|
||||
ctype_r* restrict zero_r = PASTEMAC(chr,0); \
|
||||
uplo_t uplop = uploc; \
|
||||
\
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
|
||||
bli_toggle_uplo( &uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
|
||||
\
|
||||
PASTEMAC(chr,setm) \
|
||||
( \
|
||||
|
||||
@@ -305,10 +305,10 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
c = c + diagoffc * ( doff_t )cs_c + \
|
||||
-diagoffc * ( doff_t )rs_c; \
|
||||
bli_swap_incs( incc, ldc ); \
|
||||
bli_swap_incs( &incc, &ldc ); \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc ); \
|
||||
bli_toggle_conj( &conjc ); \
|
||||
} \
|
||||
\
|
||||
/* Pack the full panel. */ \
|
||||
@@ -376,7 +376,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc12 ); \
|
||||
bli_toggle_conj( &conjc12 ); \
|
||||
} \
|
||||
else /* if ( ( row_stored && bli_is_lower( uploc ) ) || \
|
||||
( col_stored && bli_is_upper( uploc ) ) ) */ \
|
||||
@@ -402,7 +402,7 @@ void PASTEMAC(ch,varname) \
|
||||
conjc12 = conjc; \
|
||||
\
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( conjc10 ); \
|
||||
bli_toggle_conj( &conjc10 ); \
|
||||
} \
|
||||
\
|
||||
/* Pack to p10. For upper storage, this includes the unstored
|
||||
@@ -568,8 +568,8 @@ void PASTEMAC(ch,varname) \
|
||||
ctype_r* restrict zero_r = PASTEMAC(chr,0); \
|
||||
uplo_t uplop = uploc; \
|
||||
\
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
|
||||
bli_toggle_uplo( &uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
|
||||
\
|
||||
PASTEMAC(chr,setm) \
|
||||
( \
|
||||
|
||||
@@ -64,26 +64,26 @@ void bli_packm_unb_var1
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
num_t dt_cp = bli_obj_dt( *c );
|
||||
num_t dt_cp = bli_obj_dt( c );
|
||||
|
||||
struc_t strucc = bli_obj_struc( *c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( *c );
|
||||
diag_t diagc = bli_obj_diag( *c );
|
||||
uplo_t uploc = bli_obj_uplo( *c );
|
||||
trans_t transc = bli_obj_conjtrans_status( *c );
|
||||
struc_t strucc = bli_obj_struc( c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( c );
|
||||
diag_t diagc = bli_obj_diag( c );
|
||||
uplo_t uploc = bli_obj_uplo( c );
|
||||
trans_t transc = bli_obj_conjtrans_status( c );
|
||||
|
||||
dim_t m_p = bli_obj_length( *p );
|
||||
dim_t n_p = bli_obj_width( *p );
|
||||
dim_t m_max_p = bli_obj_padded_length( *p );
|
||||
dim_t n_max_p = bli_obj_padded_width( *p );
|
||||
dim_t m_p = bli_obj_length( p );
|
||||
dim_t n_p = bli_obj_width( p );
|
||||
dim_t m_max_p = bli_obj_padded_length( p );
|
||||
dim_t n_max_p = bli_obj_padded_width( p );
|
||||
|
||||
void* buf_c = bli_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bli_obj_row_stride( *c );
|
||||
inc_t cs_c = bli_obj_col_stride( *c );
|
||||
void* buf_c = bli_obj_buffer_at_off( c );
|
||||
inc_t rs_c = bli_obj_row_stride( c );
|
||||
inc_t cs_c = bli_obj_col_stride( c );
|
||||
|
||||
void* buf_p = bli_obj_buffer_at_off( *p );
|
||||
inc_t rs_p = bli_obj_row_stride( *p );
|
||||
inc_t cs_p = bli_obj_col_stride( *p );
|
||||
void* buf_p = bli_obj_buffer_at_off( p );
|
||||
inc_t rs_p = bli_obj_row_stride( p );
|
||||
inc_t cs_p = bli_obj_col_stride( p );
|
||||
|
||||
void* buf_kappa;
|
||||
|
||||
@@ -94,7 +94,7 @@ void bli_packm_unb_var1
|
||||
// the alpha scalar of the higher-level operation. Thus, we use BLIS_ONE
|
||||
// for kappa so that the underlying packm implementation does not scale
|
||||
// during packing.
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE );
|
||||
buf_kappa = bli_obj_buffer_for_const( dt_cp, &BLIS_ONE );
|
||||
|
||||
// Index into the type combination array to extract the correct
|
||||
// function pointer.
|
||||
@@ -180,15 +180,15 @@ void PASTEMAC(ch,varname) \
|
||||
side of the diagonal. */ \
|
||||
c_cast = c_cast + diagoffc * ( doff_t )cs_c + \
|
||||
-diagoffc * ( doff_t )rs_c; \
|
||||
bli_negate_diag_offset( diagoffc ); \
|
||||
bli_toggle_trans( transc ); \
|
||||
bli_negate_diag_offset( &diagoffc ); \
|
||||
bli_toggle_trans( &transc ); \
|
||||
if ( bli_is_upper( uploc ) ) diagoffc += 1; \
|
||||
else if ( bli_is_lower( uploc ) ) diagoffc -= 1; \
|
||||
\
|
||||
/* If c is Hermitian, we need to apply a conjugation when
|
||||
copying the region opposite the diagonal. */ \
|
||||
if ( bli_is_hermitian( strucc ) ) \
|
||||
bli_toggle_conj( transc ); \
|
||||
transc = bli_trans_toggled_conj( transc ); \
|
||||
\
|
||||
/* Copy the data from the region opposite the diagonal of c
|
||||
(as specified by the original value of diagoffc). Notice
|
||||
@@ -217,16 +217,16 @@ void PASTEMAC(ch,varname) \
|
||||
we can derive from the parameters given. */ \
|
||||
if ( bli_does_trans( transc ) ) \
|
||||
{ \
|
||||
bli_negate_diag_offset( diagoffp ); \
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_negate_diag_offset( &diagoffp ); \
|
||||
bli_toggle_uplo( &uplop ); \
|
||||
} \
|
||||
\
|
||||
/* For triangular matrices, we wish to reference the region
|
||||
strictly opposite the diagonal of C. This amounts to
|
||||
toggling uploc and then shifting the diagonal offset to
|
||||
shrink the stored region (by one diagonal). */ \
|
||||
bli_toggle_uplo( uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, diagoffp ); \
|
||||
bli_toggle_uplo( &uplop ); \
|
||||
bli_shift_diag_offset_to_shrink_uplo( uplop, &diagoffp ); \
|
||||
\
|
||||
/* Set the region opposite the diagonal of p to zero. */ \
|
||||
PASTEMAC(ch,setm) \
|
||||
|
||||
@@ -57,7 +57,7 @@ void bli_scalm_int( obj_t* alpha,
|
||||
FUNCPTR_T f;
|
||||
|
||||
// Return early if one of the matrix operands has a zero dimension.
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
if ( bli_obj_has_zero_dim( x ) ) return;
|
||||
|
||||
// Check parameters.
|
||||
if ( bli_error_checking_is_enabled() )
|
||||
|
||||
@@ -64,17 +64,17 @@ void bli_unpackm_blk_var1
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
num_t dt_cp = bli_obj_dt( *c );
|
||||
num_t dt_cp = bli_obj_dt( c );
|
||||
|
||||
// Normally we take the parameters from the source argument. But here,
|
||||
// the packm/unpackm framework is not yet solidified enough for us to
|
||||
// assume that at this point struc(P) == struc(C), (ie: since
|
||||
// densification may have marked P's structure as dense when the root
|
||||
// is upper or lower). So, we take the struc field from C, not P.
|
||||
struc_t strucc = bli_obj_struc( *c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( *c );
|
||||
diag_t diagc = bli_obj_diag( *c );
|
||||
uplo_t uploc = bli_obj_uplo( *c );
|
||||
struc_t strucc = bli_obj_struc( c );
|
||||
doff_t diagoffc = bli_obj_diag_offset( c );
|
||||
diag_t diagc = bli_obj_diag( c );
|
||||
uplo_t uploc = bli_obj_uplo( c );
|
||||
|
||||
// Again, normally the trans argument is on the source matrix. But we
|
||||
// know that the packed matrix is not transposed. If there is to be a
|
||||
@@ -83,22 +83,22 @@ void bli_unpackm_blk_var1
|
||||
// the trans status (not the conjugation status), since we probably
|
||||
// don't want to un-conjugate if the original matrix was conjugated
|
||||
// when packed.
|
||||
trans_t transc = bli_obj_onlytrans_status( *c );
|
||||
trans_t transc = bli_obj_onlytrans_status( c );
|
||||
|
||||
dim_t m_c = bli_obj_length( *c );
|
||||
dim_t n_c = bli_obj_width( *c );
|
||||
dim_t m_panel = bli_obj_panel_length( *c );
|
||||
dim_t n_panel = bli_obj_panel_width( *c );
|
||||
dim_t m_c = bli_obj_length( c );
|
||||
dim_t n_c = bli_obj_width( c );
|
||||
dim_t m_panel = bli_obj_panel_length( c );
|
||||
dim_t n_panel = bli_obj_panel_width( c );
|
||||
|
||||
void* buf_p = bli_obj_buffer_at_off( *p );
|
||||
inc_t rs_p = bli_obj_row_stride( *p );
|
||||
inc_t cs_p = bli_obj_col_stride( *p );
|
||||
dim_t pd_p = bli_obj_panel_dim( *p );
|
||||
inc_t ps_p = bli_obj_panel_stride( *p );
|
||||
void* buf_p = bli_obj_buffer_at_off( p );
|
||||
inc_t rs_p = bli_obj_row_stride( p );
|
||||
inc_t cs_p = bli_obj_col_stride( p );
|
||||
dim_t pd_p = bli_obj_panel_dim( p );
|
||||
inc_t ps_p = bli_obj_panel_stride( p );
|
||||
|
||||
void* buf_c = bli_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bli_obj_row_stride( *c );
|
||||
inc_t cs_c = bli_obj_col_stride( *c );
|
||||
void* buf_c = bli_obj_buffer_at_off( c );
|
||||
inc_t rs_c = bli_obj_row_stride( c );
|
||||
inc_t cs_c = bli_obj_col_stride( c );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
@@ -170,10 +170,10 @@ void PASTEMAC(ch,varname) \
|
||||
express the remaining parameters and code. */ \
|
||||
if ( bli_does_trans( transc ) ) \
|
||||
{ \
|
||||
bli_swap_incs( rs_c, cs_c ); \
|
||||
bli_negate_diag_offset( diagoffc ); \
|
||||
bli_toggle_uplo( uploc ); \
|
||||
bli_toggle_trans( transc ); \
|
||||
bli_swap_incs( &rs_c, &cs_c ); \
|
||||
bli_negate_diag_offset( &diagoffc ); \
|
||||
bli_toggle_uplo( &uploc ); \
|
||||
bli_toggle_trans( &transc ); \
|
||||
} \
|
||||
\
|
||||
/* If the strides of p indicate row storage, then we are packing to
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_unpackm_int
|
||||
// If p was aliased to a during the pack stage (because it was already
|
||||
// in an acceptable packed/contiguous format), then no unpack is actually
|
||||
// necessary, so we return.
|
||||
if ( bli_obj_is_alias_of( *p, *a ) ) return;
|
||||
if ( bli_obj_is_alias_of( p, a ) ) return;
|
||||
|
||||
// Extract the function pointer from the current control tree node.
|
||||
f = bli_cntl_unpackm_params_var_func( cntl );
|
||||
|
||||
@@ -59,22 +59,22 @@ void bli_unpackm_unb_var1
|
||||
thrinfo_t* thread
|
||||
)
|
||||
{
|
||||
num_t dt_pc = bli_obj_dt( *p );
|
||||
num_t dt_pc = bli_obj_dt( p );
|
||||
|
||||
doff_t diagoffp = bli_obj_diag_offset( *p );
|
||||
uplo_t uplop = bli_obj_uplo( *p );
|
||||
trans_t transc = bli_obj_onlytrans_status( *c );
|
||||
doff_t diagoffp = bli_obj_diag_offset( p );
|
||||
uplo_t uplop = bli_obj_uplo( p );
|
||||
trans_t transc = bli_obj_onlytrans_status( c );
|
||||
|
||||
dim_t m_c = bli_obj_length( *c );
|
||||
dim_t n_c = bli_obj_width( *c );
|
||||
dim_t m_c = bli_obj_length( c );
|
||||
dim_t n_c = bli_obj_width( c );
|
||||
|
||||
void* buf_p = bli_obj_buffer_at_off( *p );
|
||||
inc_t rs_p = bli_obj_row_stride( *p );
|
||||
inc_t cs_p = bli_obj_col_stride( *p );
|
||||
void* buf_p = bli_obj_buffer_at_off( p );
|
||||
inc_t rs_p = bli_obj_row_stride( p );
|
||||
inc_t cs_p = bli_obj_col_stride( p );
|
||||
|
||||
void* buf_c = bli_obj_buffer_at_off( *c );
|
||||
inc_t rs_c = bli_obj_row_stride( *c );
|
||||
inc_t cs_c = bli_obj_col_stride( *c );
|
||||
void* buf_c = bli_obj_buffer_at_off( c );
|
||||
inc_t rs_c = bli_obj_row_stride( c );
|
||||
inc_t cs_c = bli_obj_col_stride( c );
|
||||
|
||||
FUNCPTR_T f;
|
||||
|
||||
|
||||
@@ -330,10 +330,10 @@ void bli_xxmv_check
|
||||
e_val = bli_check_vector_object( y );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_width_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_length_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
@@ -392,10 +392,10 @@ void bli_xxr_check
|
||||
e_val = bli_check_matrix_object( a );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( x, bli_obj_length_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( *a ) );
|
||||
e_val = bli_check_vector_dim_equals( y, bli_obj_width_after_trans( a ) );
|
||||
bli_check_error_code( e_val );
|
||||
|
||||
// Check object buffers (for non-NULLness).
|
||||
|
||||
@@ -57,19 +57,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t n = bli_obj_width( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
dim_t n = bli_obj_width( a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
void* buf_beta; \
|
||||
@@ -86,8 +86,8 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
alpha, &alpha_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_14 \
|
||||
@@ -126,19 +126,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t n = bli_obj_width( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
dim_t n = bli_obj_width( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -151,7 +151,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_13 \
|
||||
@@ -190,19 +190,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
uplo_t uploa = bli_obj_uplo( *a ); \
|
||||
conj_t conja = bli_obj_conj_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
uplo_t uploa = bli_obj_uplo( a ); \
|
||||
conj_t conja = bli_obj_conj_status( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
void* buf_beta; \
|
||||
@@ -219,8 +219,8 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
alpha, &alpha_local ); \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
beta, &beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
buf_beta = bli_obj_buffer_for_1x1( dt, &beta_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_14 \
|
||||
@@ -259,16 +259,16 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
uplo_t uploa = bli_obj_uplo( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
uplo_t uploa = bli_obj_uplo( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -281,7 +281,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_10 \
|
||||
@@ -318,19 +318,19 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
uplo_t uploa = bli_obj_uplo( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
uplo_t uploa = bli_obj_uplo( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -343,7 +343,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_13 \
|
||||
@@ -381,17 +381,17 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
\
|
||||
BLIS_OAPI_CNTX_DECL \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
uplo_t uploa = bli_obj_uplo( *a ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||
diag_t diaga = bli_obj_diag( *a ); \
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
uplo_t uploa = bli_obj_uplo( a ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( a ); \
|
||||
diag_t diaga = bli_obj_diag( a ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_alpha; \
|
||||
\
|
||||
@@ -404,7 +404,7 @@ void PASTEMAC(opname,EX_SUF) \
|
||||
as needed). */ \
|
||||
bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \
|
||||
alpha, &alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \
|
||||
buf_alpha = bli_obj_buffer_for_1x1( dt, &alpha_local ); \
|
||||
\
|
||||
/* Invoke the typed function. */ \
|
||||
bli_call_ft_11 \
|
||||
|
||||
@@ -60,7 +60,7 @@ void PASTEMAC(ch,opname) \
|
||||
dim_t m_y, n_x; \
|
||||
\
|
||||
/* Determine the dimensions of y and x. */ \
|
||||
bli_set_dims_with_trans( transa, m, n, m_y, n_x ); \
|
||||
bli_set_dims_with_trans( transa, m, n, &m_y, &n_x ); \
|
||||
\
|
||||
/* If y has zero elements, return early. */ \
|
||||
if ( bli_zero_dim1( m_y ) ) return; \
|
||||
|
||||
@@ -63,7 +63,7 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
bli_set_dims_incs_with_trans( transa, \
|
||||
m, n, rs_a, cs_a, \
|
||||
n_iter, n_elem, rs_at, cs_at ); \
|
||||
&n_iter, &n_elem, &rs_at, &cs_at ); \
|
||||
\
|
||||
conja = bli_extract_conj( transa ); \
|
||||
\
|
||||
|
||||
@@ -65,7 +65,7 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
bli_set_dims_incs_with_trans( transa, \
|
||||
m, n, rs_a, cs_a, \
|
||||
n_elem, n_iter, rs_at, cs_at ); \
|
||||
&n_elem, &n_iter, &rs_at, &cs_at ); \
|
||||
\
|
||||
conja = bli_extract_conj( transa ); \
|
||||
\
|
||||
|
||||
@@ -64,7 +64,7 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
bli_set_dims_incs_with_trans( transa, \
|
||||
m, n, rs_a, cs_a, \
|
||||
n_iter, n_elem, rs_at, cs_at ); \
|
||||
&n_iter, &n_elem, &rs_at, &cs_at ); \
|
||||
\
|
||||
conja = bli_extract_conj( transa ); \
|
||||
\
|
||||
|
||||
@@ -65,7 +65,7 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
bli_set_dims_incs_with_trans( transa, \
|
||||
m, n, rs_a, cs_a, \
|
||||
n_elem, n_iter, rs_at, cs_at ); \
|
||||
&n_elem, &n_iter, &rs_at, &cs_at ); \
|
||||
\
|
||||
conja = bli_extract_conj( transa ); \
|
||||
\
|
||||
|
||||
@@ -50,26 +50,26 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t n = bli_obj_width( *a ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
dim_t n = bli_obj_width( a ); \
|
||||
\
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
bli_call_ft_14 \
|
||||
|
||||
@@ -51,26 +51,26 @@ void PASTEMAC0(opname) \
|
||||
gemv_t* cntl \
|
||||
) \
|
||||
{ \
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t n = bli_obj_width( *a ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
dim_t n = bli_obj_width( a ); \
|
||||
\
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
|
||||
\
|
||||
PASTECH(ftname,_vft) f = PASTECH(opname,_vfp)[dt]; \
|
||||
\
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_gemv_blk_var1( obj_t* alpha,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension in partitioning direction.
|
||||
m_trans = bli_obj_length_after_trans( *a );
|
||||
m_trans = bli_obj_length_after_trans( a );
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = 0; i < m_trans; i += b_alg )
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_gemv_blk_var2( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *a );
|
||||
n_trans = bli_obj_width_after_trans( a );
|
||||
|
||||
// y = beta * y;
|
||||
bli_scalv_int( beta,
|
||||
|
||||
@@ -67,15 +67,15 @@ void bli_gemv_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_a = bli_obj_target_dt( *a );
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_y = bli_obj_target_dt( *y );
|
||||
dt_targ_a = bli_obj_target_dt( a );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
dt_targ_y = bli_obj_target_dt( y );
|
||||
|
||||
// Determine whether each operand is stored with unit stride.
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
|
||||
bli_obj_is_col_stored( *a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
|
||||
bli_obj_is_col_stored( a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -112,14 +112,14 @@ void bli_gemv_front
|
||||
// row-major cases with a transpose and column-major without a
|
||||
// transpose. For the general stride case, we mimic that of column-
|
||||
// major storage since that is the format into which we copy/pack.
|
||||
if ( bli_obj_has_notrans( *a ) )
|
||||
if ( bli_obj_has_notrans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) gemv_cntl = gemv_cntl_bs_ke_dot;
|
||||
if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_dot;
|
||||
else gemv_cntl = gemv_cntl_bs_ke_axpy;
|
||||
}
|
||||
else // if ( bli_obj_has_trans( *a ) )
|
||||
else // if ( bli_obj_has_trans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) gemv_cntl = gemv_cntl_bs_ke_axpy;
|
||||
if ( bli_obj_is_row_stored( a ) ) gemv_cntl = gemv_cntl_bs_ke_axpy;
|
||||
else gemv_cntl = gemv_cntl_bs_ke_dot;
|
||||
}
|
||||
}
|
||||
@@ -127,20 +127,20 @@ void bli_gemv_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_has_notrans( *a ) )
|
||||
if ( bli_obj_has_notrans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) gemv_cntl = gemv_cntl_ge_dot;
|
||||
if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_dot;
|
||||
else gemv_cntl = gemv_cntl_ge_axpy;
|
||||
}
|
||||
else // if ( bli_obj_has_trans( *a ) )
|
||||
else // if ( bli_obj_has_trans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) gemv_cntl = gemv_cntl_ge_axpy;
|
||||
if ( bli_obj_is_row_tilted( a ) ) gemv_cntl = gemv_cntl_ge_axpy;
|
||||
else gemv_cntl = gemv_cntl_ge_dot;
|
||||
}
|
||||
}
|
||||
@@ -189,8 +189,8 @@ void PASTEMAC(ch,opname) \
|
||||
inc_t rs_x, cs_x; \
|
||||
inc_t rs_y, cs_y; \
|
||||
\
|
||||
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, m_a, n_a ); \
|
||||
bli_set_dims_with_trans( transa, m, n, m_y, m_x ); \
|
||||
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_a, &n_a ); \
|
||||
bli_set_dims_with_trans( transa, m, n, &m_y, &m_x ); \
|
||||
\
|
||||
rs_x = incx; cs_x = m_x * incx; \
|
||||
rs_y = incy; cs_y = m_y * incy; \
|
||||
@@ -202,8 +202,8 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m_x, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \
|
||||
\
|
||||
bli_obj_set_conjtrans( transa, ao ); \
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_conjtrans( transa, &ao ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&ao, \
|
||||
|
||||
@@ -69,8 +69,8 @@ void bli_gemv_int( trans_t transa,
|
||||
obj_t x_local;
|
||||
|
||||
// Apply the trans and/or conj parameters to aliases of the objects.
|
||||
bli_obj_alias_with_trans( transa, *a, a_local );
|
||||
bli_obj_alias_with_conj( conjx, *x, x_local );
|
||||
bli_obj_alias_with_trans( transa, a, &a_local );
|
||||
bli_obj_alias_with_conj( conjx, x, &x_local );
|
||||
|
||||
// Check parameters. We use the aliased copy of A so the transa parameter
|
||||
// is taken into account for dimension checking.
|
||||
@@ -78,10 +78,10 @@ void bli_gemv_int( trans_t transa,
|
||||
bli_gemv_check( alpha, &a_local, &x_local, beta, y );
|
||||
|
||||
// If y has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *y ) ) return;
|
||||
if ( bli_obj_has_zero_dim( y ) ) return;
|
||||
|
||||
// If x has a zero dimension, scale y by beta and return early.
|
||||
if ( bli_obj_has_zero_dim( *x ) )
|
||||
if ( bli_obj_has_zero_dim( x ) )
|
||||
{
|
||||
bli_scalm( beta, y );
|
||||
return;
|
||||
|
||||
@@ -49,25 +49,25 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t n = bli_obj_width( *a ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
dim_t n = bli_obj_width( a ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
bli_call_ft_13 \
|
||||
|
||||
@@ -53,7 +53,7 @@ void bli_ger_blk_var1( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension in partitioning direction.
|
||||
m_trans = bli_obj_length_after_trans( *a );
|
||||
m_trans = bli_obj_length_after_trans( a );
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = 0; i < m_trans; i += b_alg )
|
||||
|
||||
@@ -53,7 +53,7 @@ void bli_ger_blk_var2( obj_t* alpha,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *a );
|
||||
n_trans = bli_obj_width_after_trans( a );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( i = 0; i < n_trans; i += b_alg )
|
||||
|
||||
@@ -64,15 +64,15 @@ void bli_ger_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_y = bli_obj_target_dt( *y );
|
||||
//dt_targ_a = bli_obj_target_dt( *a );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
dt_targ_y = bli_obj_target_dt( y );
|
||||
//dt_targ_a = bli_obj_target_dt( a );
|
||||
|
||||
// Determine whether each operand is stored with unit stride.
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
|
||||
bli_obj_is_col_stored( *a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
|
||||
bli_obj_is_col_stored( a ) );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -93,20 +93,20 @@ void bli_ger_front
|
||||
{
|
||||
// Use different control trees depending on storage of the matrix
|
||||
// operand.
|
||||
if ( bli_obj_is_row_stored( *a ) ) ger_cntl = ger_cntl_bs_ke_row;
|
||||
if ( bli_obj_is_row_stored( a ) ) ger_cntl = ger_cntl_bs_ke_row;
|
||||
else ger_cntl = ger_cntl_bs_ke_col;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_is_row_tilted( *a ) ) ger_cntl = ger_cntl_ge_row;
|
||||
if ( bli_obj_is_row_tilted( a ) ) ger_cntl = ger_cntl_ge_row;
|
||||
else ger_cntl = ger_cntl_ge_col;
|
||||
}
|
||||
|
||||
@@ -151,7 +151,7 @@ void PASTEMAC(ch,opname) \
|
||||
inc_t rs_x, cs_x; \
|
||||
inc_t rs_y, cs_y; \
|
||||
\
|
||||
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, m_x, m_y ); \
|
||||
bli_set_dims_with_trans( BLIS_NO_TRANSPOSE, m, n, &m_x, &m_y ); \
|
||||
\
|
||||
rs_x = incx; cs_x = m_x * incx; \
|
||||
rs_y = incy; cs_y = m_y * incy; \
|
||||
@@ -162,8 +162,8 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m_y, 1, y, rs_y, cs_y, &yo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, n, a, rs_a, cs_a, &ao ); \
|
||||
\
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_conj( conjy, yo ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
bli_obj_set_conj( conjy, &yo ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&xo, \
|
||||
|
||||
@@ -74,27 +74,27 @@ void bli_ger_int( conj_t conjx,
|
||||
bli_ger_check( alpha, x, y, a );
|
||||
|
||||
// If A has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *a ) ) return;
|
||||
if ( bli_obj_has_zero_dim( a ) ) return;
|
||||
|
||||
// If x or y has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *x ) ||
|
||||
bli_obj_has_zero_dim( *y ) ) return;
|
||||
if ( bli_obj_has_zero_dim( x ) ||
|
||||
bli_obj_has_zero_dim( y ) ) return;
|
||||
|
||||
// Alias the objects, applying conjx and conjy to x and y, respectively.
|
||||
bli_obj_alias_with_conj( conjx, *x, x_local );
|
||||
bli_obj_alias_with_conj( conjy, *y, y_local );
|
||||
bli_obj_alias_to( *a, a_local );
|
||||
bli_obj_alias_with_conj( conjx, x, &x_local );
|
||||
bli_obj_alias_with_conj( conjy, y, &y_local );
|
||||
bli_obj_alias_to( a, &a_local );
|
||||
|
||||
// If matrix A is marked for conjugation, we interpret this as a request
|
||||
// to apply a conjugation to the other operands.
|
||||
if ( bli_obj_has_conj( a_local ) )
|
||||
if ( bli_obj_has_conj( &a_local ) )
|
||||
{
|
||||
bli_obj_toggle_conj( a_local );
|
||||
bli_obj_toggle_conj( &a_local );
|
||||
|
||||
bli_obj_toggle_conj( x_local );
|
||||
bli_obj_toggle_conj( y_local );
|
||||
bli_obj_toggle_conj( &x_local );
|
||||
bli_obj_toggle_conj( &y_local );
|
||||
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
|
||||
BLIS_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
@@ -107,10 +107,10 @@ void bli_ger_int( conj_t conjx,
|
||||
// If we are about to call a leaf-level implementation, and matrix A
|
||||
// still needs a transposition, then we must induce one by swapping the
|
||||
// strides and dimensions.
|
||||
if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( a_local ) )
|
||||
if ( bli_cntl_is_leaf( cntl ) && bli_obj_has_trans( &a_local ) )
|
||||
{
|
||||
bli_obj_induce_trans( a_local );
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local );
|
||||
bli_obj_induce_trans( &a_local );
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
|
||||
}
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
|
||||
@@ -51,26 +51,26 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
uplo_t uplo = bli_obj_uplo( *a ); \
|
||||
conj_t conja = bli_obj_conj_status( *a ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
uplo_t uplo = bli_obj_uplo( a ); \
|
||||
conj_t conja = bli_obj_conj_status( a ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
\
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
void* buf_beta = bli_obj_buffer_for_1x1( dt, beta ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
bli_call_ft_15 \
|
||||
|
||||
@@ -69,7 +69,7 @@ void bli_hemv_blk_var1( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// y = beta * y;
|
||||
bli_scalv_int( beta,
|
||||
|
||||
@@ -70,7 +70,7 @@ void bli_hemv_blk_var2( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// y = beta * y;
|
||||
bli_scalv_int( beta,
|
||||
|
||||
@@ -69,7 +69,7 @@ void bli_hemv_blk_var3( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// y = beta * y;
|
||||
bli_scalv_int( beta,
|
||||
|
||||
@@ -70,7 +70,7 @@ void bli_hemv_blk_var4( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// y = beta * y;
|
||||
bli_scalv_int( beta,
|
||||
|
||||
@@ -67,15 +67,15 @@ void bli_hemv_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_a = bli_obj_target_dt( *a );
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_y = bli_obj_target_dt( *y );
|
||||
dt_targ_a = bli_obj_target_dt( a );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
dt_targ_y = bli_obj_target_dt( y );
|
||||
|
||||
// Determine whether each operand is stored with unit stride.
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
|
||||
bli_obj_is_col_stored( *a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
|
||||
bli_obj_is_col_stored( a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -109,14 +109,14 @@ void bli_hemv_front
|
||||
// combinations of upper/lower triangular storage and row/column-storage.
|
||||
// The row-stored lower triangular and column-stored upper triangular
|
||||
// trees are identical. Same for the remaining two trees.
|
||||
if ( bli_obj_is_lower( *a ) )
|
||||
if ( bli_obj_is_lower( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
|
||||
else hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *a ) )
|
||||
else // if ( bli_obj_is_upper( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
|
||||
else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -124,20 +124,20 @@ void bli_hemv_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_is_lower( *a ) )
|
||||
if ( bli_obj_is_lower( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
|
||||
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
|
||||
else hemv_cntl = hemv_cntl_ge_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *a ) )
|
||||
else // if ( bli_obj_is_upper( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
|
||||
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
|
||||
else hemv_cntl = hemv_cntl_ge_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -193,11 +193,11 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
|
||||
\
|
||||
bli_obj_set_uplo( uploa, ao ); \
|
||||
bli_obj_set_conj( conja, ao ); \
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_uplo( uploa, &ao ); \
|
||||
bli_obj_set_conj( conja, &ao ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, ao ); \
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, &ao ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&ao, \
|
||||
|
||||
@@ -76,17 +76,17 @@ void bli_hemv_int( conj_t conjh,
|
||||
}
|
||||
|
||||
// If y has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *y ) ) return;
|
||||
if ( bli_obj_has_zero_dim( y ) ) return;
|
||||
|
||||
// If x has a zero dimension, scale y by beta and return early.
|
||||
if ( bli_obj_has_zero_dim( *x ) )
|
||||
if ( bli_obj_has_zero_dim( x ) )
|
||||
{
|
||||
bli_scalm( beta, y );
|
||||
return;
|
||||
}
|
||||
|
||||
// Alias A in case we need to induce the upper triangular case.
|
||||
bli_obj_alias_to( *a, a_local );
|
||||
bli_obj_alias_to( a, &a_local );
|
||||
|
||||
/*
|
||||
// Our blocked algorithms only [explicitly] implement the lower triangular
|
||||
@@ -96,10 +96,10 @@ void bli_hemv_int( conj_t conjh,
|
||||
// triangular case. But we only need to do this for blocked algorithms,
|
||||
// since unblocked algorithms are responsible for handling the upper case
|
||||
// explicitly (and they should not be inspecting the transposition bit anyway).
|
||||
if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( *a ) )
|
||||
if ( bli_cntl_is_blocked( cntl ) && bli_obj_is_upper( a ) )
|
||||
{
|
||||
bli_obj_toggle_conj( a_local );
|
||||
bli_obj_toggle_trans( a_local );
|
||||
bli_obj_toggle_conj( &a_local );
|
||||
bli_obj_toggle_trans( &a_local );
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
@@ -49,21 +49,21 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *c ); \
|
||||
num_t dt = bli_obj_dt( c ); \
|
||||
\
|
||||
uplo_t uplo = bli_obj_uplo( *c ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
uplo_t uplo = bli_obj_uplo( c ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *c ); \
|
||||
dim_t m = bli_obj_length( c ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_c = bli_obj_buffer_at_off( *c ); \
|
||||
inc_t rs_c = bli_obj_row_stride( *c ); \
|
||||
inc_t cs_c = bli_obj_col_stride( *c ); \
|
||||
void* buf_c = bli_obj_buffer_at_off( c ); \
|
||||
inc_t rs_c = bli_obj_row_stride( c ); \
|
||||
inc_t cs_c = bli_obj_col_stride( c ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
bli_call_ft_11 \
|
||||
|
||||
@@ -64,7 +64,7 @@ void bli_her_blk_var1( conj_t conjh,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *c );
|
||||
mn = bli_obj_length( c );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -64,7 +64,7 @@ void bli_her_blk_var2( conj_t conjh,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *c );
|
||||
mn = bli_obj_length( c );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -61,13 +61,13 @@ void bli_her_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
//dt_targ_c = bli_obj_target_dt( *c );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
//dt_targ_c = bli_obj_target_dt( c );
|
||||
|
||||
// Determine whether each operand is stored with unit stride.
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
|
||||
bli_obj_is_col_stored( *c ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
|
||||
bli_obj_is_col_stored( c ) );
|
||||
|
||||
|
||||
// Create object to hold a copy-cast of alpha.
|
||||
@@ -87,14 +87,14 @@ void bli_her_front
|
||||
// combinations of upper/lower triangular storage and row/column-storage.
|
||||
// The row-stored lower triangular and column-stored upper triangular
|
||||
// trees are identical. Same for the remaining two trees.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
|
||||
else her_cntl = her_cntl_bs_ke_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
|
||||
else her_cntl = her_cntl_bs_ke_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -102,19 +102,19 @@ void bli_her_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol;
|
||||
else her_cntl = her_cntl_ge_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow;
|
||||
else her_cntl = her_cntl_ge_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -162,10 +162,10 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
|
||||
\
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_uplo( uploc, co ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
bli_obj_set_uplo( uploc, &co ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, co ); \
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, &co ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&xo, \
|
||||
|
||||
@@ -73,22 +73,22 @@ void bli_her_int( conj_t conjh,
|
||||
}
|
||||
|
||||
// If C or x has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *c ) ) return;
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
if ( bli_obj_has_zero_dim( c ) ) return;
|
||||
if ( bli_obj_has_zero_dim( x ) ) return;
|
||||
|
||||
// Alias the operands in case we need to apply conjugations.
|
||||
bli_obj_alias_to( *x, x_local );
|
||||
bli_obj_alias_to( *c, c_local );
|
||||
bli_obj_alias_to( x, &x_local );
|
||||
bli_obj_alias_to( c, &c_local );
|
||||
|
||||
// If matrix C is marked for conjugation, we interpret this as a request
|
||||
// to apply a conjugation to the other operands.
|
||||
if ( bli_obj_has_conj( c_local ) )
|
||||
if ( bli_obj_has_conj( &c_local ) )
|
||||
{
|
||||
bli_obj_toggle_conj( c_local );
|
||||
bli_obj_toggle_conj( &c_local );
|
||||
|
||||
// Notice that we don't need to conjugate alpha since it is guaranteed
|
||||
// to be real.
|
||||
bli_obj_toggle_conj( x_local );
|
||||
bli_obj_toggle_conj( &x_local );
|
||||
}
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
|
||||
@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* two = PASTEMAC(ch,2); \
|
||||
ctype* x0; \
|
||||
ctype* chi1; \
|
||||
ctype* y0; \
|
||||
@@ -156,7 +155,8 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
|
||||
+ conj(alpha) * psi1 * conj(chi1); */ \
|
||||
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
\
|
||||
/* For her2, explicitly set the imaginary component of gamma11 to
|
||||
zero. */ \
|
||||
|
||||
@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* two = PASTEMAC(ch,2); \
|
||||
ctype* x0; \
|
||||
ctype* chi1; \
|
||||
ctype* x2; \
|
||||
@@ -165,7 +164,8 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
|
||||
+ conj(alpha) * psi1 * conj(chi1); */ \
|
||||
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
\
|
||||
/* For her2, explicitly set the imaginary component of gamma11 to
|
||||
zero. */ \
|
||||
|
||||
@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* two = PASTEMAC(ch,2); \
|
||||
ctype* chi1; \
|
||||
ctype* y0; \
|
||||
ctype* psi1; \
|
||||
@@ -165,7 +164,8 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
|
||||
+ conj(alpha) * psi1 * conj(chi1); */ \
|
||||
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
\
|
||||
/* For her2, explicitly set the imaginary component of gamma11 to
|
||||
zero. */ \
|
||||
|
||||
@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* two = PASTEMAC(ch,2); \
|
||||
ctype* chi1; \
|
||||
ctype* x2; \
|
||||
ctype* psi1; \
|
||||
@@ -164,7 +163,8 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
|
||||
+ conj(alpha) * psi1 * conj(chi1); */ \
|
||||
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
\
|
||||
/* For her2, explicitly set the imaginary component of gamma11 to
|
||||
zero. */ \
|
||||
|
||||
@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* two = PASTEMAC(ch,2); \
|
||||
ctype* x0; \
|
||||
ctype* chi1; \
|
||||
ctype* y0; \
|
||||
@@ -149,7 +148,8 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
|
||||
+ conj(alpha) * psi1 * conj(chi1); */ \
|
||||
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
\
|
||||
/* For her2, explicitly set the imaginary component of gamma11 to
|
||||
zero. */ \
|
||||
|
||||
@@ -53,7 +53,6 @@ void PASTEMAC(ch,varname) \
|
||||
{ \
|
||||
const num_t dt = PASTEMAC(ch,type); \
|
||||
\
|
||||
ctype* two = PASTEMAC(ch,2); \
|
||||
ctype* chi1; \
|
||||
ctype* x2; \
|
||||
ctype* psi1; \
|
||||
@@ -157,7 +156,8 @@ void PASTEMAC(ch,varname) \
|
||||
\
|
||||
/* gamma11 = gamma11 + alpha * chi1 * conj(psi1) \
|
||||
+ conj(alpha) * psi1 * conj(chi1); */ \
|
||||
PASTEMAC(ch,axpys)( *two, alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
PASTEMAC(ch,adds)( alpha0_chi1_psi1, *gamma11 ); \
|
||||
\
|
||||
/* For her2, explicitly set the imaginary component of gamma11 to
|
||||
zero. */ \
|
||||
|
||||
@@ -51,25 +51,25 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *c ); \
|
||||
num_t dt = bli_obj_dt( c ); \
|
||||
\
|
||||
uplo_t uplo = bli_obj_uplo( *c ); \
|
||||
conj_t conjx = bli_obj_conj_status( *x ); \
|
||||
conj_t conjy = bli_obj_conj_status( *y ); \
|
||||
uplo_t uplo = bli_obj_uplo( c ); \
|
||||
conj_t conjx = bli_obj_conj_status( x ); \
|
||||
conj_t conjy = bli_obj_conj_status( y ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *c ); \
|
||||
dim_t m = bli_obj_length( c ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_y = bli_obj_buffer_at_off( *y ); \
|
||||
inc_t incy = bli_obj_vector_inc( *y ); \
|
||||
void* buf_y = bli_obj_buffer_at_off( y ); \
|
||||
inc_t incy = bli_obj_vector_inc( y ); \
|
||||
\
|
||||
void* buf_c = bli_obj_buffer_at_off( *c ); \
|
||||
inc_t rs_c = bli_obj_row_stride( *c ); \
|
||||
inc_t cs_c = bli_obj_col_stride( *c ); \
|
||||
void* buf_c = bli_obj_buffer_at_off( c ); \
|
||||
inc_t rs_c = bli_obj_row_stride( c ); \
|
||||
inc_t cs_c = bli_obj_col_stride( c ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
bli_call_ft_14 \
|
||||
|
||||
@@ -69,7 +69,7 @@ void bli_her2_blk_var1( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *c );
|
||||
mn = bli_obj_length( c );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -70,7 +70,7 @@ void bli_her2_blk_var2( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *c );
|
||||
mn = bli_obj_length( c );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -70,7 +70,7 @@ void bli_her2_blk_var3( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *c );
|
||||
mn = bli_obj_length( c );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -69,7 +69,7 @@ void bli_her2_blk_var4( conj_t conjh,
|
||||
bli_obj_init_pack( &y1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *c );
|
||||
mn = bli_obj_length( c );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -65,15 +65,15 @@ void bli_her2_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_y = bli_obj_target_dt( *y );
|
||||
//dt_targ_c = bli_obj_target_dt( *c );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
dt_targ_y = bli_obj_target_dt( y );
|
||||
//dt_targ_c = bli_obj_target_dt( c );
|
||||
|
||||
// Determine whether each operand has unit stride.
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
|
||||
bli_obj_is_col_stored( *c ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
|
||||
bli_obj_is_col_stored( c ) );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -101,14 +101,14 @@ void bli_her2_front
|
||||
// combinations of upper/lower triangular storage and row/column-storage.
|
||||
// The row-stored lower triangular and column-stored upper triangular
|
||||
// trees are identical. Same for the remaining two trees.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
|
||||
else her2_cntl = her2_cntl_bs_ke_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
|
||||
else her2_cntl = her2_cntl_bs_ke_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -116,20 +116,20 @@ void bli_her2_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
|
||||
else her2_cntl = her2_cntl_ge_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
|
||||
else her2_cntl = her2_cntl_ge_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -183,11 +183,11 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
|
||||
\
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_conj( conjy, yo ); \
|
||||
bli_obj_set_uplo( uploc, co ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
bli_obj_set_conj( conjy, &yo ); \
|
||||
bli_obj_set_uplo( uploc, &co ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, co ); \
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, &co ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&xo, \
|
||||
|
||||
@@ -80,29 +80,29 @@ void bli_her2_int( conj_t conjh,
|
||||
}
|
||||
|
||||
// If C, x, or y has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *c ) ) return;
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
if ( bli_obj_has_zero_dim( *y ) ) return;
|
||||
if ( bli_obj_has_zero_dim( c ) ) return;
|
||||
if ( bli_obj_has_zero_dim( x ) ) return;
|
||||
if ( bli_obj_has_zero_dim( y ) ) return;
|
||||
|
||||
// Alias the operands in case we need to apply conjugations.
|
||||
bli_obj_alias_to( *x, x_local );
|
||||
bli_obj_alias_to( *y, y_local );
|
||||
bli_obj_alias_to( *c, c_local );
|
||||
bli_obj_alias_to( x, &x_local );
|
||||
bli_obj_alias_to( y, &y_local );
|
||||
bli_obj_alias_to( c, &c_local );
|
||||
|
||||
// If matrix C is marked for conjugation, we interpret this as a request
|
||||
// to apply a conjugation to the other operands.
|
||||
if ( bli_obj_has_conj( c_local ) )
|
||||
if ( bli_obj_has_conj( &c_local ) )
|
||||
{
|
||||
bli_obj_toggle_conj( c_local );
|
||||
bli_obj_toggle_conj( &c_local );
|
||||
|
||||
bli_obj_toggle_conj( x_local );
|
||||
bli_obj_toggle_conj( y_local );
|
||||
bli_obj_toggle_conj( &x_local );
|
||||
bli_obj_toggle_conj( &y_local );
|
||||
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha ),
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha ),
|
||||
BLIS_CONJUGATE,
|
||||
alpha,
|
||||
&alpha_local );
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( *alpha_conj ),
|
||||
bli_obj_scalar_init_detached_copy_of( bli_obj_dt( alpha_conj ),
|
||||
BLIS_CONJUGATE,
|
||||
alpha_conj,
|
||||
&alpha_conj_local );
|
||||
|
||||
@@ -67,15 +67,15 @@ void bli_symv_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_a = bli_obj_target_dt( *a );
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_y = bli_obj_target_dt( *y );
|
||||
dt_targ_a = bli_obj_target_dt( a );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
dt_targ_y = bli_obj_target_dt( y );
|
||||
|
||||
// Determine whether each operand has unit stride.
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
|
||||
bli_obj_is_col_stored( *a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
|
||||
bli_obj_is_col_stored( a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -109,14 +109,14 @@ void bli_symv_front
|
||||
// combinations of upper/lower triangular storage and row/column-storage.
|
||||
// The row-stored lower triangular and column-stored upper triangular
|
||||
// trees are identical. Same for the remaining two trees.
|
||||
if ( bli_obj_is_lower( *a ) )
|
||||
if ( bli_obj_is_lower( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
|
||||
else hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *a ) )
|
||||
else // if ( bli_obj_is_upper( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( a ) ) hemv_cntl = hemv_cntl_bs_ke_lcol_urow;
|
||||
else hemv_cntl = hemv_cntl_bs_ke_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -124,20 +124,20 @@ void bli_symv_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_is_lower( *a ) )
|
||||
if ( bli_obj_is_lower( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
|
||||
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lrow_ucol;
|
||||
else hemv_cntl = hemv_cntl_ge_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *a ) )
|
||||
else // if ( bli_obj_is_upper( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
|
||||
if ( bli_obj_is_row_tilted( a ) ) hemv_cntl = hemv_cntl_ge_lcol_urow;
|
||||
else hemv_cntl = hemv_cntl_ge_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -194,11 +194,11 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
|
||||
\
|
||||
bli_obj_set_uplo( uploa, ao ); \
|
||||
bli_obj_set_conj( conja, ao ); \
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_uplo( uploa, &ao ); \
|
||||
bli_obj_set_conj( conja, &ao ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, ao ); \
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &ao ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&ao, \
|
||||
|
||||
@@ -61,13 +61,13 @@ void bli_syr_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_c = bli_obj_target_dt( *c );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
dt_targ_c = bli_obj_target_dt( c );
|
||||
|
||||
// Determine whether each operand has unit stride.
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
|
||||
bli_obj_is_col_stored( *c ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
|
||||
bli_obj_is_col_stored( c ) );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -89,14 +89,14 @@ void bli_syr_front
|
||||
// combinations of upper/lower triangular storage and row/column-storage.
|
||||
// The row-stored lower triangular and column-stored upper triangular
|
||||
// trees are identical. Same for the remaining two trees.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lrow_ucol;
|
||||
else her_cntl = her_cntl_bs_ke_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_bs_ke_lcol_urow;
|
||||
else her_cntl = her_cntl_bs_ke_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -104,19 +104,19 @@ void bli_syr_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lrow_ucol;
|
||||
else her_cntl = her_cntl_ge_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her_cntl = her_cntl_ge_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her_cntl = her_cntl_ge_lcol_urow;
|
||||
else her_cntl = her_cntl_ge_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -163,10 +163,10 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
|
||||
\
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_uplo( uploc, co ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
bli_obj_set_uplo( uploc, &co ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, co ); \
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&xo, \
|
||||
|
||||
@@ -64,15 +64,15 @@ void bli_syr2_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_y = bli_obj_target_dt( *y );
|
||||
//dt_targ_c = bli_obj_target_dt( *c );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
dt_targ_y = bli_obj_target_dt( y );
|
||||
//dt_targ_c = bli_obj_target_dt( c );
|
||||
|
||||
// Determine whether each operand has unit stride.
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( *y ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( *c ) ||
|
||||
bli_obj_is_col_stored( *c ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
y_has_unit_inc = ( bli_obj_vector_inc( y ) == 1 );
|
||||
c_has_unit_inc = ( bli_obj_is_row_stored( c ) ||
|
||||
bli_obj_is_col_stored( c ) );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -94,14 +94,14 @@ void bli_syr2_front
|
||||
// combinations of upper/lower triangular storage and row/column-storage.
|
||||
// The row-stored lower triangular and column-stored upper triangular
|
||||
// trees are identical. Same for the remaining two trees.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lrow_ucol;
|
||||
else her2_cntl = her2_cntl_bs_ke_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_bs_ke_lcol_urow;
|
||||
else her2_cntl = her2_cntl_bs_ke_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -109,20 +109,20 @@ void bli_syr2_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *y );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *c );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
if ( y_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, y );
|
||||
if ( c_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, c );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_is_lower( *c ) )
|
||||
if ( bli_obj_is_lower( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lrow_ucol;
|
||||
else her2_cntl = her2_cntl_ge_lcol_urow;
|
||||
}
|
||||
else // if ( bli_obj_is_upper( *c ) )
|
||||
else // if ( bli_obj_is_upper( c ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
|
||||
if ( bli_obj_is_row_stored( c ) ) her2_cntl = her2_cntl_ge_lcol_urow;
|
||||
else her2_cntl = her2_cntl_ge_lrow_ucol;
|
||||
}
|
||||
}
|
||||
@@ -176,11 +176,11 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, y, rs_y, cs_y, &yo ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, c, rs_c, cs_c, &co ); \
|
||||
\
|
||||
bli_obj_set_conj( conjx, xo ); \
|
||||
bli_obj_set_conj( conjy, yo ); \
|
||||
bli_obj_set_uplo( uploc, co ); \
|
||||
bli_obj_set_conj( conjx, &xo ); \
|
||||
bli_obj_set_conj( conjy, &yo ); \
|
||||
bli_obj_set_uplo( uploc, &co ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, co ); \
|
||||
bli_obj_set_struc( BLIS_SYMMETRIC, &co ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&xo, \
|
||||
|
||||
@@ -48,22 +48,22 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
uplo_t uploa = bli_obj_uplo( *a ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||
diag_t diaga = bli_obj_diag( *a ); \
|
||||
uplo_t uploa = bli_obj_uplo( a ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( a ); \
|
||||
diag_t diaga = bli_obj_diag( a ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
\
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
bli_call_ft_11 \
|
||||
|
||||
@@ -61,13 +61,13 @@ void bli_trmv_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_a = bli_obj_target_dt( *a );
|
||||
dt_targ_x = bli_obj_target_dt( *x );
|
||||
dt_targ_a = bli_obj_target_dt( a );
|
||||
dt_targ_x = bli_obj_target_dt( x );
|
||||
|
||||
// Determine whether each operand has unit stride.
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
|
||||
bli_obj_is_col_stored( *a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
|
||||
bli_obj_is_col_stored( a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -89,14 +89,14 @@ void bli_trmv_front
|
||||
// combinations of transposition and row/column-storage.
|
||||
// The row-stored without transpose and column-stored with transpose
|
||||
// trees are identical. Same for the remaining two trees.
|
||||
if ( bli_obj_has_notrans( *a ) )
|
||||
if ( bli_obj_has_notrans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
|
||||
if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
|
||||
else trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
|
||||
}
|
||||
else // if ( bli_obj_has_trans( *a ) )
|
||||
else // if ( bli_obj_has_trans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
|
||||
if ( bli_obj_is_row_stored( a ) ) trmv_cntl = trmv_cntl_bs_ke_ncol_trow;
|
||||
else trmv_cntl = trmv_cntl_bs_ke_nrow_tcol;
|
||||
}
|
||||
}
|
||||
@@ -104,19 +104,19 @@ void bli_trmv_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_has_notrans( *a ) )
|
||||
if ( bli_obj_has_notrans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol;
|
||||
if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_nrow_tcol;
|
||||
else trmv_cntl = trmv_cntl_ge_ncol_trow;
|
||||
}
|
||||
else // if ( bli_obj_has_trans( *a ) )
|
||||
else // if ( bli_obj_has_trans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow;
|
||||
if ( bli_obj_is_row_tilted( a ) ) trmv_cntl = trmv_cntl_ge_ncol_trow;
|
||||
else trmv_cntl = trmv_cntl_ge_nrow_tcol;
|
||||
}
|
||||
}
|
||||
@@ -162,11 +162,11 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
\
|
||||
bli_obj_set_uplo( uploa, ao ); \
|
||||
bli_obj_set_conjtrans( transa, ao ); \
|
||||
bli_obj_set_diag( diaga, ao ); \
|
||||
bli_obj_set_uplo( uploa, &ao ); \
|
||||
bli_obj_set_conjtrans( transa, &ao ); \
|
||||
bli_obj_set_diag( diaga, &ao ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, ao ); \
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&ao, \
|
||||
|
||||
@@ -77,18 +77,18 @@ void bli_trmv_int( obj_t* alpha,
|
||||
bli_trmv_check( alpha, a, x );
|
||||
|
||||
// If A or x has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *a ) ) return;
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
if ( bli_obj_has_zero_dim( a ) ) return;
|
||||
if ( bli_obj_has_zero_dim( x ) ) return;
|
||||
|
||||
// Alias A in case we need to induce a transformation (ie: transposition).
|
||||
bli_obj_alias_to( *a, a_local );
|
||||
bli_obj_alias_to( a, &a_local );
|
||||
|
||||
// NOTE: to support cases where B is complex and A is real, we will
|
||||
// need to have the default side case be BLIS_RIGHT and then express
|
||||
// the left case in terms of it, rather than the other way around.
|
||||
|
||||
// Determine uplo (for indexing to the correct function pointer).
|
||||
if ( bli_obj_is_lower( a_local ) ) uplo = 0;
|
||||
if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
|
||||
else uplo = 1;
|
||||
|
||||
// We do not explicitly implement the cases where A is transposed.
|
||||
@@ -107,11 +107,12 @@ void bli_trmv_int( obj_t* alpha,
|
||||
// affect the optimal choice of kernel (ie: a column-major column panel
|
||||
// matrix with transpose times a vector would use the same kernel as a
|
||||
// row-major row panel matrix with no transpose times a vector).
|
||||
if ( bli_obj_has_trans( a_local ) )
|
||||
if ( bli_obj_has_trans( &a_local ) )
|
||||
{
|
||||
//bli_obj_induce_trans( a_local );
|
||||
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local );
|
||||
bli_toggle_bool( uplo );
|
||||
//bli_obj_induce_trans( &a_local );
|
||||
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
|
||||
if ( uplo == 1 ) uplo = 0;
|
||||
else uplo = 1;
|
||||
}
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_trmv_l_blk_var1( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_trmv_l_blk_var2( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_trmv_u_blk_var1( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_trmv_u_blk_var2( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// Partition diagonally.
|
||||
for ( ij = 0; ij < mn; ij += b_alg )
|
||||
|
||||
@@ -48,22 +48,22 @@ void PASTEMAC0(opname) \
|
||||
{ \
|
||||
bli_init_once(); \
|
||||
\
|
||||
num_t dt = bli_obj_dt( *a ); \
|
||||
num_t dt = bli_obj_dt( a ); \
|
||||
\
|
||||
uplo_t uploa = bli_obj_uplo( *a ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( *a ); \
|
||||
diag_t diaga = bli_obj_diag( *a ); \
|
||||
uplo_t uploa = bli_obj_uplo( a ); \
|
||||
trans_t transa = bli_obj_conjtrans_status( a ); \
|
||||
diag_t diaga = bli_obj_diag( a ); \
|
||||
\
|
||||
dim_t m = bli_obj_length( *a ); \
|
||||
dim_t m = bli_obj_length( a ); \
|
||||
\
|
||||
void* buf_a = bli_obj_buffer_at_off( *a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( *a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( *a ); \
|
||||
void* buf_a = bli_obj_buffer_at_off( a ); \
|
||||
inc_t rs_a = bli_obj_row_stride( a ); \
|
||||
inc_t cs_a = bli_obj_col_stride( a ); \
|
||||
\
|
||||
void* buf_x = bli_obj_buffer_at_off( *x ); \
|
||||
inc_t incx = bli_obj_vector_inc( *x ); \
|
||||
void* buf_x = bli_obj_buffer_at_off( x ); \
|
||||
inc_t incx = bli_obj_vector_inc( x ); \
|
||||
\
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha ); \
|
||||
void* buf_alpha = bli_obj_buffer_for_1x1( dt, alpha ); \
|
||||
\
|
||||
/* Invoke the void pointer-based function for the given datatype. */ \
|
||||
bli_call_ft_11 \
|
||||
|
||||
@@ -61,13 +61,13 @@ void bli_trsv_front
|
||||
|
||||
|
||||
// Query the target datatypes of each object.
|
||||
dt_targ_a = bli_obj_dt( *a );
|
||||
dt_targ_x = bli_obj_dt( *x );
|
||||
dt_targ_a = bli_obj_dt( a );
|
||||
dt_targ_x = bli_obj_dt( x );
|
||||
|
||||
// Determine whether each operand has unit stride.
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( *a ) ||
|
||||
bli_obj_is_col_stored( *a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( *x ) == 1 );
|
||||
a_has_unit_inc = ( bli_obj_is_row_stored( a ) ||
|
||||
bli_obj_is_col_stored( a ) );
|
||||
x_has_unit_inc = ( bli_obj_vector_inc( x ) == 1 );
|
||||
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
@@ -85,14 +85,14 @@ void bli_trsv_front
|
||||
if ( a_has_unit_inc &&
|
||||
x_has_unit_inc )
|
||||
{
|
||||
if ( bli_obj_has_notrans( *a ) )
|
||||
if ( bli_obj_has_notrans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
|
||||
if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
|
||||
else trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
|
||||
}
|
||||
else // if ( bli_obj_has_trans( *a ) )
|
||||
else // if ( bli_obj_has_trans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_stored( *a ) ) trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
|
||||
if ( bli_obj_is_row_stored( a ) ) trsv_cntl = trsv_cntl_bs_ke_ncol_trow;
|
||||
else trsv_cntl = trsv_cntl_bs_ke_nrow_tcol;
|
||||
}
|
||||
}
|
||||
@@ -100,19 +100,19 @@ void bli_trsv_front
|
||||
{
|
||||
// Mark objects with unit stride as already being packed. This prevents
|
||||
// unnecessary packing from happening within the blocked algorithm.
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, *a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, *x );
|
||||
if ( a_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_UNSPEC, a );
|
||||
if ( x_has_unit_inc ) bli_obj_set_pack_schema( BLIS_PACKED_VECTOR, x );
|
||||
|
||||
// Here, we make a similar choice as above, except that (1) we look
|
||||
// at storage tilt, and (2) we choose a tree that performs blocking.
|
||||
if ( bli_obj_has_notrans( *a ) )
|
||||
if ( bli_obj_has_notrans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) trsv_cntl = trsv_cntl_ge_nrow_tcol;
|
||||
if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_nrow_tcol;
|
||||
else trsv_cntl = trsv_cntl_ge_ncol_trow;
|
||||
}
|
||||
else // if ( bli_obj_has_trans( *a ) )
|
||||
else // if ( bli_obj_has_trans( a ) )
|
||||
{
|
||||
if ( bli_obj_is_row_tilted( *a ) ) trsv_cntl = trsv_cntl_ge_ncol_trow;
|
||||
if ( bli_obj_is_row_tilted( a ) ) trsv_cntl = trsv_cntl_ge_ncol_trow;
|
||||
else trsv_cntl = trsv_cntl_ge_nrow_tcol;
|
||||
}
|
||||
}
|
||||
@@ -158,11 +158,11 @@ void PASTEMAC(ch,opname) \
|
||||
bli_obj_create_with_attached_buffer( dt, m, m, a, rs_a, cs_a, &ao ); \
|
||||
bli_obj_create_with_attached_buffer( dt, m, 1, x, rs_x, cs_x, &xo ); \
|
||||
\
|
||||
bli_obj_set_uplo( uploa, ao ); \
|
||||
bli_obj_set_conjtrans( transa, ao ); \
|
||||
bli_obj_set_diag( diaga, ao ); \
|
||||
bli_obj_set_uplo( uploa, &ao ); \
|
||||
bli_obj_set_conjtrans( transa, &ao ); \
|
||||
bli_obj_set_diag( diaga, &ao ); \
|
||||
\
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, ao ); \
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, &ao ); \
|
||||
\
|
||||
PASTEMAC0(opname)( &alphao, \
|
||||
&ao, \
|
||||
|
||||
@@ -77,19 +77,19 @@ void bli_trsv_int( obj_t* alpha,
|
||||
bli_trsv_check( alpha, a, x );
|
||||
|
||||
// If A or x has a zero dimension, return early.
|
||||
if ( bli_obj_has_zero_dim( *a ) ) return;
|
||||
if ( bli_obj_has_zero_dim( *x ) ) return;
|
||||
if ( bli_obj_has_zero_dim( a ) ) return;
|
||||
if ( bli_obj_has_zero_dim( x ) ) return;
|
||||
|
||||
// Alias A in case we need to induce a transformation (ie: transposition).
|
||||
bli_obj_alias_to( *a, a_local );
|
||||
bli_obj_alias_to( a, &a_local );
|
||||
|
||||
// NOTE: to support cases where B is complex and A is real, we will
|
||||
// need to have the default side case be BLIS_RIGHT and then express
|
||||
// the left case in terms of it, rather than the other way around.
|
||||
|
||||
// Determine uplo (for indexing to the correct function pointer).
|
||||
if ( bli_obj_is_lower( a_local ) ) uplo = 0;
|
||||
else uplo = 1;
|
||||
if ( bli_obj_is_lower( &a_local ) ) uplo = 0;
|
||||
else uplo = 1;
|
||||
|
||||
// We do not explicitly implement the cases where A is transposed.
|
||||
// However, we can still handle them. Specifically, if A is marked as
|
||||
@@ -107,11 +107,12 @@ void bli_trsv_int( obj_t* alpha,
|
||||
// affect the optimal choice of kernel (ie: a column-major column panel
|
||||
// matrix with transpose times a vector would use the same kernel as a
|
||||
// row-major row panel matrix with no transpose times a vector).
|
||||
if ( bli_obj_has_trans( a_local ) )
|
||||
if ( bli_obj_has_trans( &a_local ) )
|
||||
{
|
||||
//bli_obj_induce_trans( a_local );
|
||||
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, a_local );
|
||||
bli_toggle_bool( uplo );
|
||||
//bli_obj_induce_trans( &a_local );
|
||||
//bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, &a_local );
|
||||
if ( uplo == 1 ) uplo = 0;
|
||||
else uplo = 1;
|
||||
}
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_trsv_l_blk_var1( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// x = alpha * x;
|
||||
bli_scalv_int( alpha,
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_trsv_l_blk_var2( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// x = alpha * x;
|
||||
bli_scalv_int( alpha,
|
||||
|
||||
@@ -54,7 +54,7 @@ void bli_trsv_u_blk_var1( obj_t* alpha,
|
||||
bli_obj_init_pack( &x1_pack );
|
||||
|
||||
// Query dimension.
|
||||
mn = bli_obj_length( *a );
|
||||
mn = bli_obj_length( a );
|
||||
|
||||
// x = alpha * x;
|
||||
bli_scalv_int( alpha,
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user