Support all four dts in test/test_her[2][k].c (#578)

Details:
- Replaced the hard-coded calls to double-precision real syr, syr2,
  syr2k, and syrk in the corresponding standalone test drivers in the
  'test' directory with conditional branches that will call the 
  appropriate BLAS interface depending on which datatype is enabled. 
  Thanks to Madan mohan Manokar for this improvement.
- CREDITS file update.
This commit is contained in:
Madan mohan Manokar
2021-11-18 00:46:46 +05:30
committed by GitHub
parent 26e4b6b293
commit 9be97c150e
5 changed files with 227 additions and 136 deletions

View File

@@ -58,6 +58,7 @@ but many others have contributed code and feedback, including
Tze Meng Low (The University of Texas at Austin)
Ye Luo @ye-luo (Argonne National Laboratory)
Ricardo Magana @magania (Hewlett Packard Enterprise)
Madan mohan Manokar @madanm3 (AMD)
Giorgos Margaritis
Bryan Marker @bamarker (The University of Texas at Austin)
Simon Lukas Märtens @ACSimon33 (RWTH Aachen University)

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -81,11 +82,8 @@ int main( int argc, char** argv )
m_input = 6;
#endif
#if 1
dt_alpha = dt_x = dt_a = BLIS_DOUBLE;
#else
// her supports complex and double complex
dt_alpha = dt_x = dt_a = BLIS_DCOMPLEX;
#endif
uplo = BLIS_LOWER;
@@ -127,7 +125,7 @@ int main( int argc, char** argv )
bli_copym( &a, &a_save );
dtime_save = DBL_MAX;
for ( r = 0; r < n_repeats; ++r )
@@ -143,33 +141,76 @@ int main( int argc, char** argv )
#endif
#ifdef BLIS
//bli_obj_toggle_conj( &x );
//bli_syr( &alpha,
bli_her( &alpha,
&x,
&a );
#else
if ( bli_is_float( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int lda = bli_obj_col_stride( &a );
float* alphap = bli_obj_buffer( &alpha );
float* xp = bli_obj_buffer( &x );
float* ap = bli_obj_buffer( &a );
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int lda = bli_obj_col_stride( &a );
double* alphap = bli_obj_buffer( &alpha );
double* xp = bli_obj_buffer( &x );
double* ap = bli_obj_buffer( &a );
/*
dcomplex* xp = bli_obj_buffer( x );
dcomplex* ap = bli_obj_buffer( &a );
*/
ssyr_( &uplo,
&mm,
alphap,
xp, &incx,
ap, &lda );
}
else if ( bli_is_double( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int lda = bli_obj_col_stride( &a );
double* alphap = bli_obj_buffer( &alpha );
double* xp = bli_obj_buffer( &x );
double* ap = bli_obj_buffer( &a );
dsyr_( &uplo,
//zher_( &uplo,
&mm,
alphap,
xp, &incx,
ap, &lda );
dsyr_( &uplo,
&mm,
alphap,
xp, &incx,
ap, &lda );
}
else if ( bli_is_scomplex( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int lda = bli_obj_col_stride( &a );
float* alphap = bli_obj_buffer( &alpha );
scomplex* xp = bli_obj_buffer( &x );
scomplex* ap = bli_obj_buffer( &a );
cher_( &uplo,
&mm,
alphap,
xp, &incx,
ap, &lda );
}
else if ( bli_is_dcomplex( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int lda = bli_obj_col_stride( &a );
double* alphap = bli_obj_buffer( &alpha );
dcomplex* xp = bli_obj_buffer( &x );
dcomplex* ap = bli_obj_buffer( &a );
zher_( &uplo,
&mm,
alphap,
xp, &incx,
ap, &lda );
}
#endif
#ifdef PRINT

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -41,7 +42,7 @@
// uplo m alpha x incx y incy a lda
//void dsyr2_( char*, int*, double*, double*, int*, double*, int*, double*, int* );
//#define PRINT
int main( int argc, char** argv )
@@ -80,11 +81,8 @@ int main( int argc, char** argv )
m_input = 6;
#endif
#if 1
dt_alpha = dt_x = dt_y = dt_a = BLIS_DOUBLE;
#else
dt_alpha = dt_x = dt_y = dt_a = BLIS_DCOMPLEX;
#endif
// her2 supports complex and double complex
dt_alpha = dt_x = dt_y = dt_a = BLIS_SCOMPLEX;
uplo = BLIS_LOWER;
@@ -128,7 +126,7 @@ int main( int argc, char** argv )
bli_copym( &a, &a_save );
dtime_save = DBL_MAX;
for ( r = 0; r < n_repeats; ++r )
@@ -142,37 +140,93 @@ int main( int argc, char** argv )
bli_printm( "x", &x, "%4.1f", "" );
bli_printm( "y", &y, "%4.1f", "" );
bli_printm( "a", &a, "%4.1f", "" );
#endif
#endif
#ifdef BLIS
//bli_obj_toggle_conj( &x );
//bli_obj_toggle_conj( &y );
//bli_syr2( &alpha,
bli_her2( &alpha,
&x,
&y,
&a );
#else
if ( bli_is_float( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
float* alphap = bli_obj_buffer( &alpha );
float* xp = bli_obj_buffer( &x );
float* yp = bli_obj_buffer( &y );
float* ap = bli_obj_buffer( &a );
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
double* alphap = bli_obj_buffer( &alpha );
double* xp = bli_obj_buffer( &x );
double* yp = bli_obj_buffer( &y );
double* ap = bli_obj_buffer( &a );
ssyr2_( &uplo,
&mm,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
else if ( bli_is_double( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
double* alphap = bli_obj_buffer( &alpha );
double* xp = bli_obj_buffer( &x );
double* yp = bli_obj_buffer( &y );
double* ap = bli_obj_buffer( &a );
dsyr2_( &uplo,
&mm,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
else if ( bli_is_scomplex( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
scomplex* alphap = bli_obj_buffer( &alpha );
scomplex* xp = bli_obj_buffer( &x );
scomplex* yp = bli_obj_buffer( &y );
scomplex* ap = bli_obj_buffer( &a );
cher2_( &uplo,
&mm,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
else if ( bli_is_dcomplex( dt_a ) )
{
f77_char uplo = 'L';
f77_int mm = bli_obj_length( &a );
f77_int incx = bli_obj_vector_inc( &x );
f77_int incy = bli_obj_vector_inc( &y );
f77_int lda = bli_obj_col_stride( &a );
dcomplex* alphap = bli_obj_buffer( &alpha );
dcomplex* xp = bli_obj_buffer( &x );
dcomplex* yp = bli_obj_buffer( &y );
dcomplex* ap = bli_obj_buffer( &a );
zher2_( &uplo,
&mm,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
}
dsyr2_( &uplo,
&mm,
alphap,
xp, &incx,
yp, &incy,
ap, &lda );
#endif
#ifdef PRINT

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -84,13 +85,10 @@ int main( int argc, char** argv )
k_input = 1;
#endif
#if 1
//dt = BLIS_FLOAT;
dt = BLIS_DOUBLE;
#else
// her2k supports complex and double complex
//dt = BLIS_SCOMPLEX;
dt = BLIS_DCOMPLEX;
#endif
uploc = BLIS_LOWER;
//uploc = BLIS_UPPER;
@@ -153,7 +151,7 @@ int main( int argc, char** argv )
bli_copym( &c, &c_save );
dtime_save = DBL_MAX;
for ( r = 0; r < n_repeats; ++r )
@@ -181,16 +179,16 @@ int main( int argc, char** argv )
#else
if ( bli_is_float( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
float* ap = bli_obj_buffer( &a );
float* bp = bli_obj_buffer( &b );
float* betap = bli_obj_buffer( &beta );
float* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
float* ap = bli_obj_buffer( &a );
float* bp = bli_obj_buffer( &b );
float* betap = bli_obj_buffer( &beta );
float* cp = bli_obj_buffer( &c );
ssyr2k_( &f77_uploc,
&f77_transa,
@@ -204,16 +202,16 @@ int main( int argc, char** argv )
}
else if ( bli_is_double( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
double* ap = bli_obj_buffer( &a );
double* bp = bli_obj_buffer( &b );
double* betap = bli_obj_buffer( &beta );
double* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
double* ap = bli_obj_buffer( &a );
double* bp = bli_obj_buffer( &b );
double* betap = bli_obj_buffer( &beta );
double* cp = bli_obj_buffer( &c );
dsyr2k_( &f77_uploc,
&f77_transa,
@@ -227,16 +225,16 @@ int main( int argc, char** argv )
}
else if ( bli_is_scomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
scomplex* alphap = bli_obj_buffer( &alpha );
scomplex* ap = bli_obj_buffer( &a );
scomplex* bp = bli_obj_buffer( &b );
float* betap = bli_obj_buffer( &beta );
scomplex* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
scomplex* alphap = bli_obj_buffer( &alpha );
scomplex* ap = bli_obj_buffer( &a );
scomplex* bp = bli_obj_buffer( &b );
float* betap = bli_obj_buffer( &beta );
scomplex* cp = bli_obj_buffer( &c );
cher2k_( &f77_uploc,
&f77_transa,
@@ -250,16 +248,16 @@ int main( int argc, char** argv )
}
else if ( bli_is_dcomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
dcomplex* alphap = bli_obj_buffer( &alpha );
dcomplex* ap = bli_obj_buffer( &a );
dcomplex* bp = bli_obj_buffer( &b );
double* betap = bli_obj_buffer( &beta );
dcomplex* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldb = bli_obj_col_stride( &b );
f77_int ldc = bli_obj_col_stride( &c );
dcomplex* alphap = bli_obj_buffer( &alpha );
dcomplex* ap = bli_obj_buffer( &a );
dcomplex* bp = bli_obj_buffer( &b );
double* betap = bli_obj_buffer( &beta );
dcomplex* cp = bli_obj_buffer( &c );
zher2k_( &f77_uploc,
&f77_transa,

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -83,14 +84,10 @@ int main( int argc, char** argv )
m_input = 3;
k_input = 1;
#endif
#if 1
//dt = BLIS_FLOAT;
dt = BLIS_DOUBLE;
#else
// herk supports complex and double complex
//dt = BLIS_SCOMPLEX;
dt = BLIS_DCOMPLEX;
#endif
uploc = BLIS_LOWER;
//uploc = BLIS_UPPER;
@@ -145,7 +142,7 @@ int main( int argc, char** argv )
bli_copym( &c, &c_save );
dtime_save = DBL_MAX;
for ( r = 0; r < n_repeats; ++r )
@@ -171,14 +168,14 @@ int main( int argc, char** argv )
#else
if ( bli_is_float( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
float* ap = bli_obj_buffer( &a );
float* betap = bli_obj_buffer( &beta );
float* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
float* ap = bli_obj_buffer( &a );
float* betap = bli_obj_buffer( &beta );
float* cp = bli_obj_buffer( &c );
ssyrk_( &f77_uploc,
&f77_transa,
@@ -191,14 +188,14 @@ int main( int argc, char** argv )
}
else if ( bli_is_double( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
double* ap = bli_obj_buffer( &a );
double* betap = bli_obj_buffer( &beta );
double* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
double* ap = bli_obj_buffer( &a );
double* betap = bli_obj_buffer( &beta );
double* cp = bli_obj_buffer( &c );
dsyrk_( &f77_uploc,
&f77_transa,
@@ -211,14 +208,14 @@ int main( int argc, char** argv )
}
else if ( bli_is_scomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
scomplex* ap = bli_obj_buffer( &a );
float* betap = bli_obj_buffer( &beta );
scomplex* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
scomplex* ap = bli_obj_buffer( &a );
float* betap = bli_obj_buffer( &beta );
scomplex* cp = bli_obj_buffer( &c );
cherk_( &f77_uploc,
&f77_transa,
@@ -231,14 +228,14 @@ int main( int argc, char** argv )
}
else if ( bli_is_dcomplex( dt ) )
{
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
dcomplex* ap = bli_obj_buffer( &a );
double* betap = bli_obj_buffer( &beta );
dcomplex* cp = bli_obj_buffer( &c );
f77_int mm = bli_obj_length( &c );
f77_int kk = bli_obj_width_after_trans( &a );
f77_int lda = bli_obj_col_stride( &a );
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
dcomplex* ap = bli_obj_buffer( &a );
double* betap = bli_obj_buffer( &beta );
dcomplex* cp = bli_obj_buffer( &c );
zherk_( &f77_uploc,
&f77_transa,