Modified function definitions for the BLAS and CBLAS interfaces of the DOTV and SWAPV APIs

Details:
- The kernel is called directly from the API entry point, bypassing the
  framework, to avoid framework overhead for single and double precision.
- Currently these changes apply only to the zen2 configuration.
  They will be enabled for the other zen-family processors in the future.
- These changes improve the performance of the BLAS and CBLAS interfaces of
  these APIs. They do not affect the BLIS-specific APIs.

Change-Id: I1eb7ca470ced82c3cfa8b22f2b53000d42fef96c
Signed-off-by: Meghana Vankadari <Meghana.Vankadari@amd.com>
AMD-Internal: [CPUPL-847,CPUPL-816]
This commit is contained in:
Meghana
2020-04-30 17:09:39 +05:30
parent 4ad5b1a5e6
commit 28bb28b79f
7 changed files with 642 additions and 27 deletions

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -86,8 +87,170 @@ ftype PASTEF772(ch,blasname,chc) \
}
#ifdef BLIS_ENABLE_BLAS
INSERT_GENTFUNCDOT_BLAS( dot, dotv )
#ifdef BLIS_CONFIG_ZEN2
float sdot_
(
const f77_int* n,
const float* x, const f77_int* incx,
const float* y, const f77_int* incy
)
{
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
float rho;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( *n < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(*n);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( *incx < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((float*)x) + (n0-1)*(-*incx);
incx0 = ( inc_t )(*incx);
}
else
{
x0 = ((float*)x);
incx0 = ( inc_t )(*incx);
}
if ( *incy < 0 )
{
y0 = ((float*)y) + (n0-1)*(-*incy);
incy0 = ( inc_t )(*incy);
}
else
{
y0 = ((float*)y);
incy0 = ( inc_t )(*incy);
}
/* Call BLIS kernel. */
bli_sdotv_zen_int10
(
BLIS_NO_CONJUGATE,
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
&rho,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
return rho;
}
double ddot_
(
const f77_int* n,
const double* x, const f77_int* incx,
const double* y, const f77_int* incy
)
{
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
double rho;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( *n < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(*n);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( *incx < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((double*)x) + (n0-1)*(-*incx);
incx0 = ( inc_t )(*incx);
}
else
{
x0 = ((double*)x);
incx0 = ( inc_t )(*incx);
}
if ( *incy < 0 )
{
y0 = ((double*)y) + (n0-1)*(-*incy);
incy0 = ( inc_t )(*incy);
}
else
{
y0 = ((double*)y);
incy0 = ( inc_t )(*incy);
}
/* Call BLIS kernel. */
bli_ddotv_zen_int10
(
BLIS_NO_CONJUGATE,
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
&rho,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
return rho;
}
INSERT_GENTFUNCDOT_BLAS_ZEN2( dot, dotv )
#else
INSERT_GENTFUNCDOT_BLAS( dot, dotv )
#endif
// -- "Black sheep" dot product function definitions --

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -48,38 +49,189 @@ void PASTEF77(ch,blasname) \
ftype* y, const f77_int* incy \
) \
{ \
dim_t n0; \
ftype* x0; \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
dim_t n0; \
ftype* x0; \
ftype* y0; \
inc_t incx0; \
inc_t incy0; \
\
/* Initialize BLIS. */ \
bli_init_auto(); \
/* Initialize BLIS. */ \
bli_init_auto(); \
\
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
/* Convert/typecast negative values of n to zero. */ \
bli_convert_blas_dim1( *n, n0 ); \
\
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */ \
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
\
/* Call BLIS interface. */ \
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
( \
n0, \
x0, incx0, \
y0, incy0, \
NULL, \
NULL \
); \
/* Call BLIS interface. */ \
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
( \
n0, \
x0, incx0, \
y0, incy0, \
NULL, \
NULL \
); \
\
/* Finalize BLIS. */ \
bli_finalize_auto(); \
/* Finalize BLIS. */ \
bli_finalize_auto(); \
}
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
void sswap_
(
const f77_int* n,
float* x, const f77_int* incx,
float* y, const f77_int* incy
)
{
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( *n < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(*n);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( *incx < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (x) + (n0-1)*(-*incx);
incx0 = ( inc_t )(*incx);
}
else
{
x0 = (x);
incx0 = ( inc_t )(*incx);
}
if ( *incy < 0 )
{
y0 = (y) + (n0-1)*(-*incy);
incy0 = ( inc_t )(*incy);
}
else
{
y0 = (y);
incy0 = ( inc_t )(*incy);
}
/* Call BLIS kernel */
bli_sswapv_zen_int8
(
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
}
void dswap_
(
const f77_int* n,
double* x, const f77_int* incx,
double* y, const f77_int* incy
)
{
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( *n < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(*n);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( *incx < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (x) + (n0-1)*(-*incx);
incx0 = ( inc_t )(*incx);
}
else
{
x0 = (x);
incx0 = ( inc_t )(*incx);
}
if ( *incy < 0 )
{
y0 = (y) + (n0-1)*(-*incy);
incy0 = ( inc_t )(*incy);
}
else
{
y0 = (y);
incy0 = ( inc_t )(*incy);
}
/* Call BLIS kernel */
bli_dswapv_zen_int8
(
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
}
INSERT_GENTFUNC_BLAS_ZEN2( swap, swapv )
#else
INSERT_GENTFUNC_BLAS( swap, swapv )
#endif
#endif

View File

@@ -7,6 +7,8 @@
* It calls the fortran wrapper before calling ddot.
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
*
*/
#include "cblas.h"
@@ -22,7 +24,80 @@ double cblas_ddot( f77_int N, const double *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, advance the pointers to the
(n-1)th element; the negative increments themselves are passed on unchanged. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The difference
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((double*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((double*)X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = ((double*)Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = ((double*)Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel. */
bli_ddotv_zen_int10
(
BLIS_NO_CONJUGATE,
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
&dot,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
return dot;
#else
F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
return dot;
#endif
}
#endif

View File

@@ -7,6 +7,8 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
*
*/
#include "cblas.h"
#include "cblas_f77.h"
@@ -20,6 +22,74 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, advance the pointers to the
(n-1)th element; the negative increments themselves are passed on unchanged. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The difference
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = (Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = (Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel */
bli_dswapv_zen_int8
(
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif

View File

@@ -8,6 +8,8 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc.
*
*/
#include "cblas.h"
#include "cblas_f77.h"
@@ -22,7 +24,79 @@ float cblas_sdot( f77_int N, const float *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, advance the pointers to the
(n-1)th element; the negative increments themselves are passed on unchanged. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The difference
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((float*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((float*)X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = ((float*)Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = ((float*)Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel. */
bli_sdotv_zen_int10
(
BLIS_NO_CONJUGATE,
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
&dot,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
return dot;
#else
F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
return dot;
#endif
}
#endif

View File

@@ -7,7 +7,9 @@
*
* Written by Keita Teranishi. 2/11/1998
*
*/
* Copyright (C) 2020, Advanced Micro Devices, Inc.
*
*/
#include "cblas.h"
#include "cblas_f77.h"
void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
@@ -20,6 +22,76 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_ZEN2
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, advance the pointers to the
(n-1)th element; the negative increments themselves are passed on unchanged. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The difference
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = (Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = (Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel */
bli_sswapv_zen_int8
(
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif

View File

@@ -56,10 +56,12 @@ GENTFUNC( double, d, blasname, blisname ) \
GENTFUNC( scomplex, c, blasname, blisname ) \
GENTFUNC( dcomplex, z, blasname, blisname )
/* zen2 counterpart of INSERT_GENTFUNC_BLAS: expands GENTFUNC for the
   scomplex (c) and dcomplex (z) cases only, with no real-domain expansion.
   It is used under BLIS_CONFIG_ZEN2 for swap, where sswap_ and dswap_ are
   written out by hand, so the real-precision symbols are not generated
   a second time. */
#define INSERT_GENTFUNC_BLAS_ZEN2( blasname, blisname ) \
\
GENTFUNC( scomplex, c, blasname, blisname ) \
GENTFUNC( dcomplex, z, blasname, blisname )
// -- Basic one-operand macro with real domain only --
@@ -80,6 +82,13 @@ GENTFUNCCO( dcomplex, double, z, d, blasname, blisname )
// -- Basic one-operand macro with conjugation (used only for dot, ger) --
/* zen2 counterpart of INSERT_GENTFUNCDOT_BLAS: expands GENTFUNCDOT for the
   scomplex (c) and dcomplex (z) cases only, each once with BLIS_CONJUGATE
   (suffix c) and once with BLIS_NO_CONJUGATE (suffix u). There is no
   real-domain expansion; under BLIS_CONFIG_ZEN2 sdot_ and ddot_ are written
   out by hand, so the real-precision symbols are not generated a second
   time. */
#define INSERT_GENTFUNCDOT_BLAS_ZEN2( blasname, blisname ) \
\
GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
#define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
\