Fixed a dynamic dispatch crash on non-zen architectures.

Removed the direct calls to zen kernels from the CBLAS sources themselves.
Similar optimizations are done by the functions directly invoked from
the CBLAS layer.

The BLIS binary with the dynamic dispatch feature was crashing on non-zen CPUs
(specifically CPUs without AVX2 support). The crash was caused by unsupported
instructions in the zen-optimized kernels. The issue is fixed by calling only
reference kernels if the architecture detected at runtime is not zen, zen2 or zen3.

AMD-Internal: [CPUPL-1930]
Change-Id: I9178b7a98f2563dee2817064f37fcbb84073eeea
This commit is contained in:
Dipal M Zambare
2021-10-27 16:49:42 +05:30
parent ddbdfd0ba4
commit 61b5b9c4d0
12 changed files with 23 additions and 796 deletions

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
#include "cblas_f77.h"
@@ -22,72 +22,8 @@ void cblas_daxpy( f77_int N, double alpha, const double *X,
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((double*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((double*)X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = ((double*)Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = ((double*)Y);
incy0 = ( inc_t )(F77_incY);
}
bli_daxpyv_zen_int10(
BLIS_NO_CONJUGATE,
n0,
(double*)&alpha,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_daxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
@@ -18,80 +18,12 @@ void cblas_dcopy( f77_int N, const double *X,
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (double*)((X) + (n0-1)*(-F77_incX));
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (double*)(X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = (Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = (Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel */
bli_dcopyv_zen_int
(
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_dcopy( &F77_N, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif

View File

@@ -8,7 +8,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -20,85 +20,14 @@ double cblas_ddot( f77_int N, const double *X,
double dot;
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((double*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((double*)X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = ((double*)Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = ((double*)Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel. */
bli_ddotv_zen_int10
(
BLIS_NO_CONJUGATE,
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
&dot,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return dot;
#else
F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return dot;
#endif
}
}
#endif

View File

@@ -8,7 +8,7 @@
* Written by Keita Teranishi. 2/11/1998
*
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
#include "cblas_f77.h"
@@ -22,56 +22,8 @@ void cblas_dscal( f77_int N, double alpha, double *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
inc_t incx0;
/* Initialize BLIS. */
// bli_init_auto();
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (X);
incx0 = ( inc_t )(F77_incX);
}
/* Call BLIS kernel */
bli_dscalv_zen_int10
(
BLIS_NO_CONJUGATE,
n0,
&alpha,
x0, incx0,
NULL
);
#else
F77_dscal( &F77_N, &alpha, X, &F77_incX);
#endif
}
#endif

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -17,79 +17,12 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y,
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
double* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = (Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = (Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel */
bli_dswapv_zen_int8
(
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif

View File

@@ -7,7 +7,7 @@
* It calls the fortran wrapper before calling idamax.
*
* Written by Keita Teranishi. 2/11/1998
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -17,76 +17,12 @@ f77_int cblas_idamax( f77_int N, const double *X, f77_int incX)
f77_int iamax;
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX;
#else
#else
#define F77_N N
#define F77_incX incX
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
double* x0;
inc_t incx0;
gint_t bli_index;
/* If the vector is empty, return an index of zero. This early check
is needed to emulate netlib BLAS. Without it, bli_?amaxv() will
return 0, which ends up getting incremented to 1 (below) before
being returned, which is not what we want. */
if ( F77_N < 1 || F77_incX <= 0 ) return 0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((double*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((double*)X);
incx0 = ( inc_t )(F77_incX);
}
/* Call BLIS kernel. */
bli_damaxv_zen_int
(
n0,
x0, incx0,
&bli_index,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
iamax = bli_index;
return iamax;
#else
F77_idamax_sub( &F77_N, X, &F77_incX, &iamax);
return iamax ? iamax-1 : 0;
#endif
}
#endif

View File

@@ -7,7 +7,7 @@
* It calls the fortran wrapper before calling isamax.
*
* Written by Keita Teranishi. 2/11/1998
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -17,76 +17,12 @@ f77_int cblas_isamax( f77_int N, const float *X, f77_int incX)
f77_int iamax;
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX;
#else
#else
#define F77_N N
#define F77_incX incX
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
inc_t incx0;
gint_t bli_index;
/* If the vector is empty, return an index of zero. This early check
is needed to emulate netlib BLAS. Without it, bli_?amaxv() will
return 0, which ends up getting incremented to 1 (below) before
being returned, which is not what we want. */
if ( F77_N < 1 || F77_incX <= 0 ) return 0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((float*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((float*)X);
incx0 = ( inc_t )(F77_incX);
}
/* Call BLIS kernel. */
bli_samaxv_zen_int
(
n0,
x0, incx0,
&bli_index,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
iamax = bli_index;
return iamax;
#else
F77_isamax_sub( &F77_N, X, &F77_incX, &iamax);
return iamax ? iamax-1 : 0;
#endif
}
#endif

View File

@@ -8,7 +8,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*/
#include "cblas.h"
@@ -24,70 +24,7 @@ void cblas_saxpy( f77_int N, float alpha, const float *X,
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((float*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((float*)X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = ((float*)Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = ((float*)Y);
incy0 = ( inc_t )(F77_incY);
}
bli_saxpyv_zen_int10(
BLIS_NO_CONJUGATE,
n0,
(float*)&alpha,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_saxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -17,82 +17,12 @@ void cblas_scopy( f77_int N, const float *X,
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (float*)((X) + (n0-1)*(-F77_incX));
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (float*)(X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = (Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = (Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel */
bli_scopyv_zen_int
(
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_scopy( &F77_N, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif

View File

@@ -8,7 +8,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -20,85 +20,14 @@ float cblas_sdot( f77_int N, const float *X,
float dot;
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = ((float*)X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = ((float*)X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = ((float*)Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = ((float*)Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel. */
bli_sdotv_zen_int10
(
BLIS_NO_CONJUGATE,
BLIS_NO_CONJUGATE,
n0,
x0, incx0,
y0, incy0,
&dot,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return dot;
#else
F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
return dot;
#endif
}
}
#endif

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -22,63 +22,8 @@ void cblas_sscal( f77_int N, float alpha, float *X,
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
inc_t incx0;
/* Initialize BLIS. */
//bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (X);
incx0 = ( inc_t )(F77_incX);
}
/* Call BLIS kernel */
bli_sscalv_zen_int10
(
BLIS_NO_CONJUGATE,
n0,
&alpha,
x0, incx0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_sscal( &F77_N, &alpha, X, &F77_incX);
#endif
}
#endif

View File

@@ -7,7 +7,7 @@
*
* Written by Keita Teranishi. 2/11/1998
*
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
*
*/
#include "cblas.h"
@@ -17,81 +17,13 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
{
#ifdef F77_INT
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
#else
#else
#define F77_N N
#define F77_incX incX
#define F77_incY incY
#endif
#ifdef BLIS_CONFIG_EPYC
dim_t n0;
float* x0;
float* y0;
inc_t incx0;
inc_t incy0;
/* Initialize BLIS. */
// bli_init_auto();
/* Convert/typecast negative values of n to zero. */
if ( F77_N < 0 ) n0 = ( dim_t )0;
else n0 = ( dim_t )(F77_N);
/* If the input increments are negative, adjust the pointers so we can
use positive increments instead. */
if ( F77_incX < 0 )
{
/* The semantics of negative stride in BLAS are that the vector
operand be traversed in reverse order. (Another way to think
of this is that negative strides effectively reverse the order
of the vector, but without any explicit data movements.) This
is also how BLIS interprets negative strides. The differences
is that with BLAS, the caller *always* passes in the 0th (i.e.,
top-most or left-most) element of the vector, even when the
stride is negative. By contrast, in BLIS, negative strides are
used *relative* to the vector address as it is given. Thus, in
BLIS, if this backwards traversal is desired, the caller *must*
pass in the address to the (n-1)th (i.e., the bottom-most or
right-most) element along with a negative stride. */
x0 = (X) + (n0-1)*(-F77_incX);
incx0 = ( inc_t )(F77_incX);
}
else
{
x0 = (X);
incx0 = ( inc_t )(F77_incX);
}
if ( F77_incY < 0 )
{
y0 = (Y) + (n0-1)*(-F77_incY);
incy0 = ( inc_t )(F77_incY);
}
else
{
y0 = (Y);
incy0 = ( inc_t )(F77_incY);
}
/* Call BLIS kernel */
bli_sswapv_zen_int8
(
n0,
x0, incx0,
y0, incy0,
NULL
);
/* Finalize BLIS. */
// bli_finalize_auto();
#else
F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY);
#endif
}
#endif