mirror of
https://github.com/amd/blis.git
synced 2026-05-21 17:08:17 +00:00
Fixed dynamic dispatch crash issue on non-zen architecture.
Removed direct calls to zen kernels from the CBLAS source itself. Similar optimizations are done by the functions invoked directly from the CBLAS layer. The BLIS binary with the dynamic dispatch feature was crashing on non-zen CPUs (specifically CPUs without AVX2 support). The crash was caused by unsupported instructions in the zen-optimized kernels. The issue is fixed by calling only reference kernels if the architecture detected at runtime is not zen, zen2 or zen3. AMD-Internal: [CPUPL-1930] Change-Id: I9178b7a98f2563dee2817064f37fcbb84073eeea
This commit is contained in:
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
@@ -22,72 +22,8 @@ void cblas_daxpy( f77_int N, double alpha, const double *X,
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
x0 = ((double*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((double*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = ((double*)Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((double*)Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
bli_daxpyv_zen_int10(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
(double*)&alpha,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
|
||||
#else
|
||||
F77_daxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
|
||||
@@ -18,80 +18,12 @@ void cblas_dcopy( f77_int N, const double *X,
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (double*)((X) + (n0-1)*(-F77_incX));
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (double*)(X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = (Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_dcopyv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
#else
|
||||
F77_dcopy( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -20,85 +20,14 @@ double cblas_ddot( f77_int N, const double *X,
|
||||
double dot;
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((double*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((double*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = ((double*)Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((double*)Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
/* Call BLIS kernel. */
|
||||
bli_ddotv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
&dot,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return dot;
|
||||
|
||||
#else
|
||||
F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return dot;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
@@ -22,56 +22,8 @@ void cblas_dscal( f77_int N, double alpha, double *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
inc_t incx0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_dscalv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
&alpha,
|
||||
x0, incx0,
|
||||
NULL
|
||||
);
|
||||
#else
|
||||
F77_dscal( &F77_N, &alpha, X, &F77_incX);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -17,79 +17,12 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y,
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = (Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_dswapv_zen_int8
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
#else
|
||||
F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* It calls the fortran wrapper before calling idamax.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -17,76 +17,12 @@ f77_int cblas_idamax( f77_int N, const double *X, f77_int incX)
|
||||
f77_int iamax;
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
inc_t incx0;
|
||||
gint_t bli_index;
|
||||
|
||||
/* If the vector is empty, return an index of zero. This early check
|
||||
is needed to emulate netlib BLAS. Without it, bli_?amaxv() will
|
||||
return 0, which ends up getting incremented to 1 (below) before
|
||||
being returned, which is not what we want. */
|
||||
if ( F77_N < 1 || F77_incX <= 0 ) return 0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((double*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((double*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel. */
|
||||
bli_damaxv_zen_int
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
&bli_index,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
iamax = bli_index;
|
||||
|
||||
return iamax;
|
||||
|
||||
#else
|
||||
F77_idamax_sub( &F77_N, X, &F77_incX, &iamax);
|
||||
return iamax ? iamax-1 : 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
* It calls the fortran wrapper before calling isamax.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -17,76 +17,12 @@ f77_int cblas_isamax( f77_int N, const float *X, f77_int incX)
|
||||
f77_int iamax;
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
inc_t incx0;
|
||||
gint_t bli_index;
|
||||
|
||||
/* If the vector is empty, return an index of zero. This early check
|
||||
is needed to emulate netlib BLAS. Without it, bli_?amaxv() will
|
||||
return 0, which ends up getting incremented to 1 (below) before
|
||||
being returned, which is not what we want. */
|
||||
if ( F77_N < 1 || F77_incX <= 0 ) return 0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((float*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((float*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel. */
|
||||
bli_samaxv_zen_int
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
&bli_index,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
iamax = bli_index;
|
||||
|
||||
return iamax;
|
||||
|
||||
#else
|
||||
F77_isamax_sub( &F77_N, X, &F77_incX, &iamax);
|
||||
return iamax ? iamax-1 : 0;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*/
|
||||
|
||||
#include "cblas.h"
|
||||
@@ -24,70 +24,7 @@ void cblas_saxpy( f77_int N, float alpha, const float *X,
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
x0 = ((float*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((float*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = ((float*)Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((float*)Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
bli_saxpyv_zen_int10(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
(float*)&alpha,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
#else
|
||||
F77_saxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -17,82 +17,12 @@ void cblas_scopy( f77_int N, const float *X,
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (float*)((X) + (n0-1)*(-F77_incX));
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (float*)(X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = (Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_scopyv_zen_int
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
#else
|
||||
F77_scopy( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -8,7 +8,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -20,85 +20,14 @@ float cblas_sdot( f77_int N, const float *X,
|
||||
float dot;
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((float*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((float*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = ((float*)Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((float*)Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel. */
|
||||
bli_sdotv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
&dot,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return dot;
|
||||
#else
|
||||
F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
|
||||
AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1);
|
||||
return dot;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -22,63 +22,8 @@ void cblas_sscal( f77_int N, float alpha, float *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
inc_t incx0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
//bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_sscalv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
&alpha,
|
||||
x0, incx0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
#else
|
||||
F77_sscal( &F77_N, &alpha, X, &F77_incX);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
@@ -7,7 +7,7 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -17,81 +17,13 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
|
||||
{
|
||||
#ifdef F77_INT
|
||||
F77_INT F77_N=N, F77_incX=incX, F77_incY=incY;
|
||||
#else
|
||||
#else
|
||||
#define F77_N N
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_EPYC
|
||||
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = (Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_sswapv_zen_int8
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
#else
|
||||
F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
Reference in New Issue
Block a user