From 61b5b9c4d0f4b65250a407c8da45df5304b010dd Mon Sep 17 00:00:00 2001 From: Dipal M Zambare Date: Wed, 27 Oct 2021 16:49:42 +0530 Subject: [PATCH] Fixed dynamic dispatch crash issue on non-zen architecture. Removed the direct calls to zen kernels from the CBLAS sources themselves. Similar optimizations are already done by the functions invoked directly from the CBLAS layer. The BLIS binary with the dynamic dispatch feature was crashing on non-zen CPUs (specifically CPUs without AVX2 support). The crash was caused by unsupported instructions in the zen-optimized kernels. The issue is fixed by calling only reference kernels if the architecture detected at runtime is not zen, zen2 or zen3. AMD-Internal: [CPUPL-1930] Change-Id: I9178b7a98f2563dee2817064f37fcbb84073eeea --- frame/compat/cblas/src/cblas_daxpy.c | 68 +---------------------- frame/compat/cblas/src/cblas_dcopy.c | 72 +------------------------ frame/compat/cblas/src/cblas_ddot.c | 77 ++------------------------- frame/compat/cblas/src/cblas_dscal.c | 50 +---------------- frame/compat/cblas/src/cblas_dswap.c | 71 +----------------------- frame/compat/cblas/src/cblas_idamax.c | 68 +---------------------- frame/compat/cblas/src/cblas_isamax.c | 68 +---------------------- frame/compat/cblas/src/cblas_saxpy.c | 65 +--------------------- frame/compat/cblas/src/cblas_scopy.c | 74 +------------------------ frame/compat/cblas/src/cblas_sdot.c | 77 ++------------------------- frame/compat/cblas/src/cblas_sscal.c | 57 +------------------- frame/compat/cblas/src/cblas_sswap.c | 72 +------------------------ 12 files changed, 23 insertions(+), 796 deletions(-) diff --git a/frame/compat/cblas/src/cblas_daxpy.c b/frame/compat/cblas/src/cblas_daxpy.c index eb4736767..a42b92ae0 100644 --- a/frame/compat/cblas/src/cblas_daxpy.c +++ b/frame/compat/cblas/src/cblas_daxpy.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. 
All rights reserved. */ #include "cblas.h" #include "cblas_f77.h" @@ -22,72 +22,8 @@ void cblas_daxpy( f77_int N, double alpha, const double *X, #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - double* x0; - double* y0; - inc_t incx0; - inc_t incy0; - - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - x0 = ((double*)X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - } - else - { - x0 = ((double*)X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = ((double*)Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - } - else - { - y0 = ((double*)Y); - incy0 = ( inc_t )(F77_incY); - } - - bli_daxpyv_zen_int10( - BLIS_NO_CONJUGATE, - n0, - (double*)&alpha, - x0, incx0, - y0, incy0, - NULL - ); - - /* Finalize BLIS. 
*/ -// bli_finalize_auto(); - - -#else F77_daxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY); -#endif + } #endif diff --git a/frame/compat/cblas/src/cblas_dcopy.c b/frame/compat/cblas/src/cblas_dcopy.c index c0be6fc0f..7a5dcaf6b 100644 --- a/frame/compat/cblas/src/cblas_dcopy.c +++ b/frame/compat/cblas/src/cblas_dcopy.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. * */ @@ -18,80 +18,12 @@ void cblas_dcopy( f77_int N, const double *X, { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; -#else +#else #define F77_N N #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - double* x0; - double* y0; - inc_t incx0; - inc_t incy0; - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. 
*/ - - x0 = (double*)((X) + (n0-1)*(-F77_incX)); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = (double*)(X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = (Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - - } - else - { - y0 = (Y); - incy0 = ( inc_t )(F77_incY); - } - - - /* Call BLIS kernel */ - bli_dcopyv_zen_int - ( - BLIS_NO_CONJUGATE, - n0, - x0, incx0, - y0, incy0, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); -#else F77_dcopy( &F77_N, X, &F77_incX, Y, &F77_incY); -#endif - } #endif diff --git a/frame/compat/cblas/src/cblas_ddot.c b/frame/compat/cblas/src/cblas_ddot.c index fd16ad761..47fc9efb1 100644 --- a/frame/compat/cblas/src/cblas_ddot.c +++ b/frame/compat/cblas/src/cblas_ddot.c @@ -8,7 +8,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -20,85 +20,14 @@ double cblas_ddot( f77_int N, const double *X, double dot; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; -#else +#else #define F77_N N #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - double* x0; - double* y0; - inc_t incx0; - inc_t incy0; - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. 
The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = ((double*)X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = ((double*)X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = ((double*)Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - - } - else - { - y0 = ((double*)Y); - incy0 = ( inc_t )(F77_incY); - } - /* Call BLIS kernel. */ - bli_ddotv_zen_int10 - ( - BLIS_NO_CONJUGATE, - BLIS_NO_CONJUGATE, - n0, - x0, incx0, - y0, incy0, - &dot, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); - return dot; - -#else F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot); AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return dot; -#endif -} +} #endif diff --git a/frame/compat/cblas/src/cblas_dscal.c b/frame/compat/cblas/src/cblas_dscal.c index e0e3b29b4..88c5b3fa0 100644 --- a/frame/compat/cblas/src/cblas_dscal.c +++ b/frame/compat/cblas/src/cblas_dscal.c @@ -8,7 +8,7 @@ * Written by Keita Teranishi. 2/11/1998 * * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. */ #include "cblas.h" #include "cblas_f77.h" @@ -22,56 +22,8 @@ void cblas_dscal( f77_int N, double alpha, double *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - double* x0; - inc_t incx0; - /* Initialize BLIS. 
*/ -// bli_init_auto(); - - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = (X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = (X); - incx0 = ( inc_t )(F77_incX); - } - - /* Call BLIS kernel */ - bli_dscalv_zen_int10 - ( - BLIS_NO_CONJUGATE, - n0, - &alpha, - x0, incx0, - NULL - ); -#else F77_dscal( &F77_N, &alpha, X, &F77_incX); -#endif } #endif diff --git a/frame/compat/cblas/src/cblas_dswap.c b/frame/compat/cblas/src/cblas_dswap.c index 5a5ccbf14..1432d59ae 100644 --- a/frame/compat/cblas/src/cblas_dswap.c +++ b/frame/compat/cblas/src/cblas_dswap.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. 
* */ #include "cblas.h" @@ -17,79 +17,12 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y, { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; -#else +#else #define F77_N N #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - double* x0; - double* y0; - inc_t incx0; - inc_t incy0; - - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = (X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = (X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = (Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - - } - else - { - y0 = (Y); - incy0 = ( inc_t )(F77_incY); - } - - - /* Call BLIS kernel */ - bli_dswapv_zen_int8 - ( - n0, - x0, incx0, - y0, incy0, - NULL - ); - - /* Finalize BLIS. 
*/ -// bli_finalize_auto(); -#else F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY); -#endif } #endif diff --git a/frame/compat/cblas/src/cblas_idamax.c b/frame/compat/cblas/src/cblas_idamax.c index 071482c36..46d7d9377 100644 --- a/frame/compat/cblas/src/cblas_idamax.c +++ b/frame/compat/cblas/src/cblas_idamax.c @@ -7,7 +7,7 @@ * It calls the fortran wrapper before calling idamax. * * Written by Keita Teranishi. 2/11/1998 - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -17,76 +17,12 @@ f77_int cblas_idamax( f77_int N, const double *X, f77_int incX) f77_int iamax; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; -#else +#else #define F77_N N #define F77_incX incX #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - double* x0; - inc_t incx0; - gint_t bli_index; - - /* If the vector is empty, return an index of zero. This early check - is needed to emulate netlib BLAS. Without it, bli_?amaxv() will - return 0, which ends up getting incremented to 1 (below) before - being returned, which is not what we want. */ - if ( F77_N < 1 || F77_incX <= 0 ) return 0; - - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. 
By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = ((double*)X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = ((double*)X); - incx0 = ( inc_t )(F77_incX); - } - - /* Call BLIS kernel. */ - bli_damaxv_zen_int - ( - n0, - x0, incx0, - &bli_index, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - - iamax = bli_index; - - return iamax; - -#else F77_idamax_sub( &F77_N, X, &F77_incX, &iamax); return iamax ? iamax-1 : 0; -#endif } #endif diff --git a/frame/compat/cblas/src/cblas_isamax.c b/frame/compat/cblas/src/cblas_isamax.c index 81d13d099..f41e43097 100644 --- a/frame/compat/cblas/src/cblas_isamax.c +++ b/frame/compat/cblas/src/cblas_isamax.c @@ -7,7 +7,7 @@ * It calls the fortran wrapper before calling isamax. * * Written by Keita Teranishi. 2/11/1998 - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -17,76 +17,12 @@ f77_int cblas_isamax( f77_int N, const float *X, f77_int incX) f77_int iamax; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX; -#else +#else #define F77_N N #define F77_incX incX #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - float* x0; - inc_t incx0; - gint_t bli_index; - - /* If the vector is empty, return an index of zero. This early check - is needed to emulate netlib BLAS. Without it, bli_?amaxv() will - return 0, which ends up getting incremented to 1 (below) before - being returned, which is not what we want. */ - if ( F77_N < 1 || F77_incX <= 0 ) return 0; - - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. 
*/ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = ((float*)X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = ((float*)X); - incx0 = ( inc_t )(F77_incX); - } - - /* Call BLIS kernel. */ - bli_samaxv_zen_int - ( - n0, - x0, incx0, - &bli_index, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - - iamax = bli_index; - - return iamax; - -#else F77_isamax_sub( &F77_N, X, &F77_incX, &iamax); return iamax ? iamax-1 : 0; -#endif } #endif diff --git a/frame/compat/cblas/src/cblas_saxpy.c b/frame/compat/cblas/src/cblas_saxpy.c index 8c5ace43f..db6b21b85 100644 --- a/frame/compat/cblas/src/cblas_saxpy.c +++ b/frame/compat/cblas/src/cblas_saxpy.c @@ -8,7 +8,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. 
*/ #include "cblas.h" @@ -24,70 +24,7 @@ void cblas_saxpy( f77_int N, float alpha, const float *X, #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - float* x0; - float* y0; - inc_t incx0; - inc_t incy0; - - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - x0 = ((float*)X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - } - else - { - x0 = ((float*)X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = ((float*)Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - } - else - { - y0 = ((float*)Y); - incy0 = ( inc_t )(F77_incY); - } - - bli_saxpyv_zen_int10( - BLIS_NO_CONJUGATE, - n0, - (float*)&alpha, - x0, incx0, - y0, incy0, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - -#else F77_saxpy( &F77_N, &alpha, X, &F77_incX, Y, &F77_incY); -#endif - } #endif diff --git a/frame/compat/cblas/src/cblas_scopy.c b/frame/compat/cblas/src/cblas_scopy.c index 518d4f629..23c78e0dc 100644 --- a/frame/compat/cblas/src/cblas_scopy.c +++ b/frame/compat/cblas/src/cblas_scopy.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 
2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -17,82 +17,12 @@ void cblas_scopy( f77_int N, const float *X, { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; -#else +#else #define F77_N N #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - float* x0; - float* y0; - inc_t incx0; - inc_t incy0; - - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. 
*/ - - x0 = (float*)((X) + (n0-1)*(-F77_incX)); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = (float*)(X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = (Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - - } - else - { - y0 = (Y); - incy0 = ( inc_t )(F77_incY); - } - - - /* Call BLIS kernel */ - bli_scopyv_zen_int - ( - BLIS_NO_CONJUGATE, - n0, - x0, incx0, - y0, incy0, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - -#else F77_scopy( &F77_N, X, &F77_incX, Y, &F77_incY); -#endif - } #endif diff --git a/frame/compat/cblas/src/cblas_sdot.c b/frame/compat/cblas/src/cblas_sdot.c index 970eda42d..750459747 100644 --- a/frame/compat/cblas/src/cblas_sdot.c +++ b/frame/compat/cblas/src/cblas_sdot.c @@ -8,7 +8,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -20,85 +20,14 @@ float cblas_sdot( f77_int N, const float *X, float dot; #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; -#else +#else #define F77_N N #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - float* x0; - float* y0; - inc_t incx0; - inc_t incy0; - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. 
The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = ((float*)X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = ((float*)X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = ((float*)Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - - } - else - { - y0 = ((float*)Y); - incy0 = ( inc_t )(F77_incY); - } - - /* Call BLIS kernel. */ - bli_sdotv_zen_int10 - ( - BLIS_NO_CONJUGATE, - BLIS_NO_CONJUGATE, - n0, - x0, incx0, - y0, incy0, - &dot, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); - return dot; -#else F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot); AOCL_DTL_TRACE_EXIT(AOCL_DTL_LEVEL_TRACE_1); return dot; -#endif -} +} #endif diff --git a/frame/compat/cblas/src/cblas_sscal.c b/frame/compat/cblas/src/cblas_sscal.c index 6c4de4683..b1b4cb471 100644 --- a/frame/compat/cblas/src/cblas_sscal.c +++ b/frame/compat/cblas/src/cblas_sscal.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. * */ #include "cblas.h" @@ -22,63 +22,8 @@ void cblas_sscal( f77_int N, float alpha, float *X, #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - float* x0; - inc_t incx0; - - /* Initialize BLIS. */ - //bli_init_auto(); - - /* Convert/typecast negative values of n to zero. 
*/ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = (X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = (X); - incx0 = ( inc_t )(F77_incX); - } - - - /* Call BLIS kernel */ - bli_sscalv_zen_int10 - ( - BLIS_NO_CONJUGATE, - n0, - &alpha, - x0, incx0, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - -#else F77_sscal( &F77_N, &alpha, X, &F77_incX); -#endif } #endif diff --git a/frame/compat/cblas/src/cblas_sswap.c b/frame/compat/cblas/src/cblas_sswap.c index c09e154c0..d352ee96a 100644 --- a/frame/compat/cblas/src/cblas_sswap.c +++ b/frame/compat/cblas/src/cblas_sswap.c @@ -7,7 +7,7 @@ * * Written by Keita Teranishi. 2/11/1998 * - * Copyright (C) 2020, Advanced Micro Devices, Inc. All rights reserved. + * Copyright (C) 2021, Advanced Micro Devices, Inc. All rights reserved. 
* */ #include "cblas.h" @@ -17,81 +17,13 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y, { #ifdef F77_INT F77_INT F77_N=N, F77_incX=incX, F77_incY=incY; -#else +#else #define F77_N N #define F77_incX incX #define F77_incY incY #endif -#ifdef BLIS_CONFIG_EPYC - dim_t n0; - float* x0; - float* y0; - inc_t incx0; - inc_t incy0; - - /* Initialize BLIS. */ -// bli_init_auto(); - - /* Convert/typecast negative values of n to zero. */ - if ( F77_N < 0 ) n0 = ( dim_t )0; - else n0 = ( dim_t )(F77_N); - - /* If the input increments are negative, adjust the pointers so we can - use positive increments instead. */ - if ( F77_incX < 0 ) - { - /* The semantics of negative stride in BLAS are that the vector - operand be traversed in reverse order. (Another way to think - of this is that negative strides effectively reverse the order - of the vector, but without any explicit data movements.) This - is also how BLIS interprets negative strides. The differences - is that with BLAS, the caller *always* passes in the 0th (i.e., - top-most or left-most) element of the vector, even when the - stride is negative. By contrast, in BLIS, negative strides are - used *relative* to the vector address as it is given. Thus, in - BLIS, if this backwards traversal is desired, the caller *must* - pass in the address to the (n-1)th (i.e., the bottom-most or - right-most) element along with a negative stride. */ - - x0 = (X) + (n0-1)*(-F77_incX); - incx0 = ( inc_t )(F77_incX); - - } - else - { - x0 = (X); - incx0 = ( inc_t )(F77_incX); - } - - if ( F77_incY < 0 ) - { - y0 = (Y) + (n0-1)*(-F77_incY); - incy0 = ( inc_t )(F77_incY); - - } - else - { - y0 = (Y); - incy0 = ( inc_t )(F77_incY); - } - - - /* Call BLIS kernel */ - bli_sswapv_zen_int8 - ( - n0, - x0, incx0, - y0, incy0, - NULL - ); - - /* Finalize BLIS. */ -// bli_finalize_auto(); - -#else F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY); -#endif } #endif