mirror of
https://github.com/amd/blis.git
synced 2026-05-12 10:05:38 +00:00
Modified Function definition for BLAS and CBLAS interfaces of DOTV and SWAPV APIs
Details: - For single and double precision, the kernel is called directly from the API function to avoid framework overhead. - Currently these changes apply only to the zen2 configuration; they will be enabled for zen family processors in the future. - These changes improve the performance of the BLAS and CBLAS interfaces of these APIs; they do not affect the BLIS-specific APIs. Change-Id: I1eb7ca470ced82c3cfa8b22f2b53000d42fef96c Signed-off-by: Meghana Vankadari <Meghana.Vankadari@amd.com> AMD-Internal: [CPUPL-847,CPUPL-816]
This commit is contained in:
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -86,8 +87,170 @@ ftype PASTEF772(ch,blasname,chc) \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
INSERT_GENTFUNCDOT_BLAS( dot, dotv )
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
|
||||
float sdot_
|
||||
(
|
||||
const f77_int* n,
|
||||
const float* x, const f77_int* incx,
|
||||
const float* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
float rho;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( *n < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(*n);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
|
||||
if ( *incx < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The differences
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((float*)x) + (n0-1)*(-*incx);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((float*)x);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
|
||||
if ( *incy < 0 )
|
||||
{
|
||||
y0 = ((float*)y) + (n0-1)*(-*incy);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((float*)y);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel. */
|
||||
bli_sdotv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
&rho,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
return rho;
|
||||
}
|
||||
|
||||
double ddot_
|
||||
(
|
||||
const f77_int* n,
|
||||
const double* x, const f77_int* incx,
|
||||
const double* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
double rho;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( *n < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(*n);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
|
||||
if ( *incx < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The differences
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((double*)x) + (n0-1)*(-*incx);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((double*)x);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
|
||||
if ( *incy < 0 )
|
||||
{
|
||||
y0 = ((double*)y) + (n0-1)*(-*incy);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((double*)y);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel. */
|
||||
bli_ddotv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
&rho,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
return rho;
|
||||
}
|
||||
|
||||
INSERT_GENTFUNCDOT_BLAS_ZEN2( dot, dotv )
|
||||
#else
|
||||
INSERT_GENTFUNCDOT_BLAS( dot, dotv )
|
||||
#endif
|
||||
|
||||
// -- "Black sheep" dot product function definitions --
|
||||
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -48,38 +49,189 @@ void PASTEF77(ch,blasname) \
|
||||
ftype* y, const f77_int* incy \
|
||||
) \
|
||||
{ \
|
||||
dim_t n0; \
|
||||
ftype* x0; \
|
||||
ftype* y0; \
|
||||
inc_t incx0; \
|
||||
inc_t incy0; \
|
||||
dim_t n0; \
|
||||
ftype* x0; \
|
||||
ftype* y0; \
|
||||
inc_t incx0; \
|
||||
inc_t incy0; \
|
||||
\
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
/* Initialize BLIS. */ \
|
||||
bli_init_auto(); \
|
||||
\
|
||||
/* Convert/typecast negative values of n to zero. */ \
|
||||
bli_convert_blas_dim1( *n, n0 ); \
|
||||
/* Convert/typecast negative values of n to zero. */ \
|
||||
bli_convert_blas_dim1( *n, n0 ); \
|
||||
\
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */ \
|
||||
bli_convert_blas_incv( n0, (ftype*)x, *incx, x0, incx0 ); \
|
||||
bli_convert_blas_incv( n0, (ftype*)y, *incy, y0, incy0 ); \
|
||||
\
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
|
||||
( \
|
||||
n0, \
|
||||
x0, incx0, \
|
||||
y0, incy0, \
|
||||
NULL, \
|
||||
NULL \
|
||||
); \
|
||||
/* Call BLIS interface. */ \
|
||||
PASTEMAC2(ch,blisname,BLIS_TAPI_EX_SUF) \
|
||||
( \
|
||||
n0, \
|
||||
x0, incx0, \
|
||||
y0, incy0, \
|
||||
NULL, \
|
||||
NULL \
|
||||
); \
|
||||
\
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
/* Finalize BLIS. */ \
|
||||
bli_finalize_auto(); \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
|
||||
void sswap_
|
||||
(
|
||||
const f77_int* n,
|
||||
float* x, const f77_int* incx,
|
||||
float* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( *n < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(*n);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( *incx < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The differences
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (x) + (n0-1)*(-*incx);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (x);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
|
||||
if ( *incy < 0 )
|
||||
{
|
||||
y0 = (y) + (n0-1)*(-*incy);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (y);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_sswapv_zen_int8
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
}
|
||||
|
||||
void dswap_
|
||||
(
|
||||
const f77_int* n,
|
||||
double* x, const f77_int* incx,
|
||||
double* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( *n < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(*n);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( *incx < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The differences
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (x) + (n0-1)*(-*incx);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (x);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
|
||||
if ( *incy < 0 )
|
||||
{
|
||||
y0 = (y) + (n0-1)*(-*incy);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (y);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_dswapv_zen_int8
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BLAS_ZEN2( swap, swapv )
|
||||
|
||||
#else
|
||||
INSERT_GENTFUNC_BLAS( swap, swapv )
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
* It calls the fortran wrapper before calling ddot.
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
@@ -22,7 +24,80 @@ double cblas_ddot( f77_int N, const double *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((double*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((double*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = ((double*)Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((double*)Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel. */
|
||||
bli_ddotv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
&dot,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
return dot;
|
||||
|
||||
#else
|
||||
F77_ddot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
|
||||
return dot;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,6 +7,8 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
@@ -20,6 +22,74 @@ void cblas_dswap( f77_int N, double *X, f77_int incX, double *Y,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = (Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_dswapv_zen_int8
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
#else
|
||||
F77_dswap( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -8,6 +8,8 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
@@ -22,7 +24,79 @@ float cblas_sdot( f77_int N, const float *X,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = ((float*)X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((float*)X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = ((float*)Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((float*)Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
/* Call BLIS kernel. */
|
||||
bli_sdotv_zen_int10
|
||||
(
|
||||
BLIS_NO_CONJUGATE,
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
&dot,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
return dot;
|
||||
#else
|
||||
F77_sdot_sub( &F77_N, X, &F77_incX, Y, &F77_incY, &dot);
|
||||
return dot;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -7,7 +7,9 @@
|
||||
*
|
||||
* Written by Keita Teranishi. 2/11/1998
|
||||
*
|
||||
*/
|
||||
* Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
*
|
||||
*/
|
||||
#include "cblas.h"
|
||||
#include "cblas_f77.h"
|
||||
void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
|
||||
@@ -20,6 +22,76 @@ void cblas_sswap( f77_int N, float *X, f77_int incX, float *Y,
|
||||
#define F77_incX incX
|
||||
#define F77_incY incY
|
||||
#endif
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( F77_N < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(F77_N);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( F77_incX < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The difference
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
|
||||
x0 = (X) + (n0-1)*(-F77_incX);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = (X);
|
||||
incx0 = ( inc_t )(F77_incX);
|
||||
}
|
||||
|
||||
if ( F77_incY < 0 )
|
||||
{
|
||||
y0 = (Y) + (n0-1)*(-F77_incY);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = (Y);
|
||||
incy0 = ( inc_t )(F77_incY);
|
||||
}
|
||||
|
||||
|
||||
/* Call BLIS kernel */
|
||||
bli_sswapv_zen_int8
|
||||
(
|
||||
n0,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
|
||||
#else
|
||||
F77_sswap( &F77_N, X, &F77_incX, Y, &F77_incY);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
@@ -56,10 +56,12 @@ GENTFUNC( double, d, blasname, blisname ) \
|
||||
GENTFUNC( scomplex, c, blasname, blisname ) \
|
||||
GENTFUNC( dcomplex, z, blasname, blisname )
|
||||
|
||||
|
||||
// -- Complex-only variant of the basic one-operand macro --
// Expands GENTFUNC for the scomplex (c) and dcomplex (z) datatypes only; no
// real-domain (s, d) instantiations are generated by this macro.
#define INSERT_GENTFUNC_BLAS_ZEN2( blasname, blisname ) \
\
GENTFUNC( scomplex, c, blasname, blisname ) \
GENTFUNC( dcomplex, z, blasname, blisname )
|
||||
|
||||
// -- Basic one-operand macro with real domain only --
|
||||
|
||||
|
||||
@@ -80,6 +82,13 @@ GENTFUNCCO( dcomplex, double, z, d, blasname, blisname )
|
||||
|
||||
// -- Basic one-operand macro with conjugation (used only for dot, ger) --
|
||||
|
||||
// -- Complex-only variant of the one-operand macro with conjugation --
// Expands GENTFUNCDOT for scomplex (c) and dcomplex (z), once with
// BLIS_CONJUGATE (suffix c) and once with BLIS_NO_CONJUGATE (suffix u) for
// each type; no real-domain (s, d) instantiations are generated by this
// macro.
#define INSERT_GENTFUNCDOT_BLAS_ZEN2( blasname, blisname ) \
\
GENTFUNCDOT( scomplex, c, c, BLIS_CONJUGATE,    blasname, blisname ) \
GENTFUNCDOT( scomplex, c, u, BLIS_NO_CONJUGATE, blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, c, BLIS_CONJUGATE,    blasname, blisname ) \
GENTFUNCDOT( dcomplex, z, u, BLIS_NO_CONJUGATE, blasname, blisname )
|
||||
|
||||
|
||||
#define INSERT_GENTFUNCDOT_BLAS( blasname, blisname ) \
|
||||
\
|
||||
|
||||
Reference in New Issue
Block a user