Modified function definition for AXPY BLAS interface

Details:
- Call the kernel directly from the BLAS API entry point to avoid
  framework overhead.
- Currently these changes apply only to the zen2 configuration.
  They will be enabled for zen family processors in the future.

Change-Id: I0139e185178f726f5cd8cba0ff6a441a00d67868
Signed-off-by: Meghana Vankadari <Meghana.Vankadari@amd.com>
AMD-Internal: [CPUPL-805]
This commit is contained in:
Meghana
2020-04-17 15:57:34 +05:30
committed by Meghana Vankadari
parent 489d501f2e
commit 80086fad15
2 changed files with 152 additions and 2 deletions

View File

@@ -5,7 +5,8 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
@@ -83,6 +84,149 @@ void PASTEF77(ch,blasname) \
}
#ifdef BLIS_ENABLE_BLAS
#ifdef BLIS_CONFIG_ZEN2
/*
   saxpy_: Fortran-77 BLAS entry point for single-precision AXPY
   (y := y + alpha * x in the BLAS definition).

   The call is forwarded straight to the bli_saxpyv_zen_int10() kernel.
   BLIS is deliberately not initialized/finalized here and no context is
   supplied (the kernel's last argument is NULL); skipping the framework
   layers is the point of this specialized wrapper.

   Arguments (all passed by reference, per the Fortran convention):
     n     - number of elements to process; n <= 0 makes the call a no-op.
     alpha - scalar, forwarded to the kernel unchanged.
     x     - first element of the input vector.
     incx  - stride of x; may be negative (see below).
     y     - first element of the input/output vector.
     incy  - stride of y; may be negative (see below).
*/
void saxpy_
     (
       const f77_int* n,
       const float*   alpha,
       const float*   x, const f77_int* incx,
             float*   y, const f77_int* incy
     )
{
    dim_t  n0;
    float* x0;
    float* y0;
    inc_t  incx0;
    inc_t  incy0;

    /* Quick return for empty (or negative-length) vectors. Besides saving
       the kernel call, this keeps the negative-stride adjustment below
       from computing an address that lies before the caller's array,
       which would happen with n0 == 0. */
    if ( *n <= 0 ) return;

    n0    = ( dim_t )(*n);
    incx0 = ( inc_t )(*incx);
    incy0 = ( inc_t )(*incy);

    /* Negative strides mean "traverse the vector in reverse order" in both
       BLAS and BLIS. The difference is that a BLAS caller *always* passes
       the address of the 0th element, whereas BLIS applies the stride
       relative to the address it is given. So for a negative stride the
       stride itself is kept as is, and the base pointer is advanced to
       the element that is visited first, i.e. (n0-1)*|inc| elements past
       the address supplied by the caller. */
    x0 = ( float* )x;
    if ( incx0 < 0 ) x0 += ( n0 - 1 ) * ( -incx0 );

    y0 = ( float* )y;
    if ( incy0 < 0 ) y0 += ( n0 - 1 ) * ( -incy0 );

    bli_saxpyv_zen_int10
    (
      BLIS_NO_CONJUGATE,
      n0,
      ( float* )alpha,
      x0, incx0,
      y0, incy0,
      NULL
    );
}
/*
   daxpy_: Fortran-77 BLAS entry point for double-precision AXPY
   (y := y + alpha * x in the BLAS definition).

   The call is forwarded straight to the bli_daxpyv_zen_int10() kernel.
   BLIS is deliberately not initialized/finalized here and no context is
   supplied (the kernel's last argument is NULL); skipping the framework
   layers is the point of this specialized wrapper.

   Arguments (all passed by reference, per the Fortran convention):
     n     - number of elements to process; n <= 0 makes the call a no-op.
     alpha - scalar, forwarded to the kernel unchanged.
     x     - first element of the input vector.
     incx  - stride of x; may be negative (see below).
     y     - first element of the input/output vector.
     incy  - stride of y; may be negative (see below).
*/
void daxpy_
     (
       const f77_int* n,
       const double*  alpha,
       const double*  x, const f77_int* incx,
             double*  y, const f77_int* incy
     )
{
    dim_t   n0;
    double* x0;
    double* y0;
    inc_t   incx0;
    inc_t   incy0;

    /* Quick return for empty (or negative-length) vectors. Besides saving
       the kernel call, this keeps the negative-stride adjustment below
       from computing an address that lies before the caller's array,
       which would happen with n0 == 0. */
    if ( *n <= 0 ) return;

    n0    = ( dim_t )(*n);
    incx0 = ( inc_t )(*incx);
    incy0 = ( inc_t )(*incy);

    /* Negative strides mean "traverse the vector in reverse order" in both
       BLAS and BLIS. The difference is that a BLAS caller *always* passes
       the address of the 0th element, whereas BLIS applies the stride
       relative to the address it is given. So for a negative stride the
       stride itself is kept as is, and the base pointer is advanced to
       the element that is visited first, i.e. (n0-1)*|inc| elements past
       the address supplied by the caller. */
    x0 = ( double* )x;
    if ( incx0 < 0 ) x0 += ( n0 - 1 ) * ( -incx0 );

    y0 = ( double* )y;
    if ( incy0 < 0 ) y0 += ( n0 - 1 ) * ( -incy0 );

    bli_daxpyv_zen_int10
    (
      BLIS_NO_CONJUGATE,
      n0,
      ( double* )alpha,
      x0, incx0,
      y0, incy0,
      NULL
    );
}
/* zen2: saxpy_ and daxpy_ are hand-written above, so only the wrappers
   that INSERT_GENTFUNC_BLAS_ZEN2 instantiates are generated from the
   macro template here. */
INSERT_GENTFUNC_BLAS_ZEN2( axpy, axpyv )
#else
/* Other configurations: generate the axpy wrappers through the generic
   INSERT_GENTFUNC_BLAS macro. */
INSERT_GENTFUNC_BLAS( axpy, axpyv )
#endif /* BLIS_CONFIG_ZEN2 */
#endif /* BLIS_ENABLE_BLAS */

View File

@@ -5,6 +5,7 @@
libraries.
Copyright (C) 2014, The University of Texas at Austin
Copyright (C) 2020, Advanced Micro Devices, Inc.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
@@ -56,6 +57,11 @@ GENTFUNC( scomplex, c, blasname, blisname ) \
GENTFUNC( dcomplex, z, blasname, blisname )
/* Instantiates GENTFUNC for the complex datatypes only: scomplex (prefix
   'c') and dcomplex (prefix 'z'). No real-domain instances are emitted;
   presumably those are supplied by hand-written zen2 wrappers at the
   point of use -- verify against each caller. */
#define INSERT_GENTFUNC_BLAS_ZEN2( blasname, blisname ) \
\
GENTFUNC( scomplex, c, blasname, blisname ) \
GENTFUNC( dcomplex, z, blasname, blisname )
// -- Basic one-operand macro with real domain only --