mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Modified function definition for AXPY BLAS interface
Details: - Call the kernel directly from the BLAS API function to avoid framework overhead. - These changes currently apply only to the zen2 configuration; they will be enabled for the rest of the Zen processor family in the future. Change-Id: I0139e185178f726f5cd8cba0ff6a441a00d67868 Signed-off-by: Meghana Vankadari <Meghana.Vankadari@amd.com> AMD-Internal: [CPUPL-805]
This commit is contained in:
committed by
Meghana Vankadari
parent
489d501f2e
commit
80086fad15
@@ -5,7 +5,8 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
@@ -83,6 +84,149 @@ void PASTEF77(ch,blasname) \
|
||||
}
|
||||
|
||||
#ifdef BLIS_ENABLE_BLAS
|
||||
|
||||
#ifdef BLIS_CONFIG_ZEN2
|
||||
void saxpy_
|
||||
(
|
||||
const f77_int* n,
|
||||
const float* alpha,
|
||||
const float* x, const f77_int* incx,
|
||||
float* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
dim_t n0;
|
||||
float* x0;
|
||||
float* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( *n < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(*n);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( *incx < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The differences
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
x0 = ((float*)x) + (n0-1)*(-*incx);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((float*)x);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
if ( *incy < 0 )
|
||||
{
|
||||
y0 = ((float*)y) + (n0-1)*(-*incy);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((float*)y);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
|
||||
bli_saxpyv_zen_int10(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
(float*)alpha,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
}
|
||||
|
||||
void daxpy_
|
||||
(
|
||||
const f77_int* n,
|
||||
const double* alpha,
|
||||
const double* x, const f77_int* incx,
|
||||
double* y, const f77_int* incy
|
||||
)
|
||||
{
|
||||
dim_t n0;
|
||||
double* x0;
|
||||
double* y0;
|
||||
inc_t incx0;
|
||||
inc_t incy0;
|
||||
|
||||
/* Initialize BLIS. */
|
||||
// bli_init_auto();
|
||||
|
||||
/* Convert/typecast negative values of n to zero. */
|
||||
if ( *n < 0 ) n0 = ( dim_t )0;
|
||||
else n0 = ( dim_t )(*n);
|
||||
|
||||
/* If the input increments are negative, adjust the pointers so we can
|
||||
use positive increments instead. */
|
||||
if ( *incx < 0 )
|
||||
{
|
||||
/* The semantics of negative stride in BLAS are that the vector
|
||||
operand be traversed in reverse order. (Another way to think
|
||||
of this is that negative strides effectively reverse the order
|
||||
of the vector, but without any explicit data movements.) This
|
||||
is also how BLIS interprets negative strides. The differences
|
||||
is that with BLAS, the caller *always* passes in the 0th (i.e.,
|
||||
top-most or left-most) element of the vector, even when the
|
||||
stride is negative. By contrast, in BLIS, negative strides are
|
||||
used *relative* to the vector address as it is given. Thus, in
|
||||
BLIS, if this backwards traversal is desired, the caller *must*
|
||||
pass in the address to the (n-1)th (i.e., the bottom-most or
|
||||
right-most) element along with a negative stride. */
|
||||
x0 = ((double*)x) + (n0-1)*(-*incx);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
else
|
||||
{
|
||||
x0 = ((double*)x);
|
||||
incx0 = ( inc_t )(*incx);
|
||||
}
|
||||
if ( *incy < 0 )
|
||||
{
|
||||
y0 = ((double*)y) + (n0-1)*(-*incy);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
else
|
||||
{
|
||||
y0 = ((double*)y);
|
||||
incy0 = ( inc_t )(*incy);
|
||||
}
|
||||
|
||||
bli_daxpyv_zen_int10(
|
||||
BLIS_NO_CONJUGATE,
|
||||
n0,
|
||||
(double*)alpha,
|
||||
x0, incx0,
|
||||
y0, incy0,
|
||||
NULL
|
||||
);
|
||||
|
||||
/* Finalize BLIS. */
|
||||
// bli_finalize_auto();
|
||||
}
|
||||
|
||||
INSERT_GENTFUNC_BLAS_ZEN2( axpy, axpyv )
|
||||
|
||||
#else
|
||||
INSERT_GENTFUNC_BLAS( axpy, axpyv )
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
@@ -5,6 +5,7 @@
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas at Austin
|
||||
Copyright (C) 2020, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
@@ -56,6 +57,11 @@ GENTFUNC( scomplex, c, blasname, blisname ) \
|
||||
GENTFUNC( dcomplex, z, blasname, blisname )
|
||||
|
||||
|
||||
/* Expand GENTFUNC for the complex datatypes only: once with
   ( scomplex, c ) and once with ( dcomplex, z ), forwarding blasname and
   blisname unchanged. No real-domain (float/double) expansion is emitted. */
#define INSERT_GENTFUNC_BLAS_ZEN2( blasname, blisname ) \
	GENTFUNC( scomplex, c, blasname, blisname ) \
	GENTFUNC( dcomplex, z, blasname, blisname )
|
||||
|
||||
// -- Basic one-operand macro with real domain only --
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user