mirror of
https://github.com/amd/blis.git
synced 2026-04-20 07:38:53 +00:00
Properly implemented beta == 0 semantics.
Details:
- Renamed the set0 and set0_mxn macros to set0s and set0s_mxn, respectively.
- Added code to the following operations that sets the output operand to zero when the corresponding scalar is zero, rather than performing the floating-point multiply (or, in the case of setv, copying the value). Under IEEE 754 arithmetic, 0 * NaN and 0 * Inf both yield NaN, so multiplying by a zero scalar does not clear such values; writing zero explicitly prevents NaNs and Infs in uninitialized memory from creeping into results.
  - axpy
  - dotxv
  - scalv
  - scal2v
  - setv
  - gemv
  - ger
  - hemv
  - her
  - her2
  - gemm reference ukernels
This commit is contained in:
@@ -130,22 +130,22 @@
|
||||
|
||||
// set to constant
|
||||
|
||||
// set0
|
||||
// set0s
|
||||
|
||||
#define bl2_sset0( a ) \
|
||||
#define bl2_sset0s( a ) \
|
||||
{ \
|
||||
(a) = 0.0F; \
|
||||
}
|
||||
#define bl2_dset0( a ) \
|
||||
#define bl2_dset0s( a ) \
|
||||
{ \
|
||||
(a) = 0.0; \
|
||||
}
|
||||
#define bl2_cset0( a ) \
|
||||
#define bl2_cset0s( a ) \
|
||||
{ \
|
||||
(a).real = 0.0F; \
|
||||
(a).imag = 0.0F; \
|
||||
}
|
||||
#define bl2_zset0( a ) \
|
||||
#define bl2_zset0s( a ) \
|
||||
{ \
|
||||
(a).real = 0.0; \
|
||||
(a).imag = 0.0; \
|
||||
|
||||
@@ -32,49 +32,49 @@
|
||||
|
||||
*/
|
||||
|
||||
#ifndef BLIS_SET0_MXN_H
|
||||
#define BLIS_SET0_MXN_H
|
||||
#ifndef BLIS_SET0S_MXN_H
|
||||
#define BLIS_SET0S_MXN_H
|
||||
|
||||
// set0_mxn
|
||||
// set0s_mxn
|
||||
|
||||
// Notes:
|
||||
// - The first char encodes the type of x.
|
||||
// - The second char encodes the type of y.
|
||||
|
||||
#define bl2_sset0_mxn( m, n, y, rs_y, cs_y ) \
|
||||
#define bl2_sset0s_mxn( m, n, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bl2_sset0( *(y + i*rs_y + j*cs_y) ); \
|
||||
bl2_sset0s( *(y + i*rs_y + j*cs_y) ); \
|
||||
}
|
||||
|
||||
#define bl2_dset0_mxn( m, n, y, rs_y, cs_y ) \
|
||||
#define bl2_dset0s_mxn( m, n, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bl2_dset0( *(y + i*rs_y + j*cs_y) ); \
|
||||
bl2_dset0s( *(y + i*rs_y + j*cs_y) ); \
|
||||
}
|
||||
|
||||
#define bl2_cset0_mxn( m, n, y, rs_y, cs_y ) \
|
||||
#define bl2_cset0s_mxn( m, n, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bl2_cset0( *(y + i*rs_y + j*cs_y) ); \
|
||||
bl2_cset0s( *(y + i*rs_y + j*cs_y) ); \
|
||||
}
|
||||
|
||||
#define bl2_zset0_mxn( m, n, y, rs_y, cs_y ) \
|
||||
#define bl2_zset0s_mxn( m, n, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bl2_zset0( *(y + i*rs_y + j*cs_y) ); \
|
||||
bl2_zset0s( *(y + i*rs_y + j*cs_y) ); \
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user