Properly implemented beta == 0 semantics.

Details:
- Renamed the set0 and set0_mxn macros to set0s and set0s_mxn,
  respectively.
- Added code to the following operations that sets the output operand to
  zero if the corresponding scalar is zero (rather than performing the
  floating-point multiply, or in the case of setv, copying the value).
  Because 0 * NaN and 0 * Inf both evaluate to NaN under IEEE 754
  arithmetic, this prevents NaNs and Infs that happen to reside in
  uninitialized output memory from creeping into results.
  - axpy
  - dotxv
  - scalv
  - scal2v
  - setv
  - gemv
  - ger
  - hemv
  - her
  - her2
  - gemm reference ukernels
This commit is contained in:
Field G. Van Zee
2013-02-13 18:44:59 -06:00
parent aedccbc85d
commit e6ac623a90
50 changed files with 411 additions and 174 deletions

View File

@@ -130,22 +130,22 @@
// set to constant
// set0
// set0s
#define bl2_sset0( a ) \
#define bl2_sset0s( a ) \
{ \
(a) = 0.0F; \
}
#define bl2_dset0( a ) \
#define bl2_dset0s( a ) \
{ \
(a) = 0.0; \
}
#define bl2_cset0( a ) \
#define bl2_cset0s( a ) \
{ \
(a).real = 0.0F; \
(a).imag = 0.0F; \
}
#define bl2_zset0( a ) \
#define bl2_zset0s( a ) \
{ \
(a).real = 0.0; \
(a).imag = 0.0; \

View File

@@ -32,49 +32,49 @@
*/
#ifndef BLIS_SET0_MXN_H
#define BLIS_SET0_MXN_H
#ifndef BLIS_SET0S_MXN_H
#define BLIS_SET0S_MXN_H
// set0_mxn
// set0s_mxn
// Notes:
// - The first char encodes the type of y.
// - There is no x operand; every element of the m x n matrix y is set to zero.
#define bl2_sset0_mxn( m, n, y, rs_y, cs_y ) \
#define bl2_sset0s_mxn( m, n, y, rs_y, cs_y ) \
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
bl2_sset0( *(y + i*rs_y + j*cs_y) ); \
bl2_sset0s( *(y + i*rs_y + j*cs_y) ); \
}
#define bl2_dset0_mxn( m, n, y, rs_y, cs_y ) \
#define bl2_dset0s_mxn( m, n, y, rs_y, cs_y ) \
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
bl2_dset0( *(y + i*rs_y + j*cs_y) ); \
bl2_dset0s( *(y + i*rs_y + j*cs_y) ); \
}
#define bl2_cset0_mxn( m, n, y, rs_y, cs_y ) \
#define bl2_cset0s_mxn( m, n, y, rs_y, cs_y ) \
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
bl2_cset0( *(y + i*rs_y + j*cs_y) ); \
bl2_cset0s( *(y + i*rs_y + j*cs_y) ); \
}
#define bl2_zset0_mxn( m, n, y, rs_y, cs_y ) \
#define bl2_zset0s_mxn( m, n, y, rs_y, cs_y ) \
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
bl2_zset0( *(y + i*rs_y + j*cs_y) ); \
bl2_zset0s( *(y + i*rs_y + j*cs_y) ); \
}