Fixed possible level-3 inf/NaN issue when beta=0.

Details:
- Redefined the xpbys_mxn and xpbys_mxn_u/_l macros to copy x into y
  (instead of scaling y by beta) when beta is zero. This stamps out any
  infs or NaNs that may be present in the output matrix when it happens
  to be uninitialized. Thanks to Tony Kelman for isolating this bug.
This commit is contained in:
Field G. Van Zee
2014-07-08 10:07:46 -05:00
parent 4702350278
commit cb12e456f9
2 changed files with 196 additions and 76 deletions

View File

@@ -46,44 +46,84 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_seq0( *beta ) ) \
{ \
bli_sscopys_mxn( m, n, \
x, rs_x, cs_x, \
y, rs_y, cs_y ); \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
}
/*
  bli_dddxpbys_mxn

  Updates the m x n matrix y from the m x n matrix x and the scalar that
  beta points to. Element (i,j) of x is read at x + i*rs_x + j*cs_x and
  element (i,j) of y is accessed at y + i*rs_y + j*cs_y.

  - If *beta tests equal to zero (bli_deq0), x is copied into y with
    bli_ddcopys_mxn, so any infs or NaNs already stored in y are
    overwritten instead of being fed into the scaled update.
  - Otherwise bli_dddxpbys is applied exactly once to each element pair
    (presumably y := x + beta * y -- confirm against its definition).

  NOTE: expands to a braced block that declares its own i and j. The
  arguments are substituted textually and may be evaluated many times,
  so pass side-effect-free expressions that do not use the names i or j.
*/
#define bli_dddxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	dim_t i, j; \
\
	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
	if ( bli_deq0( *beta ) ) \
	{ \
		bli_ddcopys_mxn( m, n, \
		                 x, rs_x, cs_x, \
		                 y, rs_y, cs_y ); \
	} \
	else \
	{ \
		/* Walk one column at a time (j outer, i inner). */ \
		for ( j = 0; j < n; ++j ) \
		for ( i = 0; i < m; ++i ) \
		bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
		              *(beta), \
		              *(y + i*rs_y + j*cs_y) ); \
	} \
}
/*
  bli_cccxpbys_mxn

  Updates the m x n matrix y from the m x n matrix x and the scalar that
  beta points to. Element (i,j) of x is read at x + i*rs_x + j*cs_x and
  element (i,j) of y is accessed at y + i*rs_y + j*cs_y.

  - If *beta tests equal to zero (bli_ceq0), x is copied into y with
    bli_cccopys_mxn, so any infs or NaNs already stored in y are
    overwritten instead of being fed into the scaled update.
  - Otherwise bli_cccxpbys is applied exactly once to each element pair
    (presumably y := x + beta * y -- confirm against its definition).

  NOTE: expands to a braced block that declares its own i and j. The
  arguments are substituted textually and may be evaluated many times,
  so pass side-effect-free expressions that do not use the names i or j.
*/
#define bli_cccxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	dim_t i, j; \
\
	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
	if ( bli_ceq0( *beta ) ) \
	{ \
		bli_cccopys_mxn( m, n, \
		                 x, rs_x, cs_x, \
		                 y, rs_y, cs_y ); \
	} \
	else \
	{ \
		/* Walk one column at a time (j outer, i inner). */ \
		for ( j = 0; j < n; ++j ) \
		for ( i = 0; i < m; ++i ) \
		bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
		              *(beta), \
		              *(y + i*rs_y + j*cs_y) ); \
	} \
}
/*
  bli_zzzxpbys_mxn

  Updates the m x n matrix y from the m x n matrix x and the scalar that
  beta points to. Element (i,j) of x is read at x + i*rs_x + j*cs_x and
  element (i,j) of y is accessed at y + i*rs_y + j*cs_y.

  - If *beta tests equal to zero (bli_zeq0), x is copied into y with
    bli_zzcopys_mxn, so any infs or NaNs already stored in y are
    overwritten instead of being fed into the scaled update.
  - Otherwise bli_zzzxpbys is applied exactly once to each element pair
    (presumably y := x + beta * y -- confirm against its definition).

  NOTE: expands to a braced block that declares its own i and j. The
  arguments are substituted textually and may be evaluated many times,
  so pass side-effect-free expressions that do not use the names i or j.
*/
#define bli_zzzxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
{ \
	dim_t i, j; \
\
	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
	if ( bli_zeq0( *beta ) ) \
	{ \
		bli_zzcopys_mxn( m, n, \
		                 x, rs_x, cs_x, \
		                 y, rs_y, cs_y ); \
	} \
	else \
	{ \
		/* Walk one column at a time (j outer, i inner). */ \
		for ( j = 0; j < n; ++j ) \
		for ( i = 0; i < m; ++i ) \
		bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
		              *(beta), \
		              *(y + i*rs_y + j*cs_y) ); \
	} \
}

View File

@@ -41,16 +41,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_seq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_sscopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}
@@ -59,16 +69,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_deq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_ddcopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}
@@ -77,16 +97,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_ceq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_cccopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}
@@ -95,16 +125,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_zeq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_zzcopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i >= diagoff ) \
{ \
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}
@@ -115,16 +155,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_seq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_sscopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}
@@ -133,16 +183,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_deq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_ddcopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}
@@ -151,16 +211,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_ceq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_cccopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}
@@ -169,16 +239,26 @@
{ \
dim_t i, j; \
\
for ( j = 0; j < n; ++j ) \
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
if ( bli_zeq0( *beta ) ) \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
bli_zzcopys( *(x + i*rs_x + j*cs_x), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
else \
{ \
for ( j = 0; j < n; ++j ) \
for ( i = 0; i < m; ++i ) \
if ( (doff_t)j - (doff_t)i <= diagoff ) \
{ \
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
*(beta), \
*(y + i*rs_y + j*cs_y) ); \
} \
} \
}