mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Fixed possible level-3 inf/NaN issue when beta=0.
Details: - Redefined the xpbys_mxn and xpbys_mxn_u/_l macros to copy x into y (instead of scaling y by beta) when beta is zero. Scaling by a zero beta does not clear infs or NaNs, because 0 * inf and 0 * NaN both evaluate to NaN in IEEE 754 arithmetic; copying stamps out any infs or NaNs in the output matrix if it happens to be uninitialized. Thanks to Tony Kelman for isolating this bug.
This commit is contained in:
@@ -46,44 +46,84 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_seq0( *beta ) ) \
|
||||
{ \
|
||||
bli_sscopys_mxn( m, n, \
|
||||
x, rs_x, cs_x, \
|
||||
y, rs_y, cs_y ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define bli_dddxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_deq0( *beta ) ) \
|
||||
{ \
|
||||
bli_ddcopys_mxn( m, n, \
|
||||
x, rs_x, cs_x, \
|
||||
y, rs_y, cs_y ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define bli_cccxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_ceq0( *beta ) ) \
|
||||
{ \
|
||||
bli_cccopys_mxn( m, n, \
|
||||
x, rs_x, cs_x, \
|
||||
y, rs_y, cs_y ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
}
|
||||
|
||||
#define bli_zzzxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_zeq0( *beta ) ) \
|
||||
{ \
|
||||
bli_zzcopys_mxn( m, n, \
|
||||
x, rs_x, cs_x, \
|
||||
y, rs_y, cs_y ); \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -41,16 +41,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_seq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_sscopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
@@ -59,16 +69,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_deq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_ddcopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
@@ -77,16 +97,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_ceq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_cccopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
@@ -95,16 +125,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_zeq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_zzcopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i >= diagoff ) \
|
||||
{ \
|
||||
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
@@ -115,16 +155,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_seq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_sscopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
@@ -133,16 +183,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_deq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_ddcopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
@@ -151,16 +211,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_ceq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_cccopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
@@ -169,16 +239,26 @@
|
||||
{ \
|
||||
dim_t i, j; \
|
||||
\
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
|
||||
if ( bli_zeq0( *beta ) ) \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
bli_zzcopys( *(x + i*rs_x + j*cs_x), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
else \
|
||||
{ \
|
||||
for ( j = 0; j < n; ++j ) \
|
||||
for ( i = 0; i < m; ++i ) \
|
||||
if ( (doff_t)j - (doff_t)i <= diagoff ) \
|
||||
{ \
|
||||
bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
|
||||
*(beta), \
|
||||
*(y + i*rs_y + j*cs_y) ); \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user