From cb12e456f94c196c093e52f02a7cbca0032fc86e Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 8 Jul 2014 10:07:46 -0500 Subject: [PATCH] Fixed possible level-3 inf/NaN issue when beta=0. Details: - Redefined xpbys_mxn and xpbys_mxn_u/_l macros to employ a copy (instead of scaling by beta) when beta is zero. This will stamp out any possible infs or NaNs in the output matrix, if it happens to be uninitialized. Thanks to Tony Kelman for isolating this bug. --- frame/include/level0/bli_xpbys_mxn.h | 80 ++++++--- frame/include/level0/bli_xpbys_mxn_uplo.h | 192 +++++++++++++++------- 2 files changed, 196 insertions(+), 76 deletions(-) diff --git a/frame/include/level0/bli_xpbys_mxn.h b/frame/include/level0/bli_xpbys_mxn.h index 0f857f156..511e0b452 100644 --- a/frame/include/level0/bli_xpbys_mxn.h +++ b/frame/include/level0/bli_xpbys_mxn.h @@ -46,44 +46,84 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - bli_sssxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_seq0( *beta ) ) \ + { \ + bli_sscopys_mxn( m, n, \ + x, rs_x, cs_x, \ + y, rs_y, cs_y ); \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + bli_sssxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ } #define bli_dddxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - bli_dddxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_deq0( *beta ) ) \ + { \ + bli_ddcopys_mxn( m, n, \ + x, rs_x, cs_x, \ + y, rs_y, cs_y ); \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + bli_dddxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ } #define bli_cccxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - bli_cccxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_ceq0( *beta ) ) \ + { \ + bli_cccopys_mxn( m, n, \ + x, rs_x, cs_x, \ + y, rs_y, cs_y ); \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + bli_cccxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ } #define bli_zzzxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ - for ( i = 0; i < m; ++i ) \ - bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_zeq0( *beta ) ) \ + { \ + bli_zzcopys_mxn( m, n, \ + x, rs_x, cs_x, \ + y, rs_y, cs_y ); \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ } diff --git a/frame/include/level0/bli_xpbys_mxn_uplo.h b/frame/include/level0/bli_xpbys_mxn_uplo.h index 584ca78cd..b2681e333 100644 --- a/frame/include/level0/bli_xpbys_mxn_uplo.h +++ b/frame/include/level0/bli_xpbys_mxn_uplo.h @@ -41,16 +41,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_seq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i >= diagoff ) \ - { \ - bli_sssxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_sscopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ + { \ + bli_sssxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ } @@ -59,16 +69,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_deq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i >= diagoff ) \ - { \ - bli_dddxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_ddcopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ + { \ + bli_dddxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ } @@ -77,16 +97,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_ceq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i >= diagoff ) \ - { \ - bli_cccxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_cccopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ + { \ + bli_cccxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ } @@ -95,16 +125,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_zeq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i >= diagoff ) \ - { \ - bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_zzcopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i >= diagoff ) \ + { \ + bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ } @@ -115,16 +155,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_seq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i <= diagoff ) \ - { \ - bli_sssxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_sscopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ + { \ + bli_sssxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ } @@ -133,16 +183,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_deq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i <= diagoff ) \ - { \ - bli_dddxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_ddcopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ + { \ + bli_dddxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ } @@ -151,16 +211,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_ceq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i <= diagoff ) \ - { \ - bli_cccxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_cccopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ + { \ + bli_cccxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ } @@ -169,16 +239,26 @@ { \ dim_t i, j; \ \ - for ( j = 0; j < n; ++j ) \ + /* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \ + if ( bli_zeq0( *beta ) ) \ { \ + for ( j = 0; j < n; ++j ) \ for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ { \ - if ( (doff_t)j - (doff_t)i <= diagoff ) \ - { \ - bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \ - *(beta), \ - *(y + i*rs_y + j*cs_y) ); \ - } \ + bli_zzcopys( *(x + i*rs_x + j*cs_x), \ + *(y + i*rs_y + j*cs_y) ); \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n; ++j ) \ + for ( i = 0; i < m; ++i ) \ + if ( (doff_t)j - (doff_t)i <= diagoff ) \ + { \ + bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \ + *(beta), \ + *(y + i*rs_y + j*cs_y) ); \ } \ } \ }