xpbys_mxn: overwrite y with x when beta is zero

Each level-0 xpbys_mxn macro touched here (the general m x n form and the
two diagonal-offset-restricted forms in bli_xpbys_mxn_uplo.h) used to apply
bli_?xpbys (presumably y := x + beta * y; its definition is not part of
this patch) to every selected element unconditionally. With beta equal to
zero, Infs or NaNs already stored in y could therefore leak into the
result. Each macro now tests beta once up front:

  * beta == 0: x is copied over y (bli_??copys_mxn for the general form,
    bli_??copys element by element for the restricted forms);
  * otherwise: the original bli_???xpbys loop runs as before.

In the restricted forms only the elements satisfying
(doff_t)j - (doff_t)i >= diagoff (first four hunks) or <= diagoff (last
four hunks) are touched, exactly as before the change.

Notes:
  * The zero tests dereference the macro argument as *(beta), matching the
    existing bli_???xpbys calls, so that an expression passed as beta is
    grouped correctly.
  * Indentation and backslash alignment in this copy are reconstructed, so
    apply with whitespace-insensitive matching (for example
    "git apply --ignore-whitespace" or "patch -l").

diff --git a/frame/include/level0/bli_xpbys_mxn.h b/frame/include/level0/bli_xpbys_mxn.h
index 0f857f156..511e0b452 100644
--- a/frame/include/level0/bli_xpbys_mxn.h
+++ b/frame/include/level0/bli_xpbys_mxn.h
@@ -46,44 +46,84 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
-	for ( i = 0; i < m; ++i ) \
-	bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
-	              *(beta), \
-	              *(y + i*rs_y + j*cs_y) ); \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_seq0( *(beta) ) ) \
+	{ \
+		bli_sscopys_mxn( m, n, \
+		                 x, rs_x, cs_x, \
+		                 y, rs_y, cs_y ); \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
+		              *(beta), \
+		              *(y + i*rs_y + j*cs_y) ); \
+	} \
 }
 
 #define bli_dddxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
-	for ( i = 0; i < m; ++i ) \
-	bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
-	              *(beta), \
-	              *(y + i*rs_y + j*cs_y) ); \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_deq0( *(beta) ) ) \
+	{ \
+		bli_ddcopys_mxn( m, n, \
+		                 x, rs_x, cs_x, \
+		                 y, rs_y, cs_y ); \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
+		              *(beta), \
+		              *(y + i*rs_y + j*cs_y) ); \
+	} \
 }
 
 #define bli_cccxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
-	for ( i = 0; i < m; ++i ) \
-	bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
-	              *(beta), \
-	              *(y + i*rs_y + j*cs_y) ); \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_ceq0( *(beta) ) ) \
+	{ \
+		bli_cccopys_mxn( m, n, \
+		                 x, rs_x, cs_x, \
+		                 y, rs_y, cs_y ); \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
+		              *(beta), \
+		              *(y + i*rs_y + j*cs_y) ); \
+	} \
 }
 
 #define bli_zzzxpbys_mxn( m, n, x, rs_x, cs_x, beta, y, rs_y, cs_y ) \
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
-	for ( i = 0; i < m; ++i ) \
-	bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
-	              *(beta), \
-	              *(y + i*rs_y + j*cs_y) ); \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_zeq0( *(beta) ) ) \
+	{ \
+		bli_zzcopys_mxn( m, n, \
+		                 x, rs_x, cs_x, \
+		                 y, rs_y, cs_y ); \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
+		              *(beta), \
+		              *(y + i*rs_y + j*cs_y) ); \
+	} \
 }
 
 
diff --git a/frame/include/level0/bli_xpbys_mxn_uplo.h b/frame/include/level0/bli_xpbys_mxn_uplo.h
index 584ca78cd..b2681e333 100644
--- a/frame/include/level0/bli_xpbys_mxn_uplo.h
+++ b/frame/include/level0/bli_xpbys_mxn_uplo.h
@@ -41,16 +41,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_seq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i >= diagoff ) \
-			{ \
-				bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_sscopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
+		{ \
+			bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }
@@ -59,16 +69,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_deq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i >= diagoff ) \
-			{ \
-				bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_ddcopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
+		{ \
+			bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }
@@ -77,16 +97,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_ceq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i >= diagoff ) \
-			{ \
-				bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_cccopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
+		{ \
+			bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }
@@ -95,16 +125,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_zeq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i >= diagoff ) \
-			{ \
-				bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_zzcopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i >= diagoff ) \
+		{ \
+			bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }
@@ -115,16 +155,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_seq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i <= diagoff ) \
-			{ \
-				bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_sscopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
+		{ \
+			bli_sssxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }
@@ -133,16 +183,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_deq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i <= diagoff ) \
-			{ \
-				bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_ddcopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
+		{ \
+			bli_dddxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }
@@ -151,16 +211,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_ceq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i <= diagoff ) \
-			{ \
-				bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_cccopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
+		{ \
+			bli_cccxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }
@@ -169,16 +239,26 @@
 { \
 	dim_t i, j; \
 \
-	for ( j = 0; j < n; ++j ) \
+	/* If beta is zero, overwrite y with x (in case y has infs or NaNs). */ \
+	if ( bli_zeq0( *(beta) ) ) \
 	{ \
+		for ( j = 0; j < n; ++j ) \
 		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
 		{ \
-			if ( (doff_t)j - (doff_t)i <= diagoff ) \
-			{ \
-				bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
-				              *(beta), \
-				              *(y + i*rs_y + j*cs_y) ); \
-			} \
+			bli_zzcopys( *(x + i*rs_x + j*cs_x), \
+			             *(y + i*rs_y + j*cs_y) ); \
+		} \
+	} \
+	else \
+	{ \
+		for ( j = 0; j < n; ++j ) \
+		for ( i = 0; i < m; ++i ) \
+		if ( (doff_t)j - (doff_t)i <= diagoff ) \
+		{ \
+			bli_zzzxpbys( *(x + i*rs_x + j*cs_x), \
+			              *(beta), \
+			              *(y + i*rs_y + j*cs_y) ); \
 		} \
 	} \
 }