diff --git a/frame/0/absqsc/bli_absqsc_unb_var1.c b/frame/0/absqsc/bli_absqsc_unb_var1.c index 9fbb38477..08e830d9b 100644 --- a/frame/0/absqsc/bli_absqsc_unb_var1.c +++ b/frame/0/absqsc/bli_absqsc_unb_var1.c @@ -72,7 +72,7 @@ void bli_absqsc_unb_var1( obj_t* chi, #undef GENTFUNCR -#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ void PASTEMAC(chx,varname)( \ void* chi, \ @@ -95,5 +95,5 @@ void PASTEMAC(chx,varname)( \ PASTEMAC2(chxr,chxr,copys)( chi_r, *absq_cast ); \ } -INSERT_GENTFUNCR_BASIC( absqsc, absqsc_unb_var1 ) +INSERT_GENTFUNCR_BASIC0( absqsc_unb_var1 ) diff --git a/frame/0/absqsc/bli_absqsc_unb_var1.h b/frame/0/absqsc/bli_absqsc_unb_var1.h index 976f8a33e..f72b6d6c7 100644 --- a/frame/0/absqsc/bli_absqsc_unb_var1.h +++ b/frame/0/absqsc/bli_absqsc_unb_var1.h @@ -38,12 +38,12 @@ void bli_absqsc_unb_var1( obj_t* chi, #undef GENTPROTR -#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, opname ) \ +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ -void PASTEMAC(chx,opname)( \ - void* chi, \ - void* absq \ - ); +void PASTEMAC(chx,varname)( \ + void* chi, \ + void* absq \ + ); INSERT_GENTPROTR_BASIC( absqsc_unb_var1 ) diff --git a/frame/0/addsc/bli_addsc_unb_var1.c b/frame/0/addsc/bli_addsc_unb_var1.c index 0cc9d7b27..cfdba6c15 100644 --- a/frame/0/addsc/bli_addsc_unb_var1.c +++ b/frame/0/addsc/bli_addsc_unb_var1.c @@ -85,7 +85,7 @@ void bli_addsc_unb_var1( obj_t* chi, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname)( \ conj_t conjchi, \ @@ -105,13 +105,13 @@ void PASTEMAC2(chx,chy,varname)( \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC2_BASIC( addsc, addsc_unb_var1 ) +INSERT_GENTFUNC2_BASIC0( addsc_unb_var1 ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( addsc, addsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_D0( addsc_unb_var1 ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( addsc, addsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_P0( addsc_unb_var1 ) #endif diff --git a/frame/0/addsc/bli_addsc_unb_var1.h b/frame/0/addsc/bli_addsc_unb_var1.h index 701e326f5..e9f2ab357 100644 --- a/frame/0/addsc/bli_addsc_unb_var1.h +++ b/frame/0/addsc/bli_addsc_unb_var1.h @@ -38,13 +38,13 @@ void bli_addsc_unb_var1( obj_t* chi, #undef GENTPROT2 -#define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ +#define GENTPROT2( ctype_x, ctype_y, chx, chy, varname ) \ \ -void PASTEMAC2(chx,chy,opname)( \ - conj_t conjchi, \ - void* chi, \ - void* psi \ - ); +void PASTEMAC2(chx,chy,varname)( \ + conj_t conjchi, \ + void* chi, \ + void* psi \ + ); INSERT_GENTPROT2_BASIC( addsc_unb_var1 ) diff --git a/frame/0/copysc/bli_copysc_unb_var1.c b/frame/0/copysc/bli_copysc_unb_var1.c index 5959509aa..c18cf97d7 100644 --- a/frame/0/copysc/bli_copysc_unb_var1.c +++ b/frame/0/copysc/bli_copysc_unb_var1.c @@ -91,7 +91,7 @@ void bli_copysc_unb_var1( obj_t* chi, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname)( \ conj_t conjchi, \ @@ -114,13 +114,13 @@ void PASTEMAC2(chx,chy,varname)( \ // Enable all datatype combinations even when only basic datatype support // is requested. 
-INSERT_GENTFUNC2_BASIC( copysc, copysc_unb_var1 ) +INSERT_GENTFUNC2_BASIC0( copysc_unb_var1 ) //#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( copysc, copysc_unb_var1 ) +INSERT_GENTFUNC2_MIX_D0( copysc_unb_var1 ) //#endif //#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( copysc, copysc_unb_var1 ) +INSERT_GENTFUNC2_MIX_P0( copysc_unb_var1 ) //#endif diff --git a/frame/0/divsc/bli_divsc_unb_var1.c b/frame/0/divsc/bli_divsc_unb_var1.c index f3920b47b..349d476a3 100644 --- a/frame/0/divsc/bli_divsc_unb_var1.c +++ b/frame/0/divsc/bli_divsc_unb_var1.c @@ -85,7 +85,7 @@ void bli_divsc_unb_var1( obj_t* chi, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname)( \ conj_t conjchi, \ @@ -105,13 +105,13 @@ void PASTEMAC2(chx,chy,varname)( \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC2_BASIC( divsc, divsc_unb_var1 ) +INSERT_GENTFUNC2_BASIC0( divsc_unb_var1 ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( divsc, divsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_D0( divsc_unb_var1 ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( divsc, divsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_P0( divsc_unb_var1 ) #endif diff --git a/frame/0/divsc/bli_divsc_unb_var1.h b/frame/0/divsc/bli_divsc_unb_var1.h index e4d72ee0a..bd97ee1e2 100644 --- a/frame/0/divsc/bli_divsc_unb_var1.h +++ b/frame/0/divsc/bli_divsc_unb_var1.h @@ -38,13 +38,13 @@ void bli_divsc_unb_var1( obj_t* chi, #undef GENTPROT2 -#define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ +#define GENTPROT2( ctype_x, ctype_y, chx, chy, varname ) \ \ -void PASTEMAC2(chx,chy,opname)( \ - conj_t conjchi, \ - void* chi, \ - void* psi \ - ); +void PASTEMAC2(chx,chy,varname)( \ + conj_t conjchi, \ + void* chi, \ + void* psi \ + ); INSERT_GENTPROT2_BASIC( divsc_unb_var1 ) diff --git a/frame/0/mulsc/bli_mulsc_unb_var1.c b/frame/0/mulsc/bli_mulsc_unb_var1.c index 9720f2ea6..e0c090071 100644 --- a/frame/0/mulsc/bli_mulsc_unb_var1.c +++ b/frame/0/mulsc/bli_mulsc_unb_var1.c @@ -85,7 +85,7 @@ void bli_mulsc_unb_var1( obj_t* chi, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname)( \ conj_t conjchi, \ @@ -111,13 +111,13 @@ void PASTEMAC2(chx,chy,varname)( \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC2_BASIC( mulsc, mulsc_unb_var1 ) +INSERT_GENTFUNC2_BASIC0( mulsc_unb_var1 ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( mulsc, mulsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_D0( mulsc_unb_var1 ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( mulsc, mulsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_P0( mulsc_unb_var1 ) #endif diff --git a/frame/0/mulsc/bli_mulsc_unb_var1.h b/frame/0/mulsc/bli_mulsc_unb_var1.h index 603a2fc0e..d62d0855e 100644 --- a/frame/0/mulsc/bli_mulsc_unb_var1.h +++ b/frame/0/mulsc/bli_mulsc_unb_var1.h @@ -38,13 +38,13 @@ void bli_mulsc_unb_var1( obj_t* chi, #undef GENTPROT2 -#define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ +#define GENTPROT2( ctype_x, ctype_y, chx, chy, varname ) \ \ -void PASTEMAC2(chx,chy,opname)( \ - conj_t conjchi, \ - void* chi, \ - void* psi \ - ); +void PASTEMAC2(chx,chy,varname)( \ + conj_t conjchi, \ + void* chi, \ + void* psi \ + ); INSERT_GENTPROT2_BASIC( mulsc_unb_var1 ) diff --git a/frame/0/fnormsc/bli_fnormsc.c b/frame/0/normfsc/bli_normfsc.c similarity index 93% rename from frame/0/fnormsc/bli_fnormsc.c rename to frame/0/normfsc/bli_normfsc.c index edeebf430..de310345c 100644 --- a/frame/0/fnormsc/bli_fnormsc.c +++ b/frame/0/normfsc/bli_normfsc.c @@ -38,13 +38,13 @@ // // Define object-based interface. 
// -void bli_fnormsc( obj_t* chi, +void bli_normfsc( obj_t* chi, obj_t* norm ) { if ( bli_error_checking_is_enabled() ) - bli_fnormsc_check( chi, norm ); + bli_normfsc_check( chi, norm ); - bli_fnormsc_unb_var1( chi, norm ); + bli_normfsc_unb_var1( chi, norm ); } @@ -63,5 +63,5 @@ void PASTEMAC2(chx,chxr,opname)( \ norm ); \ } -INSERT_GENTFUNCR_BASIC( fnormsc, fnormsc_unb_var1 ) +INSERT_GENTFUNCR_BASIC( normfsc, normfsc_unb_var1 ) diff --git a/frame/0/fnormsc/bli_fnormsc.h b/frame/0/normfsc/bli_normfsc.h similarity index 94% rename from frame/0/fnormsc/bli_fnormsc.h rename to frame/0/normfsc/bli_normfsc.h index aa1e074f2..aa4597f6c 100644 --- a/frame/0/fnormsc/bli_fnormsc.h +++ b/frame/0/normfsc/bli_normfsc.h @@ -32,14 +32,14 @@ */ -#include "bli_fnormsc_check.h" -#include "bli_fnormsc_unb_var1.h" +#include "bli_normfsc_check.h" +#include "bli_normfsc_unb_var1.h" // // Prototype object-based interface. // -void bli_fnormsc( obj_t* chi, +void bli_normfsc( obj_t* chi, obj_t* norm ); @@ -54,5 +54,5 @@ void PASTEMAC2(chx,chxr,opname)( \ ctype_xr* norm \ ); -INSERT_GENTPROTR_BASIC( fnormsc ) +INSERT_GENTPROTR_BASIC( normfsc ) diff --git a/frame/0/fnormsc/bli_fnormsc_check.c b/frame/0/normfsc/bli_normfsc_check.c similarity index 98% rename from frame/0/fnormsc/bli_fnormsc_check.c rename to frame/0/normfsc/bli_normfsc_check.c index dae822016..c61fc6cea 100644 --- a/frame/0/fnormsc/bli_fnormsc_check.c +++ b/frame/0/normfsc/bli_normfsc_check.c @@ -34,7 +34,7 @@ #include "blis.h" -void bli_fnormsc_check( obj_t* chi, +void bli_normfsc_check( obj_t* chi, obj_t* norm ) { err_t e_val; diff --git a/frame/0/fnormsc/bli_fnormsc_check.h b/frame/0/normfsc/bli_normfsc_check.h similarity index 97% rename from frame/0/fnormsc/bli_fnormsc_check.h rename to frame/0/normfsc/bli_normfsc_check.h index ece91396b..2d3ce8d89 100644 --- a/frame/0/fnormsc/bli_fnormsc_check.h +++ b/frame/0/normfsc/bli_normfsc_check.h @@ -32,5 +32,5 @@ */ -void bli_fnormsc_check( obj_t* chi, +void 
bli_normfsc_check( obj_t* chi, obj_t* norm ); diff --git a/frame/0/fnormsc/bli_fnormsc_unb_var1.c b/frame/0/normfsc/bli_normfsc_unb_var1.c similarity index 77% rename from frame/0/fnormsc/bli_fnormsc_unb_var1.c rename to frame/0/normfsc/bli_normfsc_unb_var1.c index a320f48b0..bda79878c 100644 --- a/frame/0/fnormsc/bli_fnormsc_unb_var1.c +++ b/frame/0/normfsc/bli_normfsc_unb_var1.c @@ -34,17 +34,17 @@ #include "blis.h" -#define FUNCPTR_T fnormsc_fp +#define FUNCPTR_T normfsc_fp typedef void (*FUNCPTR_T)( void* chi, void* norm ); -static FUNCPTR_T GENARRAY(ftypes,fnormsc_unb_var1); +static FUNCPTR_T GENARRAY(ftypes,normfsc_unb_var1); -void bli_fnormsc_unb_var1( obj_t* chi, +void bli_normfsc_unb_var1( obj_t* chi, obj_t* norm ) { num_t dt_chi; @@ -72,7 +72,7 @@ void bli_fnormsc_unb_var1( obj_t* chi, #undef GENTFUNCR -#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ void PASTEMAC(chx,varname)( \ void* chi, \ @@ -81,27 +81,9 @@ void PASTEMAC(chx,varname)( \ { \ ctype_x* chi_cast = chi; \ ctype_xr* norm_cast = norm; \ - ctype_xr chi_r; \ - ctype_xr chi_i; \ \ - PASTEMAC2(chx,chxr,gets)( *chi_cast, \ - chi_r, \ - chi_i ); \ -\ - if ( bli_is_real( PASTEMAC(chx,type) ) ) \ - { \ - /* norm = abs( chi_r ); */ \ - *norm_cast = bli_fabs( chi_r ); \ - } \ - else \ - { \ - /* norm = sqrt( chi_r * chi_r + chi_i * chi_i ); */ \ - PASTEMAC2(chxr,chxr,scals)( chi_r, chi_r ); \ - PASTEMAC2(chxr,chxr,scals)( chi_i, chi_i ); \ - PASTEMAC2(chxr,chxr,adds)( chi_i, chi_r ); \ - PASTEMAC2(chxr,chxr,sqrt2s)( chi_r, *norm_cast ); \ - } \ + PASTEMAC2(chx,chxr,abval2s)( *chi_cast, *norm_cast ); \ } -INSERT_GENTFUNCR_BASIC( fnormsc, fnormsc_unb_var1 ) +INSERT_GENTFUNCR_BASIC0( normfsc_unb_var1 ) diff --git a/frame/0/fnormsc/bli_fnormsc_unb_var1.h b/frame/0/normfsc/bli_normfsc_unb_var1.h similarity index 85% rename from frame/0/fnormsc/bli_fnormsc_unb_var1.h rename to frame/0/normfsc/bli_normfsc_unb_var1.h index 
b9f261183..1ae3dee09 100644 --- a/frame/0/fnormsc/bli_fnormsc_unb_var1.h +++ b/frame/0/normfsc/bli_normfsc_unb_var1.h @@ -33,17 +33,17 @@ */ -void bli_fnormsc_unb_var1( obj_t* chi, +void bli_normfsc_unb_var1( obj_t* chi, obj_t* norm ); #undef GENTPROTR -#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, opname ) \ +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ -void PASTEMAC(chx,opname)( \ - void* chi, \ - void* norm \ - ); +void PASTEMAC(chx,varname)( \ + void* chi, \ + void* norm \ + ); -INSERT_GENTPROTR_BASIC( fnormsc_unb_var1 ) +INSERT_GENTPROTR_BASIC( normfsc_unb_var1 ) diff --git a/frame/0/sqrtsc/bli_sqrtsc_unb_var1.c b/frame/0/sqrtsc/bli_sqrtsc_unb_var1.c index f8092127f..7fb16ba2a 100644 --- a/frame/0/sqrtsc/bli_sqrtsc_unb_var1.c +++ b/frame/0/sqrtsc/bli_sqrtsc_unb_var1.c @@ -81,7 +81,7 @@ void bli_sqrtsc_unb_var1( obj_t* chi, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname)( \ void* chi, \ @@ -96,13 +96,13 @@ void PASTEMAC2(chx,chy,varname)( \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC2_BASIC( sqrtsc, sqrtsc_unb_var1 ) +INSERT_GENTFUNC2_BASIC0( sqrtsc_unb_var1 ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( sqrtsc, sqrtsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_D0( sqrtsc_unb_var1 ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( sqrtsc, sqrtsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_P0( sqrtsc_unb_var1 ) #endif diff --git a/frame/0/subsc/bli_subsc_unb_var1.c b/frame/0/subsc/bli_subsc_unb_var1.c index cc1191177..bf4e4cca3 100644 --- a/frame/0/subsc/bli_subsc_unb_var1.c +++ b/frame/0/subsc/bli_subsc_unb_var1.c @@ -85,7 +85,7 @@ void bli_subsc_unb_var1( obj_t* chi, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname)( \ conj_t conjchi, \ @@ -105,13 +105,13 @@ void PASTEMAC2(chx,chy,varname)( \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC2_BASIC( subsc, subsc_unb_var1 ) +INSERT_GENTFUNC2_BASIC0( subsc_unb_var1 ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( subsc, subsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_D0( subsc_unb_var1 ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( subsc, subsc_unb_var1 ) +INSERT_GENTFUNC2_MIX_P0( subsc_unb_var1 ) #endif diff --git a/frame/0/subsc/bli_subsc_unb_var1.h b/frame/0/subsc/bli_subsc_unb_var1.h index 95400974d..b7599e050 100644 --- a/frame/0/subsc/bli_subsc_unb_var1.h +++ b/frame/0/subsc/bli_subsc_unb_var1.h @@ -38,13 +38,13 @@ void bli_subsc_unb_var1( obj_t* chi, #undef GENTPROT2 -#define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ +#define GENTPROT2( ctype_x, ctype_y, chx, chy, varname ) \ \ -void PASTEMAC2(chx,chy,opname)( \ - conj_t conjchi, \ - void* chi, \ - void* psi \ - ); +void PASTEMAC2(chx,chy,varname)( \ + conj_t conjchi, \ + void* chi, \ + void* psi \ + ); INSERT_GENTPROT2_BASIC( subsc_unb_var1 ) diff --git a/frame/0/unzipsc/bli_unzipsc_unb_var1.c b/frame/0/unzipsc/bli_unzipsc_unb_var1.c index 1c9241896..e73ac0cf6 100644 --- a/frame/0/unzipsc/bli_unzipsc_unb_var1.c +++ b/frame/0/unzipsc/bli_unzipsc_unb_var1.c @@ -76,7 +76,7 @@ void bli_unzipsc_unb_var1( obj_t* beta, #undef GENTFUNCR -#define GENTFUNCR( ctype_b, ctype_br, chb, chbr, opname, varname ) \ +#define GENTFUNCR( ctype_b, ctype_br, chb, chbr, varname ) \ \ void PASTEMAC(chb,varname)( \ void* beta, \ @@ -93,5 +93,5 @@ void PASTEMAC(chb,varname)( \ *chi_i_cast ); \ } -INSERT_GENTFUNCR_BASIC( unzipsc, unzipsc_unb_var1 ) +INSERT_GENTFUNCR_BASIC0( unzipsc_unb_var1 ) diff --git a/frame/0/unzipsc/bli_unzipsc_unb_var1.h b/frame/0/unzipsc/bli_unzipsc_unb_var1.h index 7695fca50..e061231c8 100644 --- a/frame/0/unzipsc/bli_unzipsc_unb_var1.h +++ b/frame/0/unzipsc/bli_unzipsc_unb_var1.h @@ -39,13 +39,13 @@ void bli_unzipsc_unb_var1( obj_t* beta, #undef GENTPROTR -#define GENTPROTR( ctype_b, ctype_br, chb, chbr, opname ) \ +#define GENTPROTR( 
ctype_b, ctype_br, chb, chbr, varname ) \ \ -void PASTEMAC(chb,opname)( \ - void* beta, \ - void* chi_r, \ - void* chi_i \ - ); +void PASTEMAC(chb,varname)( \ + void* beta, \ + void* chi_r, \ + void* chi_i \ + ); INSERT_GENTPROTR_BASIC( unzipsc_unb_var1 ) diff --git a/frame/0/zipsc/bli_zipsc_unb_var1.c b/frame/0/zipsc/bli_zipsc_unb_var1.c index 872203dfa..0aca353ac 100644 --- a/frame/0/zipsc/bli_zipsc_unb_var1.c +++ b/frame/0/zipsc/bli_zipsc_unb_var1.c @@ -79,7 +79,7 @@ void bli_zipsc_unb_var1( obj_t* beta_r, #undef GENTFUNCR -#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ void PASTEMAC(chx,varname)( \ void* beta_r, \ @@ -98,5 +98,5 @@ void PASTEMAC(chx,varname)( \ *chi_cast ); \ } -INSERT_GENTFUNCR_BASIC( zipsc, zipsc_unb_var1 ) +INSERT_GENTFUNCR_BASIC0( zipsc_unb_var1 ) diff --git a/frame/0/zipsc/bli_zipsc_unb_var1.h b/frame/0/zipsc/bli_zipsc_unb_var1.h index 29cbf7373..2f197f5fc 100644 --- a/frame/0/zipsc/bli_zipsc_unb_var1.h +++ b/frame/0/zipsc/bli_zipsc_unb_var1.h @@ -39,13 +39,13 @@ void bli_zipsc_unb_var1( obj_t* beta_r, #undef GENTPROTR -#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, opname ) \ +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ -void PASTEMAC(chx,opname)( \ - void* beta_r, \ - void* beta_i, \ - void* chi \ - ); +void PASTEMAC(chx,varname)( \ + void* beta_r, \ + void* beta_i, \ + void* chi \ + ); INSERT_GENTPROTR_BASIC( zipsc_unb_var1 ) diff --git a/frame/1/addv/bli_addv_ref.c b/frame/1/addv/bli_addv_ref.c index 9107bcb20..cba7eaf94 100644 --- a/frame/1/addv/bli_addv_ref.c +++ b/frame/1/addv/bli_addv_ref.c @@ -88,7 +88,7 @@ void bli_addv_ref( obj_t* x, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname) \ ( \ @@ -133,13 +133,13 @@ void PASTEMAC2(chx,chy,varname) \ // Define the basic set of functions unconditionally, 
and then also some // mixed datatype functions if requested. -INSERT_GENTFUNC2_BASIC( addv, addv_ref ) +INSERT_GENTFUNC2_BASIC0( addv_ref ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( addv, addv_ref ) +INSERT_GENTFUNC2_MIX_D0( addv_ref ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( addv, addv_ref ) +INSERT_GENTFUNC2_MIX_P0( addv_ref ) #endif diff --git a/frame/1/copyv/bli_copyv_ref.c b/frame/1/copyv/bli_copyv_ref.c index 21def27a6..06bafedaf 100644 --- a/frame/1/copyv/bli_copyv_ref.c +++ b/frame/1/copyv/bli_copyv_ref.c @@ -88,7 +88,7 @@ void bli_copyv_ref( obj_t* x, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname) \ ( \ @@ -133,13 +133,13 @@ void PASTEMAC2(chx,chy,varname) \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. -INSERT_GENTFUNC2_BASIC( copyv, copyv_ref ) +INSERT_GENTFUNC2_BASIC0( copyv_ref ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( copyv, copyv_ref ) +INSERT_GENTFUNC2_MIX_D0( copyv_ref ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( copyv, copyv_ref ) +INSERT_GENTFUNC2_MIX_P0( copyv_ref ) #endif diff --git a/frame/1/dotv/bli_dotv_ref.c b/frame/1/dotv/bli_dotv_ref.c index cd7baaba3..cbdf33f8e 100644 --- a/frame/1/dotv/bli_dotv_ref.c +++ b/frame/1/dotv/bli_dotv_ref.c @@ -96,7 +96,7 @@ void bli_dotv_ref( obj_t* x, */ #undef GENTFUNC3 -#define GENTFUNC3( ctype_x, ctype_y, ctype_r, chx, chy, chr, opname, varname ) \ +#define GENTFUNC3( ctype_x, ctype_y, ctype_r, chx, chy, chr, varname ) \ \ void PASTEMAC3(chx,chy,chr,varname) \ ( \ @@ -165,13 +165,13 @@ void PASTEMAC3(chx,chy,chr,varname) \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC3_BASIC( dotv, dotv_ref ) +INSERT_GENTFUNC3_BASIC0( dotv_ref ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC3_MIX_D( dotv, dotv_ref ) +INSERT_GENTFUNC3_MIX_D0( dotv_ref ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC3_MIX_P( dotv, dotv_ref ) +INSERT_GENTFUNC3_MIX_P0( dotv_ref ) #endif diff --git a/frame/1/dotxv/bli_dotxv_ref.c b/frame/1/dotxv/bli_dotxv_ref.c index c5e0a78f2..001e37437 100644 --- a/frame/1/dotxv/bli_dotxv_ref.c +++ b/frame/1/dotxv/bli_dotxv_ref.c @@ -118,7 +118,7 @@ void bli_dotxv_ref( obj_t* alpha, #undef GENTFUNC3U12 -#define GENTFUNC3U12( ctype_x, ctype_y, ctype_r, ctype_xy, chx, chy, chr, chxy, opname, varname ) \ +#define GENTFUNC3U12( ctype_x, ctype_y, ctype_r, ctype_xy, chx, chy, chr, chxy, varname ) \ \ void PASTEMAC3(chx,chy,chr,varname) \ ( \ @@ -197,13 +197,13 @@ void PASTEMAC3(chx,chy,chr,varname) \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC3U12_BASIC( dotxv, dotxv_ref ) +INSERT_GENTFUNC3U12_BASIC0( dotxv_ref ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC3U12_MIX_D( dotxv, dotxv_ref ) +INSERT_GENTFUNC3U12_MIX_D0( dotxv_ref ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC3U12_MIX_P( dotxv, dotxv_ref ) +INSERT_GENTFUNC3U12_MIX_P0( dotxv_ref ) #endif diff --git a/frame/1/invertv/bli_invertv_ref.c b/frame/1/invertv/bli_invertv_ref.c index 024edfdc6..4d9d3d4fd 100644 --- a/frame/1/invertv/bli_invertv_ref.c +++ b/frame/1/invertv/bli_invertv_ref.c @@ -68,7 +68,7 @@ void bli_invertv_ref( obj_t* x ) #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ +#define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname) \ ( \ @@ -92,5 +92,5 @@ void PASTEMAC(ch,varname) \ } \ } -INSERT_GENTFUNC_BASIC( invertv, invertv_ref ) +INSERT_GENTFUNC_BASIC0( invertv_ref ) diff --git a/frame/1/setv/bli_setv_ref.c b/frame/1/setv/bli_setv_ref.c index 6fc59cc1c..3d231b01c 100644 --- a/frame/1/setv/bli_setv_ref.c +++ b/frame/1/setv/bli_setv_ref.c @@ -88,7 +88,7 @@ void bli_setv_ref( obj_t* beta, */ #undef GENTFUNC2 -#define GENTFUNC2( ctype_b, ctype_x, chb, chx, opname, varname ) \ +#define GENTFUNC2( ctype_b, ctype_x, chb, chx, varname ) \ \ void PASTEMAC2(chb,chx,varname) \ ( \ @@ -125,12 +125,12 @@ void PASTEMAC2(chb,chx,varname) \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC2_BASIC( setv, setv_ref ) +INSERT_GENTFUNC2_BASIC0( setv_ref ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( setv, setv_ref ) +INSERT_GENTFUNC2_MIX_D0( setv_ref ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( setv, setv_ref ) +INSERT_GENTFUNC2_MIX_P0( setv_ref ) #endif diff --git a/frame/1/subv/bli_subv_ref.c b/frame/1/subv/bli_subv_ref.c index 5e4655044..0c83ad123 100644 --- a/frame/1/subv/bli_subv_ref.c +++ b/frame/1/subv/bli_subv_ref.c @@ -88,7 +88,7 @@ void bli_subv_ref( obj_t* x, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname) \ ( \ @@ -133,13 +133,13 @@ void PASTEMAC2(chx,chy,varname) \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. -INSERT_GENTFUNC2_BASIC( subv, subv_ref ) +INSERT_GENTFUNC2_BASIC0( subv_ref ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( subv, subv_ref ) +INSERT_GENTFUNC2_MIX_D0( subv_ref ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( subv, subv_ref ) +INSERT_GENTFUNC2_MIX_P0( subv_ref ) #endif diff --git a/frame/1/swapv/bli_swapv_ref.c b/frame/1/swapv/bli_swapv_ref.c index b1d77d7b8..c3ab90b29 100644 --- a/frame/1/swapv/bli_swapv_ref.c +++ b/frame/1/swapv/bli_swapv_ref.c @@ -85,7 +85,7 @@ void bli_swapv_ref( obj_t* x, #undef GENTFUNC2 -#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname, varname ) \ +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, varname ) \ \ void PASTEMAC2(chx,chy,varname) \ ( \ @@ -116,13 +116,13 @@ void PASTEMAC2(chx,chy,varname) \ // Define the basic set of functions unconditionally, and then also some // mixed datatype functions if requested. 
-INSERT_GENTFUNC2_BASIC( swapv, swapv_ref ) +INSERT_GENTFUNC2_BASIC0( swapv_ref ) #ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -INSERT_GENTFUNC2_MIX_D( swapv, swapv_ref ) +INSERT_GENTFUNC2_MIX_D0( swapv_ref ) #endif #ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -INSERT_GENTFUNC2_MIX_P( swapv, swapv_ref ) +INSERT_GENTFUNC2_MIX_P0( swapv_ref ) #endif diff --git a/frame/1m/packm/bli_packm_blk_var1.c b/frame/1m/packm/bli_packm_blk_var1.c index 7a5caf7de..c666a9b44 100644 --- a/frame/1m/packm/bli_packm_blk_var1.c +++ b/frame/1m/packm/bli_packm_blk_var1.c @@ -125,7 +125,7 @@ void bli_packm_blk_var1( obj_t* c, #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ +#define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname )( \ struc_t strucc, \ @@ -409,5 +409,5 @@ void PASTEMAC(ch,varname )( \ \ } -INSERT_GENTFUNC_BASIC( packm, packm_blk_var1 ) +INSERT_GENTFUNC_BASIC0( packm_blk_var1 ) diff --git a/frame/1m/packm/bli_packm_cxk.c b/frame/1m/packm/bli_packm_cxk.c index 53074008f..f104a8fd3 100644 --- a/frame/1m/packm/bli_packm_cxk.c +++ b/frame/1m/packm/bli_packm_cxk.c @@ -150,16 +150,16 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, scal2vker ) \ +#define GENTFUNC( ctype, ch, varname, scal2vker ) \ \ -void PASTEMAC(ch,opname)( \ - conj_t conja, \ - dim_t m, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t ldp \ - ) \ +void PASTEMAC(ch,varname)( \ + conj_t conja, \ + dim_t m, \ + dim_t n, \ + void* kappa, \ + void* a, inc_t inca, inc_t lda, \ + void* p, inc_t ldp \ + ) \ { \ dim_t panel_dim; \ num_t dt; \ diff --git a/frame/1m/packm/bli_packm_cxk_ri3.c b/frame/1m/packm/bli_packm_cxk_ri3.c index 022b26454..dde9031a5 100644 --- a/frame/1m/packm/bli_packm_cxk_ri3.c +++ b/frame/1m/packm/bli_packm_cxk_ri3.c @@ -150,16 +150,16 @@ static FUNCPTR_T ftypes[FUNCPTR_ARRAY_LENGTH][BLIS_NUM_FP_TYPES] = #undef GENTFUNCCO -#define GENTFUNCCO( ctype, ctype_r, ch, 
chr, opname, scal2vker ) \ +#define GENTFUNCCO( ctype, ctype_r, ch, chr, varname ) \ \ -void PASTEMAC(ch,opname)( \ - conj_t conja, \ - dim_t m, \ - dim_t n, \ - void* kappa, \ - void* a, inc_t inca, inc_t lda, \ - void* p, inc_t psp, inc_t ldp \ - ) \ +void PASTEMAC(ch,varname)( \ + conj_t conja, \ + dim_t m, \ + dim_t n, \ + void* kappa, \ + void* a, inc_t inca, inc_t lda, \ + void* p, inc_t psp, inc_t ldp \ + ) \ { \ dim_t panel_dim; \ dim_t i, j; \ @@ -253,5 +253,5 @@ void PASTEMAC(ch,opname)( \ } \ } -INSERT_GENTFUNCCO_BASIC( packm_cxk_ri3, SCAL2V_KERNEL ) +INSERT_GENTFUNCCO_BASIC0( packm_cxk_ri3 ) diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index 99e0dcec7..772d91816 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -227,14 +227,15 @@ void PASTEMAC(ch,varname)( \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as - if the diagonal offset were zero. This skips over the region (in - increments of MR) that was not packed. (Note we skip in increments of - MR since that is how the region would have been skipped by packm.) */ \ + if the diagonal offset were zero. This skips over the region that was + not packed. (Note we assume the diagonal offset is a multiple of MR; + this assumption will hold as long as the cache blocksizes are each a + multiple of MR and NR.) 
*/ \ if ( diagoffa < 0 ) \ { \ - i = ( -diagoffa / MR ) * MR; \ + i = -diagoffa; \ m = m - i; \ - diagoffa = -diagoffa % MR; \ + diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index d9a28f86d..97626a717 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -228,14 +228,14 @@ void PASTEMAC(ch,varname)( \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over - the region (in increments of NR) that was not packed. (Note we skip - in increments of NR since that is how the region would have been - skipped by packm.) */ \ + the region that was not packed. (Note we assume the diagonal offset + is a multiple of NR; this assumption will hold as long as the cache + blocksizes are each a multiple of MR and NR.) */ \ if ( diagoffb > 0 ) \ { \ - j = ( diagoffb / NR ) * NR; \ + j = diagoffb; \ n = n - j; \ - diagoffb = diagoffb % NR; \ + diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 11b39cc59..bb0ed34db 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -236,14 +236,15 @@ void PASTEMAC(ch,varname)( \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as - if the diagonal offset were zero. This skips over the region (in - increments of MR) that was not packed. (Note we skip in increments of - MR since that is how the region would have been skipped by packm.) */ \ + if the diagonal offset were zero. This skips over the region that was + not packed. 
(Note we assume the diagonal offset is a multiple of MR; + this assumption will hold as long as the cache blocksizes are each a + multiple of MR and NR.) */ \ if ( diagoffa < 0 ) \ { \ - i = ( -diagoffa / MR ) * MR; \ + i = -diagoffa; \ m = m - i; \ - diagoffa = -diagoffa % MR; \ + diagoffa = 0; \ c_cast = c_cast + (i )*rs_c; \ } \ \ diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index ffa41aa9a..5d0288c40 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -333,7 +333,8 @@ void PASTEMAC(ch,varname)( \ \ n_cur = ( bli_is_not_edge_b( jb, n_iter, n_left ) ? NR : n_left ); \ \ - /* Compute various offsets into and lengths of parts of B. */ \ + /* Determine the offset to and length of the panel that was packed + so we can index into the corresponding location in A. */ \ off_b11 = bli_max( -diagoffb_j, 0 ); \ k_b1121 = k - off_b11; \ k_b11 = NR; \ diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index 44fe387a6..9bac5c946 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -245,14 +245,14 @@ void PASTEMAC(ch,varname)( \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and treat this case as if the diagonal offset were zero. This skips over - the region (in increments of NR) that was not packed. (Note we skip - in increments of NR since that is how the region would have been - skipped by packm.) */ \ + the region that was not packed. (Note we assume the diagonal offset + is a multiple of NR; this assumption will hold as long as the cache + blocksizes are each a multiple of MR and NR.) 
*/ \ if ( diagoffb > 0 ) \ { \ - j = ( diagoffb / NR ) * NR; \ + j = diagoffb; \ n = n - j; \ - diagoffb = diagoffb % NR; \ + diagoffb = 0; \ c_cast = c_cast + (j )*cs_c; \ } \ \ diff --git a/frame/compat/bla_amax.c b/frame/compat/bla_amax.c index 78d98ee68..5e1a84c64 100644 --- a/frame/compat/bla_amax.c +++ b/frame/compat/bla_amax.c @@ -64,9 +64,9 @@ f77_int PASTEF772(i,chx,blasname)( \ bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. */ \ - PASTEMAC(chx,abmaxv)( n0, \ - x0, incx0, \ - &bli_index ); \ + PASTEMAC(chx,blisname)( n0, \ + x0, incx0, \ + &bli_index ); \ \ /* Convert zero-based BLIS (C) index to one-based BLAS (Fortran) index. */ \ @@ -79,6 +79,6 @@ f77_int PASTEF772(i,chx,blasname)( \ } #ifdef BLIS_ENABLE_BLAS2BLIS -INSERT_GENTFUNC_BLAS( amax, abmaxv ) +INSERT_GENTFUNC_BLAS( amax, amaxv ) #endif diff --git a/frame/compat/bla_asum.c b/frame/compat/bla_asum.c index 0c4eecec9..c7b36fe32 100644 --- a/frame/compat/bla_asum.c +++ b/frame/compat/bla_asum.c @@ -49,7 +49,7 @@ ftype_r PASTEF772(chr,chx,blasname)( \ dim_t n0; \ ftype_x* x0; \ inc_t incx0; \ - ftype_r absum; \ + ftype_r asum; \ err_t init_result; \ \ /* Initialize BLIS (if it is not already initialized). */ \ @@ -63,17 +63,17 @@ ftype_r PASTEF772(chr,chx,blasname)( \ bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. */ \ - PASTEMAC(chx,absumv)( n0, \ - x0, incx0, \ - &absum ); \ + PASTEMAC(chx,blisname)( n0, \ + x0, incx0, \ + &asum ); \ \ /* Finalize BLIS (if it was initialized above). 
*/ \ bli_finalize_safe( init_result ); \ \ - return absum; \ + return asum; \ } #ifdef BLIS_ENABLE_BLAS2BLIS -INSERT_GENTFUNCR2_BLAS( asum, absumv ) +INSERT_GENTFUNCR2_BLAS( asum, asumv ) #endif diff --git a/frame/compat/bla_nrm2.c b/frame/compat/bla_nrm2.c index 865431131..8f199e4a7 100644 --- a/frame/compat/bla_nrm2.c +++ b/frame/compat/bla_nrm2.c @@ -63,9 +63,9 @@ ftype_r PASTEF772(chr,chx,blasname)( \ bli_convert_blas_incv( n0, x, *incx, x0, incx0 ); \ \ /* Call BLIS interface. */ \ - PASTEMAC(chx,fnormv)( n0, \ - x0, incx0, \ - &norm ); \ + PASTEMAC(chx,blisname)( n0, \ + x0, incx0, \ + &norm ); \ \ /* Finalize BLIS (if it was initialized above). */ \ bli_finalize_safe( init_result ); \ @@ -74,6 +74,6 @@ ftype_r PASTEF772(chr,chx,blasname)( \ } #ifdef BLIS_ENABLE_BLAS2BLIS -INSERT_GENTFUNCR2_BLAS( nrm2, fnormv ) +INSERT_GENTFUNCR2_BLAS( nrm2, normfv ) #endif diff --git a/frame/include/bli_gentfunc_macro_defs.h b/frame/include/bli_gentfunc_macro_defs.h index 19dc97e6b..12b104748 100644 --- a/frame/include/bli_gentfunc_macro_defs.h +++ b/frame/include/bli_gentfunc_macro_defs.h @@ -127,8 +127,9 @@ GENTFUNCSCAL( double, dcomplex, d, z, blasname, blisname ) // -- Macros for functions with one operand ------------------------------------ -// -- Basic one-operand macro (with no auxiliary arguments) -- +// -- Basic one-operand macro -- +// -- (no auxiliary arguments) -- #define INSERT_GENTFUNC_BASIC0( tfuncname ) \ \ @@ -137,9 +138,7 @@ GENTFUNC( double, d, tfuncname ) \ GENTFUNC( scomplex, c, tfuncname ) \ GENTFUNC( dcomplex, z, tfuncname ) - -// -- Basic one-operand macro (with one auxiliary argument) -- - +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC_BASIC( tfuncname, varname ) \ \ @@ -148,9 +147,7 @@ GENTFUNC( double, d, tfuncname, varname ) \ GENTFUNC( scomplex, c, tfuncname, varname ) \ GENTFUNC( dcomplex, z, tfuncname, varname ) - -// -- Basic one-operand macro (with two auxiliary arguments) -- - +// -- (two auxiliary arguments) -- #define 
INSERT_GENTFUNC_BASIC2( tfuncname, varname1, varname2 ) \ \ @@ -160,8 +157,57 @@ GENTFUNC( scomplex, c, tfuncname, varname1, varname2 ) \ GENTFUNC( dcomplex, z, tfuncname, varname1, varname2 ) + +// -- Basic one-operand with real projection -- + +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNCR_BASIC0( tfuncname ) \ +\ +GENTFUNCR( float, float, s, s, tfuncname ) \ +GENTFUNCR( double, double, d, d, tfuncname ) \ +GENTFUNCR( scomplex, float, c, s, tfuncname ) \ +GENTFUNCR( dcomplex, double, z, d, tfuncname ) + +// -- (one auxiliary argument) -- + +#define INSERT_GENTFUNCR_BASIC( tfuncname, varname ) \ +\ +GENTFUNCR( float, float, s, s, tfuncname, varname ) \ +GENTFUNCR( double, double, d, d, tfuncname, varname ) \ +GENTFUNCR( scomplex, float, c, s, tfuncname, varname ) \ +GENTFUNCR( dcomplex, double, z, d, tfuncname, varname ) + + + +// -- Basic one-operand macro with complex domain only and real projection -- + +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNCCO_BASIC0( tfuncname ) \ +\ +GENTFUNCCO( scomplex, float, c, s, tfuncname ) \ +GENTFUNCCO( dcomplex, double, z, d, tfuncname ) + +// -- (one auxiliary argument) -- + +#define INSERT_GENTFUNCCO_BASIC( tfuncname, varname ) \ +\ +GENTFUNCCO( scomplex, float, c, s, tfuncname, varname ) \ +GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname ) + +// -- (two auxiliary arguments) -- + +#define INSERT_GENTFUNCCO_BASIC2( tfuncname, varname1, varname2 ) \ +\ +GENTFUNCCO( scomplex, float, c, s, tfuncname, varname1, varname2 ) \ +GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname1, varname2 ) + + + // -- Basic one-operand macro with integer instance -- +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC_BASIC_I( tfuncname, varname ) \ \ @@ -172,8 +218,10 @@ GENTFUNC( dcomplex, z, tfuncname, varname ) \ GENTFUNC( gint_t, i, tfuncname, varname ) + // -- Basic one-operand with integer projection -- +// -- (one auxiliary argument) -- #define INSERT_GENTFUNCI_BASIC( tfuncname, varname ) \ \ 
@@ -183,53 +231,17 @@ GENTFUNCI( scomplex, gint_t, c, i, tfuncname, varname ) \ GENTFUNCI( dcomplex, gint_t, z, i, tfuncname, varname ) -// -- Basic one-operand with real projection -- - - -#define INSERT_GENTFUNCR_BASIC( tfuncname, varname ) \ -\ -GENTFUNCR( float, float, s, s, tfuncname, varname ) \ -GENTFUNCR( double, double, d, d, tfuncname, varname ) \ -GENTFUNCR( scomplex, float, c, s, tfuncname, varname ) \ -GENTFUNCR( dcomplex, double, z, d, tfuncname, varname ) - - -// -- Basic one-operand macro with complex domain only and real projection (with no auxiliary arguments) -- - - -#define INSERT_GENTFUNCCO_BASIC0( tfuncname ) \ -\ -GENTFUNCCO( scomplex, float, c, s, tfuncname ) \ -GENTFUNCCO( dcomplex, double, z, d, tfuncname ) - - -// -- Basic one-operand macro with complex domain only and real projection -- - - -#define INSERT_GENTFUNCCO_BASIC( tfuncname, varname ) \ -\ -GENTFUNCCO( scomplex, float, c, s, tfuncname, varname ) \ -GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname ) - - -// -- Basic one-operand macro with complex domain only and real projection (with two auxiliary arguments) -- - - -#define INSERT_GENTFUNCCO_BASIC2( tfuncname, varname1, varname2 ) \ -\ -GENTFUNCCO( scomplex, float, c, s, tfuncname, varname1, varname2 ) \ -GENTFUNCCO( dcomplex, double, z, d, tfuncname, varname1, varname2 ) - // -- Basic one-operand with real and integer projections -- +// -- (no auxiliary arguments) -- -#define INSERT_GENTFUNCRI_BASIC( tfuncname, varname ) \ +#define INSERT_GENTFUNCRI_BASIC0( tfuncname ) \ \ -GENTFUNCRI( float, float, gint_t, s, s, i, tfuncname, varname ) \ -GENTFUNCRI( double, double, gint_t, d, d, i, tfuncname, varname ) \ -GENTFUNCRI( scomplex, float, gint_t, c, s, i, tfuncname, varname ) \ -GENTFUNCRI( dcomplex, double, gint_t, z, d, i, tfuncname, varname ) +GENTFUNCRI( float, float, gint_t, s, s, i, tfuncname ) \ +GENTFUNCRI( double, double, gint_t, d, d, i, tfuncname ) \ +GENTFUNCRI( scomplex, float, gint_t, c, s, i, tfuncname ) \ 
+GENTFUNCRI( dcomplex, double, gint_t, z, d, i, tfuncname ) @@ -239,6 +251,16 @@ GENTFUNCRI( dcomplex, double, gint_t, z, d, i, tfuncname, varname ) // -- Basic two-operand macro -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2_BASIC0( tfuncname ) \ +\ +GENTFUNC2( float, float, s, s, tfuncname ) \ +GENTFUNC2( double, double, d, d, tfuncname ) \ +GENTFUNC2( scomplex, scomplex, c, c, tfuncname ) \ +GENTFUNC2( dcomplex, dcomplex, z, z, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2_BASIC( tfuncname, varname ) \ \ @@ -248,8 +270,20 @@ GENTFUNC2( scomplex, scomplex, c, c, tfuncname, varname ) \ GENTFUNC2( dcomplex, dcomplex, z, z, tfuncname, varname ) + // -- Mixed domain two-operand macro -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2_MIX_D0( tfuncname ) \ +\ +GENTFUNC2( float, scomplex, s, c, tfuncname ) \ +GENTFUNC2( scomplex, float, c, s, tfuncname ) \ +\ +GENTFUNC2( double, dcomplex, d, z, tfuncname ) \ +GENTFUNC2( dcomplex, double, z, d, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2_MIX_D( tfuncname, varname ) \ \ @@ -260,8 +294,26 @@ GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \ GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) + // -- Mixed precision two-operand macro -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2_MIX_P0( tfuncname ) \ +\ +GENTFUNC2( float, double, s, d, tfuncname ) \ +GENTFUNC2( float, dcomplex, s, z, tfuncname ) \ +\ +GENTFUNC2( double, float, d, s, tfuncname ) \ +GENTFUNC2( double, scomplex, d, c, tfuncname ) \ +\ +GENTFUNC2( scomplex, double, c, d, tfuncname ) \ +GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \ +\ +GENTFUNC2( dcomplex, float, z, s, tfuncname ) \ +GENTFUNC2( dcomplex, scomplex, z, c, tfuncname ) \ + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2_MIX_P( tfuncname, varname ) \ \ @@ -281,6 +333,16 @@ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \ // -- Basic two-operand with 
union of operands -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2U_BASIC0( tfuncname ) \ +\ +GENTFUNC2U( float, float, float, s, s, s, tfuncname ) \ +GENTFUNC2U( double, double, double, d, d, d, tfuncname ) \ +GENTFUNC2U( scomplex, scomplex, scomplex, c, c, c, tfuncname ) \ +GENTFUNC2U( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2U_BASIC( tfuncname, varname ) \ \ @@ -290,8 +352,20 @@ GENTFUNC2U( scomplex, scomplex, scomplex, c, c, c, tfuncname, varname ) \ GENTFUNC2U( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname, varname ) + // -- Mixed domain two-operand with union of operands -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2U_MIX_D0( tfuncname ) \ +\ +GENTFUNC2U( float, scomplex, scomplex, s, c, c, tfuncname ) \ +GENTFUNC2U( scomplex, float, scomplex, c, s, c, tfuncname ) \ +\ +GENTFUNC2U( double, dcomplex, dcomplex, d, z, z, tfuncname ) \ +GENTFUNC2U( dcomplex, double, dcomplex, z, d, z, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2U_MIX_D( tfuncname, varname ) \ \ @@ -302,8 +376,26 @@ GENTFUNC2U( double, dcomplex, dcomplex, d, z, z, tfuncname, varname ) \ GENTFUNC2U( dcomplex, double, dcomplex, z, d, z, tfuncname, varname ) + // -- Mixed precision two-operand with union of operands -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2U_MIX_P0( tfuncname ) \ +\ +GENTFUNC2U( float, double, double, s, d, d, tfuncname ) \ +GENTFUNC2U( float, dcomplex, dcomplex, s, z, z, tfuncname ) \ +\ +GENTFUNC2U( double, float, double, d, s, d, tfuncname ) \ +GENTFUNC2U( double, scomplex, dcomplex, d, c, z, tfuncname ) \ +\ +GENTFUNC2U( scomplex, double, dcomplex, c, d, z, tfuncname ) \ +GENTFUNC2U( scomplex, dcomplex, dcomplex, c, z, z, tfuncname ) \ +\ +GENTFUNC2U( dcomplex, float, dcomplex, z, s, z, tfuncname ) \ +GENTFUNC2U( dcomplex, scomplex, dcomplex, z, c, z, tfuncname ) + +// -- (one auxiliary argument) -- #define 
INSERT_GENTFUNC2U_MIX_P( tfuncname, varname ) \ \ @@ -323,6 +415,16 @@ GENTFUNC2U( dcomplex, scomplex, dcomplex, z, c, z, tfuncname, varname ) // -- Basic two-operand with real projection of first operand -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2R_BASIC0( tfuncname ) \ +\ +GENTFUNC2R( float, float, float, s, s, s, tfuncname ) \ +GENTFUNC2R( double, double, double, d, d, d, tfuncname ) \ +GENTFUNC2R( scomplex, scomplex, float, c, c, s, tfuncname ) \ +GENTFUNC2R( dcomplex, dcomplex, double, z, z, d, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2R_BASIC( tfuncname, varname ) \ \ @@ -332,8 +434,20 @@ GENTFUNC2R( scomplex, scomplex, float, c, c, s, tfuncname, varname ) \ GENTFUNC2R( dcomplex, dcomplex, double, z, z, d, tfuncname, varname ) + // -- Mixed domain two-operand with real projection of first operand -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2R_MIX_D0( tfuncname ) \ +\ +GENTFUNC2R( float, scomplex, float, s, c, s, tfuncname ) \ +GENTFUNC2R( scomplex, float, float, c, s, s, tfuncname ) \ +\ +GENTFUNC2R( double, dcomplex, double, d, z, d, tfuncname ) \ +GENTFUNC2R( dcomplex, double, double, z, d, d, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2R_MIX_D( tfuncname, varname ) \ \ @@ -344,8 +458,26 @@ GENTFUNC2R( double, dcomplex, double, d, z, d, tfuncname, varname ) \ GENTFUNC2R( dcomplex, double, double, z, d, d, tfuncname, varname ) + // -- Mixed precision two-operand with real projection of first operand -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2R_MIX_P0( tfuncname ) \ +\ +GENTFUNC2R( float, double, float, s, d, s, tfuncname ) \ +GENTFUNC2R( float, dcomplex, float, s, z, s, tfuncname ) \ +\ +GENTFUNC2R( double, float, double, d, s, d, tfuncname ) \ +GENTFUNC2R( double, scomplex, double, d, c, d, tfuncname ) \ +\ +GENTFUNC2R( scomplex, double, float, c, d, s, tfuncname ) \ +GENTFUNC2R( scomplex, dcomplex, float, c, z, s, tfuncname ) \ +\ 
+GENTFUNC2R( dcomplex, float, double, z, s, d, tfuncname ) \ +GENTFUNC2R( dcomplex, scomplex, double, z, c, d, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC2R_MIX_P( tfuncname, varname ) \ \ @@ -368,6 +500,16 @@ GENTFUNC2R( dcomplex, scomplex, double, z, c, d, tfuncname, varname ) // -- Basic three-operand macro -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC3_BASIC0( tfuncname ) \ +\ +GENTFUNC3( float, float, float, s, s, s, tfuncname ) \ +GENTFUNC3( double, double, double, d, d, d, tfuncname ) \ +GENTFUNC3( scomplex, scomplex, scomplex, c, c, c, tfuncname ) \ +GENTFUNC3( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC3_BASIC( tfuncname, varname ) \ \ @@ -375,19 +517,41 @@ GENTFUNC3( float, float, float, s, s, s, tfuncname, varname ) \ GENTFUNC3( double, double, double, d, d, d, tfuncname, varname ) \ GENTFUNC3( scomplex, scomplex, scomplex, c, c, c, tfuncname, varname ) \ GENTFUNC3( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname, varname ) -/* + +// -- (two auxiliary arguments) -- + #define INSERT_GENTFUNC3_BASIC2( tfuncname, varname1, varname2 ) \ \ GENTFUNC3( float, float, float, s, s, s, tfuncname, varname1, varname2 ) \ GENTFUNC3( double, double, double, d, d, d, tfuncname, varname1, varname2 ) \ GENTFUNC3( scomplex, scomplex, scomplex, c, c, c, tfuncname, varname1, varname2 ) \ GENTFUNC3( dcomplex, dcomplex, dcomplex, z, z, z, tfuncname, varname1, varname2 ) -*/ // -- Mixed domain three-operand macro -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC3_MIX_D0( tfuncname ) \ +\ +GENTFUNC3( float, float, scomplex, s, s, c, tfuncname ) \ +GENTFUNC3( float, scomplex, float, s, c, s, tfuncname ) \ +GENTFUNC3( float, scomplex, scomplex, s, c, c, tfuncname ) \ +\ +GENTFUNC3( double, double, dcomplex, d, d, z, tfuncname ) \ +GENTFUNC3( double, dcomplex, double, d, z, d, tfuncname ) \ +GENTFUNC3( double, dcomplex, dcomplex, d, z, z, tfuncname ) \ 
+\ +GENTFUNC3( scomplex, float, float, c, s, s, tfuncname ) \ +GENTFUNC3( scomplex, float, scomplex, c, s, c, tfuncname ) \ +GENTFUNC3( scomplex, scomplex, float, c, c, s, tfuncname ) \ +\ +GENTFUNC3( dcomplex, double, double, z, d, d, tfuncname ) \ +GENTFUNC3( dcomplex, double, dcomplex, z, d, z, tfuncname ) \ +GENTFUNC3( dcomplex, dcomplex, double, z, z, d, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC3_MIX_D( tfuncname, varname ) \ \ @@ -407,9 +571,102 @@ GENTFUNC3( dcomplex, double, double, z, d, d, tfuncname, varname ) \ GENTFUNC3( dcomplex, double, dcomplex, z, d, z, tfuncname, varname ) \ GENTFUNC3( dcomplex, dcomplex, double, z, z, d, tfuncname, varname ) +// -- (two auxiliary arguments) -- + +#define INSERT_GENTFUNC3_MIX_D2( tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( float, float, scomplex, s, s, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, scomplex, float, s, c, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, scomplex, scomplex, s, c, c, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( double, double, dcomplex, d, d, z, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, dcomplex, double, d, z, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, dcomplex, dcomplex, d, z, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( scomplex, float, float, c, s, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, float, scomplex, c, s, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, scomplex, float, c, c, s, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( dcomplex, double, double, z, d, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, double, dcomplex, z, d, z, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, dcomplex, double, z, z, d, tfuncname, varname1, varname2 ) + + // -- Mixed precision three-operand macro -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC3_MIX_P0( tfuncname ) \ +\ +GENTFUNC3( float, float, double, s, s, d, tfuncname ) \ +GENTFUNC3( 
float, float, dcomplex, s, s, z, tfuncname ) \ +\ +GENTFUNC3( float, double, float, s, d, s, tfuncname ) \ +GENTFUNC3( float, double, double, s, d, d, tfuncname ) \ +GENTFUNC3( float, double, scomplex, s, d, c, tfuncname ) \ +GENTFUNC3( float, double, dcomplex, s, d, z, tfuncname ) \ +\ +GENTFUNC3( float, scomplex, double, s, c, d, tfuncname ) \ +GENTFUNC3( float, scomplex, dcomplex, s, c, z, tfuncname ) \ +\ +GENTFUNC3( float, dcomplex, float, s, z, s, tfuncname ) \ +GENTFUNC3( float, dcomplex, double, s, z, d, tfuncname ) \ +GENTFUNC3( float, dcomplex, scomplex, s, z, c, tfuncname ) \ +GENTFUNC3( float, dcomplex, dcomplex, s, z, z, tfuncname ) \ +\ +\ +GENTFUNC3( double, float, float, d, s, s, tfuncname ) \ +GENTFUNC3( double, float, double, d, s, d, tfuncname ) \ +GENTFUNC3( double, float, scomplex, d, s, c, tfuncname ) \ +GENTFUNC3( double, float, dcomplex, d, s, z, tfuncname ) \ +\ +GENTFUNC3( double, double, float, d, d, s, tfuncname ) \ +GENTFUNC3( double, double, scomplex, d, d, c, tfuncname ) \ +\ +GENTFUNC3( double, scomplex, float, d, c, s, tfuncname ) \ +GENTFUNC3( double, scomplex, double, d, c, d, tfuncname ) \ +GENTFUNC3( double, scomplex, scomplex, d, c, c, tfuncname ) \ +GENTFUNC3( double, scomplex, dcomplex, d, c, z, tfuncname ) \ +\ +GENTFUNC3( double, dcomplex, float, d, z, s, tfuncname ) \ +GENTFUNC3( double, dcomplex, scomplex, d, z, c, tfuncname ) \ +\ +\ +GENTFUNC3( scomplex, float, double, c, s, d, tfuncname ) \ +GENTFUNC3( scomplex, float, dcomplex, c, s, z, tfuncname ) \ +\ +GENTFUNC3( scomplex, double, float, c, d, s, tfuncname ) \ +GENTFUNC3( scomplex, double, double, c, d, d, tfuncname ) \ +GENTFUNC3( scomplex, double, scomplex, c, d, c, tfuncname ) \ +GENTFUNC3( scomplex, double, dcomplex, c, d, z, tfuncname ) \ +\ +GENTFUNC3( scomplex, scomplex, double, c, c, d, tfuncname ) \ +GENTFUNC3( scomplex, scomplex, dcomplex, c, c, z, tfuncname ) \ +\ +GENTFUNC3( scomplex, dcomplex, float, c, z, s, tfuncname ) \ +GENTFUNC3( scomplex, 
dcomplex, double, c, z, d, tfuncname ) \ +GENTFUNC3( scomplex, dcomplex, scomplex, c, z, c, tfuncname ) \ +GENTFUNC3( scomplex, dcomplex, dcomplex, c, z, z, tfuncname ) \ +\ +\ +GENTFUNC3( dcomplex, float, float, z, s, s, tfuncname ) \ +GENTFUNC3( dcomplex, float, double, z, s, d, tfuncname ) \ +GENTFUNC3( dcomplex, float, scomplex, z, s, c, tfuncname ) \ +GENTFUNC3( dcomplex, float, dcomplex, z, s, z, tfuncname ) \ +\ +GENTFUNC3( dcomplex, double, float, z, d, s, tfuncname ) \ +GENTFUNC3( dcomplex, double, scomplex, z, d, c, tfuncname ) \ +\ +GENTFUNC3( dcomplex, scomplex, float, z, c, s, tfuncname ) \ +GENTFUNC3( dcomplex, scomplex, double, z, c, d, tfuncname ) \ +GENTFUNC3( dcomplex, scomplex, scomplex, z, c, c, tfuncname ) \ +GENTFUNC3( dcomplex, scomplex, dcomplex, z, c, z, tfuncname ) \ +\ +GENTFUNC3( dcomplex, dcomplex, float, z, z, s, tfuncname ) \ +GENTFUNC3( dcomplex, dcomplex, scomplex, z, z, c, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC3_MIX_P( tfuncname, varname ) \ \ @@ -480,10 +737,91 @@ GENTFUNC3( dcomplex, scomplex, dcomplex, z, c, z, tfuncname, varname ) \ GENTFUNC3( dcomplex, dcomplex, float, z, z, s, tfuncname, varname ) \ GENTFUNC3( dcomplex, dcomplex, scomplex, z, z, c, tfuncname, varname ) +// -- (two auxiliary arguments) -- + +#define INSERT_GENTFUNC3_MIX_P2( tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( float, float, double, s, s, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, float, dcomplex, s, s, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( float, double, float, s, d, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, double, double, s, d, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, double, scomplex, s, d, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, double, dcomplex, s, d, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( float, scomplex, double, s, c, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, scomplex, dcomplex, s, c, z, tfuncname, varname1, 
varname2 ) \ +\ +GENTFUNC3( float, dcomplex, float, s, z, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, dcomplex, double, s, z, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, dcomplex, scomplex, s, z, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( float, dcomplex, dcomplex, s, z, z, tfuncname, varname1, varname2 ) \ +\ +\ +GENTFUNC3( double, float, float, d, s, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, float, double, d, s, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, float, scomplex, d, s, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, float, dcomplex, d, s, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( double, double, float, d, d, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, double, scomplex, d, d, c, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( double, scomplex, float, d, c, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, scomplex, double, d, c, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, scomplex, scomplex, d, c, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, scomplex, dcomplex, d, c, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( double, dcomplex, float, d, z, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( double, dcomplex, scomplex, d, z, c, tfuncname, varname1, varname2 ) \ +\ +\ +GENTFUNC3( scomplex, float, double, c, s, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, float, dcomplex, c, s, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( scomplex, double, float, c, d, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, double, double, c, d, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, double, scomplex, c, d, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, double, dcomplex, c, d, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( scomplex, scomplex, double, c, c, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, scomplex, dcomplex, c, c, z, tfuncname, varname1, varname2 ) \ +\ 
+GENTFUNC3( scomplex, dcomplex, float, c, z, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, dcomplex, double, c, z, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, dcomplex, scomplex, c, z, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( scomplex, dcomplex, dcomplex, c, z, z, tfuncname, varname1, varname2 ) \ +\ +\ +GENTFUNC3( dcomplex, float, float, z, s, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, float, double, z, s, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, float, scomplex, z, s, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, float, dcomplex, z, s, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( dcomplex, double, float, z, d, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, double, scomplex, z, d, c, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( dcomplex, scomplex, float, z, c, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, scomplex, double, z, c, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, scomplex, scomplex, z, c, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, scomplex, dcomplex, z, c, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3( dcomplex, dcomplex, float, z, z, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3( dcomplex, dcomplex, scomplex, z, z, c, tfuncname, varname1, varname2 ) + // -- Basic three-operand with union of operands 1 and 2 -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC3U12_BASIC0( tfuncname ) \ +\ +GENTFUNC3U12( float, float, float, float, s, s, s, s, tfuncname ) \ +GENTFUNC3U12( double, double, double, double, d, d, d, d, tfuncname ) \ +GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, tfuncname ) \ +GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC3U12_BASIC( tfuncname, varname ) \ \ @@ -492,9 +830,40 @@ GENTFUNC3U12( double, double, double, double, d, d, d, d, tfuncname, var GENTFUNC3U12( 
scomplex, scomplex, scomplex, scomplex, c, c, c, c, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, tfuncname, varname ) +// -- (two auxiliary arguments) -- + +#define INSERT_GENTFUNC3U12_BASIC2( tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3U12( float, float, float, float, s, s, s, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( double, double, double, double, d, d, d, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, tfuncname, varname1, varname2 ) + + // -- Mixed domain three-operand with union of operands 1 and 2 -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC3U12_MIX_D0( tfuncname ) \ +\ +GENTFUNC3U12( float, float, scomplex, float, s, s, c, s, tfuncname ) \ +GENTFUNC3U12( float, scomplex, float, scomplex, s, c, s, c, tfuncname ) \ +GENTFUNC3U12( float, scomplex, scomplex, scomplex, s, c, c, c, tfuncname ) \ +\ +GENTFUNC3U12( double, double, dcomplex, double, d, d, z, d, tfuncname ) \ +GENTFUNC3U12( double, dcomplex, double, dcomplex, d, z, d, z, tfuncname ) \ +GENTFUNC3U12( double, dcomplex, dcomplex, dcomplex, d, z, z, z, tfuncname ) \ +\ +GENTFUNC3U12( scomplex, float, float, scomplex, c, s, s, c, tfuncname ) \ +GENTFUNC3U12( scomplex, float, scomplex, scomplex, c, s, c, c, tfuncname ) \ +GENTFUNC3U12( scomplex, scomplex, float, scomplex, c, c, s, c, tfuncname ) \ +\ +GENTFUNC3U12( dcomplex, double, double, dcomplex, z, d, d, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, double, dcomplex, dcomplex, z, d, z, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC3U12_MIX_D( tfuncname, varname ) \ \ @@ -514,9 +883,102 @@ GENTFUNC3U12( dcomplex, double, double, dcomplex, z, d, d, z, tfuncname, var GENTFUNC3U12( dcomplex, double, dcomplex, dcomplex, z, 
d, z, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, tfuncname, varname ) +// -- (two auxiliary arguments) -- + +#define INSERT_GENTFUNC3U12_MIX_D2( tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3U12( float, float, scomplex, float, s, s, c, s, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( float, scomplex, float, scomplex, s, c, s, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( float, scomplex, scomplex, scomplex, s, c, c, c, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3U12( double, double, dcomplex, double, d, d, z, d, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( double, dcomplex, double, dcomplex, d, z, d, z, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( double, dcomplex, dcomplex, dcomplex, d, z, z, z, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3U12( scomplex, float, float, scomplex, c, s, s, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( scomplex, float, scomplex, scomplex, c, s, c, c, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( scomplex, scomplex, float, scomplex, c, c, s, c, tfuncname, varname1, varname2 ) \ +\ +GENTFUNC3U12( dcomplex, double, double, dcomplex, z, d, d, z, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( dcomplex, double, dcomplex, dcomplex, z, d, z, z, tfuncname, varname1, varname2 ) \ +GENTFUNC3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, tfuncname, varname1, varname2 ) + + // -- Mixed precision three-operand with union of operands 1 and 2 -- +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC3U12_MIX_P0( tfuncname ) \ +\ +GENTFUNC3U12( float, float, double, float, s, s, d, s, tfuncname ) \ +GENTFUNC3U12( float, float, dcomplex, float, s, s, z, s, tfuncname ) \ +\ +GENTFUNC3U12( float, double, float, double, s, d, s, d, tfuncname ) \ +GENTFUNC3U12( float, double, double, double, s, d, d, d, tfuncname ) \ +GENTFUNC3U12( float, double, scomplex, double, s, d, c, d, tfuncname ) \ +GENTFUNC3U12( float, double, dcomplex, double, s, d, z, d, tfuncname ) \ 
+\ +GENTFUNC3U12( float, scomplex, double, scomplex, s, c, d, c, tfuncname ) \ +GENTFUNC3U12( float, scomplex, dcomplex, scomplex, s, c, z, c, tfuncname ) \ +\ +GENTFUNC3U12( float, dcomplex, float, dcomplex, s, z, s, z, tfuncname ) \ +GENTFUNC3U12( float, dcomplex, double, dcomplex, s, z, d, z, tfuncname ) \ +GENTFUNC3U12( float, dcomplex, scomplex, dcomplex, s, z, c, z, tfuncname ) \ +GENTFUNC3U12( float, dcomplex, dcomplex, dcomplex, s, z, z, z, tfuncname ) \ +\ +\ +GENTFUNC3U12( double, float, float, double, d, s, s, d, tfuncname ) \ +GENTFUNC3U12( double, float, double, double, d, s, d, d, tfuncname ) \ +GENTFUNC3U12( double, float, scomplex, double, d, s, c, d, tfuncname ) \ +GENTFUNC3U12( double, float, dcomplex, double, d, s, z, d, tfuncname ) \ +\ +GENTFUNC3U12( double, double, float, double, d, d, s, d, tfuncname ) \ +GENTFUNC3U12( double, double, scomplex, double, d, d, c, d, tfuncname ) \ +\ +GENTFUNC3U12( double, scomplex, float, dcomplex, d, c, s, z, tfuncname ) \ +GENTFUNC3U12( double, scomplex, double, dcomplex, d, c, d, z, tfuncname ) \ +GENTFUNC3U12( double, scomplex, scomplex, dcomplex, d, c, c, z, tfuncname ) \ +GENTFUNC3U12( double, scomplex, dcomplex, dcomplex, d, c, z, z, tfuncname ) \ +\ +GENTFUNC3U12( double, dcomplex, float, dcomplex, d, z, s, z, tfuncname ) \ +GENTFUNC3U12( double, dcomplex, scomplex, dcomplex, d, z, c, z, tfuncname ) \ +\ +\ +GENTFUNC3U12( scomplex, float, double, scomplex, c, s, d, c, tfuncname ) \ +GENTFUNC3U12( scomplex, float, dcomplex, scomplex, c, s, z, c, tfuncname ) \ +\ +GENTFUNC3U12( scomplex, double, float, dcomplex, c, d, s, z, tfuncname ) \ +GENTFUNC3U12( scomplex, double, double, dcomplex, c, d, d, z, tfuncname ) \ +GENTFUNC3U12( scomplex, double, scomplex, dcomplex, c, d, c, z, tfuncname ) \ +GENTFUNC3U12( scomplex, double, dcomplex, dcomplex, c, d, z, z, tfuncname ) \ +\ +GENTFUNC3U12( scomplex, scomplex, double, scomplex, c, c, d, c, tfuncname ) \ +GENTFUNC3U12( scomplex, scomplex, dcomplex, scomplex, c, 
c, z, c, tfuncname ) \ +\ +GENTFUNC3U12( scomplex, dcomplex, float, dcomplex, c, z, s, z, tfuncname ) \ +GENTFUNC3U12( scomplex, dcomplex, double, dcomplex, c, z, d, z, tfuncname ) \ +GENTFUNC3U12( scomplex, dcomplex, scomplex, dcomplex, c, z, c, z, tfuncname ) \ +GENTFUNC3U12( scomplex, dcomplex, dcomplex, dcomplex, c, z, z, z, tfuncname ) \ +\ +\ +GENTFUNC3U12( dcomplex, float, float, dcomplex, z, s, s, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, float, double, dcomplex, z, s, d, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, float, scomplex, dcomplex, z, s, c, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, float, dcomplex, dcomplex, z, s, z, z, tfuncname ) \ +\ +GENTFUNC3U12( dcomplex, double, float, dcomplex, z, d, s, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, double, scomplex, dcomplex, z, d, c, z, tfuncname ) \ +\ +GENTFUNC3U12( dcomplex, scomplex, float, dcomplex, z, c, s, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, scomplex, double, dcomplex, z, c, d, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, scomplex, scomplex, dcomplex, z, c, c, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, scomplex, dcomplex, dcomplex, z, c, z, z, tfuncname ) \ +\ +GENTFUNC3U12( dcomplex, dcomplex, float, dcomplex, z, z, s, z, tfuncname ) \ +GENTFUNC3U12( dcomplex, dcomplex, scomplex, dcomplex, z, z, c, z, tfuncname ) + +// -- (one auxiliary argument) -- #define INSERT_GENTFUNC3U12_MIX_P( tfuncname, varname ) \ \ @@ -587,46 +1049,7 @@ GENTFUNC3U12( dcomplex, scomplex, dcomplex, dcomplex, z, c, z, z, tfuncname, var GENTFUNC3U12( dcomplex, dcomplex, float, dcomplex, z, z, s, z, tfuncname, varname ) \ GENTFUNC3U12( dcomplex, dcomplex, scomplex, dcomplex, z, z, c, z, tfuncname, varname ) - - -// -- Basic three-operand with union of operands 1 and 2 -- -// -- (with two auxiliary arguments) -- - - -#define INSERT_GENTFUNC3U12_BASIC2( tfuncname, varname1, varname2 ) \ -\ -GENTFUNC3U12( float, float, float, float, s, s, s, s, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( double, double, double, double, d, d, d, 
d, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( scomplex, scomplex, scomplex, scomplex, c, c, c, c, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( dcomplex, dcomplex, dcomplex, dcomplex, z, z, z, z, tfuncname, varname1, varname2 ) - - -// -- Mixed domain three-operand with union of operands 1 and 2 -- -// -- (with two auxiliary arguments) -- - - -#define INSERT_GENTFUNC3U12_MIX_D2( tfuncname, varname1, varname2 ) \ -\ -GENTFUNC3U12( float, float, scomplex, float, s, s, c, s, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( float, scomplex, float, scomplex, s, c, s, c, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( float, scomplex, scomplex, scomplex, s, c, c, c, tfuncname, varname1, varname2 ) \ -\ -GENTFUNC3U12( double, double, dcomplex, double, d, d, z, d, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( double, dcomplex, double, dcomplex, d, z, d, z, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( double, dcomplex, dcomplex, dcomplex, d, z, z, z, tfuncname, varname1, varname2 ) \ -\ -GENTFUNC3U12( scomplex, float, float, scomplex, c, s, s, c, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( scomplex, float, scomplex, scomplex, c, s, c, c, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( scomplex, scomplex, float, scomplex, c, c, s, c, tfuncname, varname1, varname2 ) \ -\ -GENTFUNC3U12( dcomplex, double, double, dcomplex, z, d, d, z, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( dcomplex, double, dcomplex, dcomplex, z, d, z, z, tfuncname, varname1, varname2 ) \ -GENTFUNC3U12( dcomplex, dcomplex, double, dcomplex, z, z, d, z, tfuncname, varname1, varname2 ) - - -// -- Mixed precision three-operand with union of operands 1 and 2 -- -// -- (with two auxiliary arguments) -- - +// -- (two auxiliary arguments) -- #define INSERT_GENTFUNC3U12_MIX_P2( tfuncname, varname1, varname2 ) \ \ diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 46dbed477..4cfa45e71 100644 --- a/frame/include/bli_param_macro_defs.h +++ 
b/frame/include/bli_param_macro_defs.h @@ -659,6 +659,86 @@ } \ } +// Set dimensions, increments, effective uplo/diagoff, etc for ONE matrix +// argument (without column-wise stride optimization). + +#define bli_set_dims_incs_uplo_1m_noswap( \ + diagoffa, diaga, \ + uploa, m, n, rs_a, cs_a, \ + uplo_eff, n_elem_max, n_iter, inca, lda, \ + ij0, n_shift \ + ) \ +{ \ + /* If matrix A is entirely "unstored", that is, if either: + - A is lower-stored and entirely above the diagonal, or + - A is upper-stored and entirely below the diagonal + then we mark the storage as implicitly zero. */ \ + if ( bli_is_unstored_subpart( diagoffa, BLIS_NO_TRANSPOSE, uploa, m, n ) ) \ + { \ + uplo_eff = BLIS_ZEROS; \ + } \ + else \ + { \ + doff_t diagoffa_use = diagoffa; \ + doff_t diagoff_eff; \ + dim_t n_iter_max; \ +\ + if ( bli_is_unit_diag( diaga ) ) \ + bli_shift_diag_offset_to_shrink_uplo( uploa, diagoffa_use ); \ +\ + /* If matrix A is entirely "stored", that is, if either: + - A is upper-stored and entirely above the diagonal, or + - A is lower-stored and entirely below the diagonal + then we mark the storage as dense. 
*/ \ + if ( bli_is_stored_subpart( diagoffa_use, BLIS_NO_TRANSPOSE, uploa, m, n ) ) \ + uploa = BLIS_DENSE; \ +\ + n_iter_max = n; \ + n_elem_max = m; \ + inca = rs_a; \ + lda = cs_a; \ + uplo_eff = uploa; \ + diagoff_eff = diagoffa_use; \ +\ + if ( bli_is_dense( uplo_eff ) ) \ + { \ + n_iter = n_iter_max; \ + } \ + else if ( bli_is_upper( uplo_eff ) ) \ + { \ + if ( diagoff_eff < 0 ) \ + { \ + ij0 = 0; \ + n_shift = -diagoff_eff; \ + n_elem_max = bli_min( n_elem_max, n_shift + bli_min( m, n ) ); \ + n_iter = n_iter_max; \ + } \ + else \ + { \ + ij0 = diagoff_eff; \ + n_shift = 0; \ + n_iter = n_iter_max - diagoff_eff; \ + } \ + } \ + else /* if ( bli_is_lower( uplo_eff ) ) */ \ + { \ + if ( diagoff_eff < 0 ) \ + { \ + ij0 = -diagoff_eff; \ + n_shift = 0; \ + n_elem_max = n_elem_max + diagoff_eff; \ + n_iter = bli_min( n_elem_max, bli_min( m, n ) ); \ + } \ + else \ + { \ + ij0 = 0; \ + n_shift = diagoff_eff; \ + n_iter = bli_min( n_iter_max, n_shift + bli_min( m, n ) ); \ + } \ + } \ + } \ +} + // Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix // arguments. diff --git a/frame/include/bli_scalar_macro_defs.h b/frame/include/bli_scalar_macro_defs.h index 8e9bd82aa..dd9b77c4f 100644 --- a/frame/include/bli_scalar_macro_defs.h +++ b/frame/include/bli_scalar_macro_defs.h @@ -198,6 +198,7 @@ #define bli_fabs( a ) ( (a) <= 0.0 ? 
-(a) : (a) ) // fminabs, fmaxabs + #define bli_fminabs( a, b ) \ \ bli_fmin( bli_fabs( a ), \ @@ -208,6 +209,11 @@ bli_fmax( bli_fabs( a ), \ bli_fabs( b ) ) +// isnan, isinf + +#define bli_isinf( a ) isinf( a ) +#define bli_isnan( a ) isnan( a ) + // swap_types #define bli_swap_types( type1, type2 ) \ diff --git a/frame/include/blis.h b/frame/include/blis.h index e097729b2..2ee68785b 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -121,9 +121,9 @@ extern "C" { #include "bli_addsc.h" #include "bli_copysc.h" #include "bli_divsc.h" -#include "bli_fnormsc.h" #include "bli_getsc.h" #include "bli_mulsc.h" +#include "bli_normfsc.h" #include "bli_setsc.h" #include "bli_sqrtsc.h" #include "bli_subsc.h" @@ -143,7 +143,6 @@ extern "C" { #include "bli_copyv.h" #include "bli_dotv.h" #include "bli_dotxv.h" -#include "bli_fnormv.h" #include "bli_scal2v.h" #include "bli_subv.h" #include "bli_swapv.h" @@ -183,7 +182,6 @@ extern "C" { #include "bli_addm.h" #include "bli_axpym.h" #include "bli_copym.h" -#include "bli_fnormm.h" #include "bli_scal2m.h" #include "bli_subm.h" #include "bli_packm.h" @@ -220,12 +218,17 @@ extern "C" { // -- Utility operations -- -#include "bli_abmaxv.h" -#include "bli_absumv.h" -#include "bli_absumm.h" +#include "bli_amaxv.h" +#include "bli_asumv.h" #include "bli_mkherm.h" #include "bli_mksymm.h" #include "bli_mktrim.h" +#include "bli_norm1v.h" +#include "bli_norm1m.h" +#include "bli_normfv.h" +#include "bli_normfm.h" +#include "bli_normiv.h" +#include "bli_normim.h" #include "bli_printv.h" #include "bli_printm.h" #include "bli_randv.h" diff --git a/frame/util/abmaxv/bli_abmaxv_check.h b/frame/util/abmaxv/bli_abmaxv_check.h deleted file mode 100644 index 4f84491e9..000000000 --- a/frame/util/abmaxv/bli_abmaxv_check.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_abmaxv_check( obj_t* x, - obj_t* abmax_i ); diff --git a/frame/util/absumm/bli_absumm_unb_var1.c b/frame/util/absumm/bli_absumm_unb_var1.c deleted file mode 100644 index 87180e5b7..000000000 --- a/frame/util/absumm/bli_absumm_unb_var1.c +++ /dev/null @@ -1,189 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#include "blis.h" - -#define FUNCPTR_T absumm_fp - -typedef void (*FUNCPTR_T)( - doff_t diagoffx, - uplo_t uplox, - dim_t m, - dim_t n, - void* x, inc_t rs_x, inc_t cs_x, - void* absum - ); - -static FUNCPTR_T GENARRAY(ftypes,absumm_unb_var1); - - -void bli_absumm_unb_var1( obj_t* x, - obj_t* absum ) -{ - num_t dt_x = bli_obj_datatype( *x ); - - doff_t diagoffx = bli_obj_diag_offset( *x ); - uplo_t uplox = bli_obj_uplo( *x ); - - dim_t m = bli_obj_length( *x ); - dim_t n = bli_obj_width( *x ); - - void* buf_x = bli_obj_buffer_at_off( *x ); - inc_t rs_x = bli_obj_row_stride( *x ); - inc_t cs_x = bli_obj_col_stride( *x ); - - void* buf_absum = bli_obj_buffer_at_off( *absum ); - - FUNCPTR_T f; - - // Index into the type combination array to extract the correct - // function pointer. - f = ftypes[dt_x]; - - // Invoke the function. - f( diagoffx, - uplox, - m, - n, - buf_x, rs_x, cs_x, - buf_absum ); -} - - -#undef GENTFUNCR -#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname, kername ) \ -\ -void PASTEMAC(chx,varname)( \ - doff_t diagoffx, \ - uplo_t uplox, \ - dim_t m, \ - dim_t n, \ - void* x, inc_t rs_x, inc_t cs_x, \ - void* absum \ - ) \ -{ \ - ctype_x* x_cast = x; \ - ctype_xr* absum_cast = absum; \ - ctype_x* x1; \ - ctype_xr absum_r; \ - ctype_xr absum_temp_r; \ - uplo_t uplox_eff; \ - dim_t n_iter; \ - dim_t n_elem, n_elem_max; \ - inc_t ldx, incx; \ - dim_t j, i; \ - dim_t ij0, n_shift; \ -\ - /* Initialize the absolute sum accumulator to zero. */ \ - PASTEMAC(chxr,set0s)( absum_r ); \ -\ - /* If either dimension is zero, return swith absum equal to zero. */ \ - if ( bli_zero_dim2( m, n ) ) \ - { \ - PASTEMAC2(chxr,chxr,copys)( absum_r, *absum_cast ); \ - return; \ - } \ -\ - /* Set various loop parameters. */ \ - bli_set_dims_incs_uplo_1m( diagoffx, BLIS_NONUNIT_DIAG, \ - uplox, m, n, rs_x, cs_x, \ - uplox_eff, n_elem_max, n_iter, incx, ldx, \ - ij0, n_shift ); \ -\ - /* If the matrix is zeros, return swith absum equal to zero. 
*/ \ - if ( bli_is_zeros( uplox_eff ) ) \ - { \ - PASTEMAC2(chxr,chxr,copys)( absum_r, *absum_cast ); \ - return; \ - } \ -\ -\ - /* Handle dense and upper/lower storage cases separately. */ \ - if ( bli_is_dense( uplox_eff ) ) \ - { \ - for ( j = 0; j < n_iter; ++j ) \ - { \ - n_elem = n_elem_max; \ -\ - x1 = x_cast + (j )*ldx + (0 )*incx; \ -\ - PASTEMAC(chx,kername)( n_elem, \ - x1, incx, \ - &absum_temp_r ); \ -\ - PASTEMAC2(chxr,chxr,adds)( absum_temp_r, absum_r ); \ - } \ - } \ - else \ - { \ - if ( bli_is_upper( uplox_eff ) ) \ - { \ - for ( j = 0; j < n_iter; ++j ) \ - { \ - n_elem = bli_min( n_shift + j + 1, n_elem_max ); \ -\ - x1 = x_cast + (ij0+j )*ldx + (0 )*incx; \ -\ - PASTEMAC(chx,kername)( n_elem, \ - x1, incx, \ - &absum_temp_r ); \ -\ - PASTEMAC2(chxr,chxr,adds)( absum_temp_r, absum_r ); \ - } \ - } \ - else if ( bli_is_lower( uplox_eff ) ) \ - { \ - for ( j = 0; j < n_iter; ++j ) \ - { \ - i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \ - n_elem = n_elem_max - i; \ -\ - x1 = x_cast + (j )*ldx + (ij0+i )*incx; \ -\ - PASTEMAC(chx,kername)( n_elem, \ - x1, incx, \ - &absum_temp_r ); \ -\ - PASTEMAC2(chxr,chxr,adds)( absum_temp_r, absum_r ); \ - } \ - } \ - } \ -\ - /* Store final values of absum to output variables. */ \ - PASTEMAC2(chxr,chxr,copys)( absum_r, *absum_cast ); \ -} - - -INSERT_GENTFUNCR_BASIC( absumm_unb_var1, absumv_unb_var1 ) - diff --git a/frame/util/absumv/bli_absumv_check.h b/frame/util/absumv/bli_absumv_check.h deleted file mode 100644 index 5690f77a1..000000000 --- a/frame/util/absumv/bli_absumv_check.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -void bli_absumv_check( obj_t* x, - obj_t* absum ); diff --git a/frame/util/abmaxv/bli_abmaxv.c b/frame/util/amaxv/bli_amaxv.c similarity index 93% rename from frame/util/abmaxv/bli_abmaxv.c rename to frame/util/amaxv/bli_amaxv.c index e5d244005..76ebd56c3 100644 --- a/frame/util/abmaxv/bli_abmaxv.c +++ b/frame/util/amaxv/bli_amaxv.c @@ -35,14 +35,14 @@ #include "blis.h" /* -void bli_abmaxv( obj_t* x, +void bli_amaxv( obj_t* x, obj_t* scale, obj_t* sumsq ) { if ( bli_error_checking_is_enabled() ) - bli_abmaxv_check( x, scale, sumsq ); + bli_amaxv_check( x, scale, sumsq ); - bli_abmaxv_unb_var1( x, scale, sumsq ); + bli_amaxv_unb_var1( x, scale, sumsq ); } */ @@ -64,7 +64,7 @@ void PASTEMAC0(opname)( \ abmax_i ); \ } -GENFRONT( abmaxv, abmaxv_unb_var1 ) +GENFRONT( amaxv, amaxv_unb_var1 ) // @@ -84,5 +84,5 @@ void PASTEMAC(chx,opname)( \ abmax_i ); \ } -INSERT_GENTFUNCI_BASIC( abmaxv, abmaxv_unb_var1 ) +INSERT_GENTFUNCI_BASIC( amaxv, amaxv_unb_var1 ) diff --git a/frame/util/abmaxv/bli_abmaxv.h b/frame/util/amaxv/bli_amaxv.h similarity index 92% rename from frame/util/abmaxv/bli_abmaxv.h rename to frame/util/amaxv/bli_amaxv.h index 6cc02cca9..ba319a15f 100644 --- a/frame/util/abmaxv/bli_abmaxv.h +++ b/frame/util/amaxv/bli_amaxv.h @@ -32,15 +32,15 @@ */ -#include "bli_abmaxv_check.h" -#include "bli_abmaxv_unb_var1.h" +#include "bli_amaxv_check.h" +#include "bli_amaxv_unb_var1.h" // // Prototype object-based interface. 
// -void bli_abmaxv( obj_t* x, - obj_t* abmax_i ); +void bli_amaxv( obj_t* x, + obj_t* abmax_i ); // @@ -55,5 +55,5 @@ void PASTEMAC(chx,opname)( \ ctype_i* abmax_i \ ); -INSERT_GENTPROTI_BASIC( abmaxv ) +INSERT_GENTPROTI_BASIC( amaxv ) diff --git a/frame/util/abmaxv/bli_abmaxv_check.c b/frame/util/amaxv/bli_amaxv_check.c similarity index 96% rename from frame/util/abmaxv/bli_abmaxv_check.c rename to frame/util/amaxv/bli_amaxv_check.c index 98e1e0ec6..5dff44ec4 100644 --- a/frame/util/abmaxv/bli_abmaxv_check.c +++ b/frame/util/amaxv/bli_amaxv_check.c @@ -34,8 +34,8 @@ #include "blis.h" -void bli_abmaxv_check( obj_t* x, - obj_t* abmax_i ) +void bli_amaxv_check( obj_t* x, + obj_t* abmax_i ) { err_t e_val; diff --git a/frame/util/absumm/bli_absumm_check.h b/frame/util/amaxv/bli_amaxv_check.h similarity index 95% rename from frame/util/absumm/bli_absumm_check.h rename to frame/util/amaxv/bli_amaxv_check.h index a80f1e0a3..59359088a 100644 --- a/frame/util/absumm/bli_absumm_check.h +++ b/frame/util/amaxv/bli_amaxv_check.h @@ -32,5 +32,5 @@ */ -void bli_absumm_check( obj_t* a, - obj_t* absum ); +void bli_amaxv_check( obj_t* x, + obj_t* abmax_i ); diff --git a/frame/util/abmaxv/bli_abmaxv_unb_var1.c b/frame/util/amaxv/bli_amaxv_unb_var1.c similarity index 72% rename from frame/util/abmaxv/bli_abmaxv_unb_var1.c rename to frame/util/amaxv/bli_amaxv_unb_var1.c index 83ca09b38..fd389ac3e 100644 --- a/frame/util/abmaxv/bli_abmaxv_unb_var1.c +++ b/frame/util/amaxv/bli_amaxv_unb_var1.c @@ -34,7 +34,7 @@ #include "blis.h" -#define FUNCPTR_T abmaxv_fp +#define FUNCPTR_T amaxv_fp typedef void (*FUNCPTR_T)( dim_t n, @@ -42,11 +42,11 @@ typedef void (*FUNCPTR_T)( void* abmax_i ); -static FUNCPTR_T GENARRAY(ftypes,abmaxv_unb_var1); +static FUNCPTR_T GENARRAY(ftypes,amaxv_unb_var1); -void bli_abmaxv_unb_var1( obj_t* x, - obj_t* abmax_i ) +void bli_amaxv_unb_var1( obj_t* x, + obj_t* abmax_i ) { num_t dt_x = bli_obj_datatype( *x ); @@ -71,7 +71,7 @@ void bli_abmaxv_unb_var1( obj_t* x, 
#undef GENTFUNCRI -#define GENTFUNCRI( ctype_x, ctype_xr, ctype_i, chx, chxr, chi, opname, varname ) \ +#define GENTFUNCRI( ctype_x, ctype_xr, ctype_i, chx, chxr, chi, varname ) \ \ void PASTEMAC(chx,varname)( \ dim_t n, \ @@ -82,55 +82,62 @@ void PASTEMAC(chx,varname)( \ ctype_x* x_cast = x; \ ctype_i* abmax_i_cast = abmax_i; \ ctype_xr* minus_one = PASTEMAC(chxr,m1); \ - ctype_i* minus_one_i = PASTEMAC(chi,m1); \ + ctype_i* zero_i = PASTEMAC(chi,0); \ \ ctype_x* chi1; \ ctype_xr chi1_r; \ ctype_xr chi1_i; \ - ctype_xr abs_chi1_save; \ ctype_xr abs_chi1; \ - ctype_i i_save; \ + ctype_xr abs_chi1_max; \ + ctype_i i_max; \ dim_t i; \ +\ + /* If the vector is empty, return an index of zero. */ \ + if ( bli_zero_dim1( n ) ) \ + { \ + PASTEMAC2(chi,chi,copys)( *zero_i, *abmax_i_cast ); \ + return; \ + } \ +\ + /* Initialize the index of the maximum absolute value to zero. */ \ + PASTEMAC2(chi,chi,copys)( *zero_i, i_max ); \ \ /* Initialize the maximum absolute value search candidate with -1, which is guaranteed to be less than all values we will compute. */ \ - PASTEMAC2(chxr,chxr,copys)( *minus_one, abs_chi1_save ); \ -\ - /* Initialize the index for the maximum absolute value search - candidate. We use -1 in case x has a length of zero. */ \ - PASTEMAC2(chi,chi,copys)( *minus_one_i, i_save ); \ -\ - chi1 = x_cast; \ + PASTEMAC2(chxr,chxr,copys)( *minus_one, abs_chi1_max ); \ \ for ( i = 0; i < n; ++i ) \ { \ + chi1 = x_cast + (i )*incx; \ +\ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(chx,chxr,gets)( *chi1, chi1_r, chi1_i ); \ \ /* Replace chi1_r and chi1_i with their absolute values. */ \ - chi1_r = bli_fabs( chi1_r ); \ - chi1_i = bli_fabs( chi1_i ); \ + PASTEMAC2(chxr,chxr,abval2s)( chi1_r, chi1_r ); \ + PASTEMAC2(chxr,chxr,abval2s)( chi1_i, chi1_i ); \ \ /* Add the real and imaginary absolute values together. 
*/ \ PASTEMAC(chxr,set0s)( abs_chi1 ); \ PASTEMAC2(chxr,chxr,adds)( chi1_r, abs_chi1 ); \ PASTEMAC2(chxr,chxr,adds)( chi1_i, abs_chi1 ); \ \ - if ( abs_chi1_save < abs_chi1 ) \ + /* If the absolute value of the current element exceeds that of + the previous largest, save it and its index. If NaN is + encountered, then treat it the same as if it were a valid + value that was larger than any previously seen. This + behavior mimics that of LAPACK's ?lange(). */ \ + if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ { \ - /* If the absolute value of the current element exceeds - that of the previous largest, save it and its index. */ \ - PASTEMAC2(chxr,chxr,copys)( abs_chi1, abs_chi1_save ); \ - PASTEMAC2(chi,chi,copys)( i, i_save ); \ + PASTEMAC2(chxr,chxr,copys)( abs_chi1, abs_chi1_max ); \ + PASTEMAC2(chi,chi,copys)( i, i_max ); \ } \ -\ - chi1 += incx; \ } \ \ /* Store final index to output variable. */ \ - PASTEMAC2(chi,chi,copys)( i_save, *abmax_i_cast ); \ + PASTEMAC2(chi,chi,copys)( i_max, *abmax_i_cast ); \ } -INSERT_GENTFUNCRI_BASIC( abmaxv, abmaxv_unb_var1 ) +INSERT_GENTFUNCRI_BASIC0( amaxv_unb_var1 ) diff --git a/frame/util/abmaxv/bli_abmaxv_unb_var1.h b/frame/util/amaxv/bli_amaxv_unb_var1.h similarity index 94% rename from frame/util/abmaxv/bli_abmaxv_unb_var1.h rename to frame/util/amaxv/bli_amaxv_unb_var1.h index 6f8c3a085..f6b734e0b 100644 --- a/frame/util/abmaxv/bli_abmaxv_unb_var1.h +++ b/frame/util/amaxv/bli_amaxv_unb_var1.h @@ -32,8 +32,8 @@ */ -void bli_abmaxv_unb_var1( obj_t* x, - obj_t* abmax_i ); +void bli_amaxv_unb_var1( obj_t* x, + obj_t* abmax_i ); #undef GENTPROTRI @@ -45,5 +45,5 @@ void PASTEMAC(chx,varname)( \ void* abmax_i \ ); -INSERT_GENTPROTRI_BASIC( abmaxv_unb_var1 ) +INSERT_GENTPROTRI_BASIC( amaxv_unb_var1 ) diff --git a/frame/util/absumv/bli_absumv.c b/frame/util/asumv/bli_asumv.c similarity index 86% rename from frame/util/absumv/bli_absumv.c rename to frame/util/asumv/bli_asumv.c index 598fc9950..2ce74b006 100644 ---
a/frame/util/absumv/bli_absumv.c +++ b/frame/util/asumv/bli_asumv.c @@ -35,14 +35,14 @@ #include "blis.h" /* -void bli_absumv( obj_t* x, +void bli_asumv( obj_t* x, obj_t* scale, obj_t* sumsq ) { if ( bli_error_checking_is_enabled() ) - bli_absumv_check( x, scale, sumsq ); + bli_asumv_check( x, scale, sumsq ); - bli_absumv_unb_var1( x, scale, sumsq ); + bli_asumv_unb_var1( x, scale, sumsq ); } */ @@ -54,17 +54,17 @@ void bli_absumv( obj_t* x, \ void PASTEMAC0(opname)( \ obj_t* x, \ - obj_t* absum \ + obj_t* asum \ ) \ { \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( x, absum ); \ + PASTEMAC(opname,_check)( x, asum ); \ \ PASTEMAC0(varname)( x, \ - absum ); \ + asum ); \ } -GENFRONT( absumv, absumv_unb_var1 ) +GENFRONT( asumv, asumv_unb_var1 ) // @@ -76,13 +76,13 @@ GENFRONT( absumv, absumv_unb_var1 ) void PASTEMAC(chx,opname)( \ dim_t n, \ ctype_x* x, inc_t incx, \ - ctype_xr* absum \ + ctype_xr* asum \ ) \ { \ PASTEMAC(chx,varname)( n, \ x, incx, \ - absum ); \ + asum ); \ } -INSERT_GENTFUNCR_BASIC( absumv, absumv_unb_var1 ) +INSERT_GENTFUNCR_BASIC( asumv, asumv_unb_var1 ) diff --git a/frame/util/absumv/bli_absumv.h b/frame/util/asumv/bli_asumv.h similarity index 90% rename from frame/util/absumv/bli_absumv.h rename to frame/util/asumv/bli_asumv.h index b1f34f5b0..598138e66 100644 --- a/frame/util/absumv/bli_absumv.h +++ b/frame/util/asumv/bli_asumv.h @@ -32,15 +32,15 @@ */ -#include "bli_absumv_check.h" -#include "bli_absumv_unb_var1.h" +#include "bli_asumv_check.h" +#include "bli_asumv_unb_var1.h" // // Prototype object-based interface. 
// -void bli_absumv( obj_t* x, - obj_t* absum ); +void bli_asumv( obj_t* x, + obj_t* asum ); // @@ -52,8 +52,8 @@ void bli_absumv( obj_t* x, void PASTEMAC(chx,opname)( \ dim_t n, \ ctype_x* x, inc_t incx, \ - ctype_xr* absum \ + ctype_xr* asum \ ); -INSERT_GENTPROTR_BASIC( absumv ) +INSERT_GENTPROTR_BASIC( asumv ) diff --git a/frame/util/absumv/bli_absumv_check.c b/frame/util/asumv/bli_asumv_check.c similarity index 92% rename from frame/util/absumv/bli_absumv_check.c rename to frame/util/asumv/bli_asumv_check.c index 89d83d78e..7c0ad17d0 100644 --- a/frame/util/absumv/bli_absumv_check.c +++ b/frame/util/asumv/bli_asumv_check.c @@ -34,8 +34,8 @@ #include "blis.h" -void bli_absumv_check( obj_t* x, - obj_t* absum ) +void bli_asumv_check( obj_t* x, + obj_t* asum ) { err_t e_val; @@ -44,7 +44,7 @@ void bli_absumv_check( obj_t* x, e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); - e_val = bli_check_nonconstant_object( absum ); + e_val = bli_check_nonconstant_object( asum ); bli_check_error_code( e_val ); // Check object dimensions. @@ -52,7 +52,7 @@ void bli_absumv_check( obj_t* x, e_val = bli_check_vector_object( x ); bli_check_error_code( e_val ); - e_val = bli_check_scalar_object( absum ); + e_val = bli_check_scalar_object( asum ); bli_check_error_code( e_val ); } diff --git a/frame/util/asumv/bli_asumv_check.h b/frame/util/asumv/bli_asumv_check.h new file mode 100644 index 000000000..bec9116c1 --- /dev/null +++ b/frame/util/asumv/bli_asumv_check.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_asumv_check( obj_t* x, + obj_t* asum ); diff --git a/frame/util/absumv/bli_absumv_unb_var1.c b/frame/util/asumv/bli_asumv_unb_var1.c similarity index 62% rename from frame/util/absumv/bli_absumv_unb_var1.c rename to frame/util/asumv/bli_asumv_unb_var1.c index 7ab73a58e..5779d37ef 100644 --- a/frame/util/absumv/bli_absumv_unb_var1.c +++ b/frame/util/asumv/bli_asumv_unb_var1.c @@ -34,86 +34,66 @@ #include "blis.h" -#define FUNCPTR_T absumv_fp +#define FUNCPTR_T asumv_fp typedef void (*FUNCPTR_T)( dim_t n, void* x, inc_t incx, - void* absum + void* asum ); -/* -// If some mixed datatype functions will not be compiled, we initialize -// the corresponding elements of the function array to NULL. 
-#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -static FUNCPTR_T GENARRAY2_ALL(ftypes,absumv_unb_var1); -#else -#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -static FUNCPTR_T GENARRAY2_EXT(ftypes,absumv_unb_var1); -#else -static FUNCPTR_T GENARRAY2_MIN(ftypes,absumv_unb_var1); -#endif -#endif -*/ -static FUNCPTR_T GENARRAY(ftypes,absumv_unb_var1); +static FUNCPTR_T GENARRAY(ftypes,asumv_unb_var1); -void bli_absumv_unb_var1( obj_t* x, - obj_t* absum ) +void bli_asumv_unb_var1( obj_t* x, + obj_t* asum ) { - num_t dt_x = bli_obj_datatype( *x ); + num_t dt_x = bli_obj_datatype( *x ); - dim_t n = bli_obj_vector_dim( *x ); + dim_t n = bli_obj_vector_dim( *x ); - inc_t inc_x = bli_obj_vector_inc( *x ); - void* buf_x = bli_obj_buffer_at_off( *x ); + inc_t inc_x = bli_obj_vector_inc( *x ); + void* buf_x = bli_obj_buffer_at_off( *x ); - void* buf_absum = bli_obj_buffer_at_off( *absum ); + void* buf_asum = bli_obj_buffer_at_off( *asum ); FUNCPTR_T f; // Index into the type combination array to extract the correct // function pointer. - f = ftypes[dt_x]; //[dt_s]; + f = ftypes[dt_x]; // Invoke the function. f( n, buf_x, inc_x, - buf_absum ); + buf_asum ); } #undef GENTFUNCR -#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ void PASTEMAC(chx,varname)( \ dim_t n, \ void* x, inc_t incx, \ - void* absum \ + void* asum \ ) \ { \ - ctype_x* x_cast = x; \ - ctype_xr* absum_cast = absum; \ + ctype_x* x_cast = x; \ + ctype_xr* asum_cast = asum; \ ctype_x* chi1; \ ctype_xr chi1_r; \ ctype_xr chi1_i; \ - ctype_xr absum_r; \ + ctype_xr absum; \ dim_t i; \ \ /* Initialize the absolute sum accumulator to zero. */ \ - PASTEMAC(chxr,set0s)( absum_r ); \ -\ - /* If x is zero length, return swith absum equal to zero. 
*/ \ - if ( bli_zero_dim1( n ) ) \ - { \ - PASTEMAC2(chxr,chxr,copys)( absum_r, *absum_cast ); \ - return; \ - } \ -\ - chi1 = x_cast; \ + PASTEMAC(chxr,set0s)( absum ); \ \ for ( i = 0; i < n; ++i ) \ { \ + chi1 = x_cast + (i )*incx; \ +\ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(chx,chxr,gets)( *chi1, chi1_r, chi1_i ); \ \ @@ -122,15 +102,13 @@ void PASTEMAC(chx,varname)( \ chi1_i = bli_fabs( chi1_i ); \ \ /* Accumulate the real and imaginary components into absum. */ \ - PASTEMAC2(chxr,chxr,adds)( chi1_r, absum_r ); \ - PASTEMAC2(chxr,chxr,adds)( chi1_i, absum_r ); \ -\ - chi1 += incx; \ + PASTEMAC2(chxr,chxr,adds)( chi1_r, absum ); \ + PASTEMAC2(chxr,chxr,adds)( chi1_i, absum ); \ } \ \ - /* Store final values of absum to output variables. */ \ - PASTEMAC2(chxr,chxr,copys)( absum_r, *absum_cast ); \ + /* Store the final value of absum to the output variable. */ \ + PASTEMAC2(chxr,chxr,copys)( absum, *asum_cast ); \ } -INSERT_GENTFUNCR_BASIC( absumv, absumv_unb_var1 ) +INSERT_GENTFUNCR_BASIC0( asumv_unb_var1 ) diff --git a/frame/util/absumv/bli_absumv_unb_var1.h b/frame/util/asumv/bli_asumv_unb_var1.h similarity index 92% rename from frame/util/absumv/bli_absumv_unb_var1.h rename to frame/util/asumv/bli_asumv_unb_var1.h index b3e23fc02..15b1a8688 100644 --- a/frame/util/absumv/bli_absumv_unb_var1.h +++ b/frame/util/asumv/bli_asumv_unb_var1.h @@ -32,8 +32,8 @@ */ -void bli_absumv_unb_var1( obj_t* x, - obj_t* absum ); +void bli_asumv_unb_var1( obj_t* x, + obj_t* asum ); #undef GENTPROTR @@ -42,8 +42,8 @@ void bli_absumv_unb_var1( obj_t* x, void PASTEMAC(chx,varname)( \ dim_t n, \ void* x, inc_t incx, \ - void* absum \ + void* asum \ ); -INSERT_GENTPROTR_BASIC( absumv_unb_var1 ) +INSERT_GENTPROTR_BASIC( asumv_unb_var1 ) diff --git a/frame/util/mkherm/bli_mkherm_unb_var1.c b/frame/util/mkherm/bli_mkherm_unb_var1.c index f1a8973fc..e0189f81c 100644 --- a/frame/util/mkherm/bli_mkherm_unb_var1.c +++ b/frame/util/mkherm/bli_mkherm_unb_var1.c @@ 
-71,7 +71,7 @@ void bli_mkherm_unb_var1( obj_t* a ) #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ +#define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ uplo_t uploa, \ @@ -117,5 +117,5 @@ void PASTEMAC(ch,varname)( \ } -INSERT_GENTFUNC_BASIC( mkherm, mkherm_unb_var1 ) +INSERT_GENTFUNC_BASIC0( mkherm_unb_var1 ) diff --git a/frame/util/mksymm/bli_mksymm_unb_var1.c b/frame/util/mksymm/bli_mksymm_unb_var1.c index fcc9dcd02..aa863f582 100644 --- a/frame/util/mksymm/bli_mksymm_unb_var1.c +++ b/frame/util/mksymm/bli_mksymm_unb_var1.c @@ -71,7 +71,7 @@ void bli_mksymm_unb_var1( obj_t* a ) #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ +#define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ uplo_t uploa, \ @@ -103,5 +103,5 @@ void PASTEMAC(ch,varname)( \ } -INSERT_GENTFUNC_BASIC( mksymm, mksymm_unb_var1 ) +INSERT_GENTFUNC_BASIC0( mksymm_unb_var1 ) diff --git a/frame/util/mktrim/bli_mktrim_unb_var1.c b/frame/util/mktrim/bli_mktrim_unb_var1.c index 158c1f803..46e9449e6 100644 --- a/frame/util/mktrim/bli_mktrim_unb_var1.c +++ b/frame/util/mktrim/bli_mktrim_unb_var1.c @@ -71,7 +71,7 @@ void bli_mktrim_unb_var1( obj_t* a ) #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ +#define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ uplo_t uploa, \ @@ -105,5 +105,5 @@ void PASTEMAC(ch,varname)( \ } -INSERT_GENTFUNC_BASIC( mktrim, mktrim_unb_var1 ) +INSERT_GENTFUNC_BASIC0( mktrim_unb_var1 ) diff --git a/frame/util/absumm/bli_absumm.c b/frame/util/norm1m/bli_norm1m.c similarity index 87% rename from frame/util/absumm/bli_absumm.c rename to frame/util/norm1m/bli_norm1m.c index 1df4b7a60..f519149af 100644 --- a/frame/util/absumm/bli_absumm.c +++ b/frame/util/norm1m/bli_norm1m.c @@ -43,17 +43,17 @@ \ void PASTEMAC0(opname)( \ obj_t* x, \ - obj_t* absum \ + obj_t* norm \ ) \ { \ if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( x, absum ); \ + 
PASTEMAC(opname,_check)( x, norm ); \ \ PASTEMAC0(varname)( x, \ - absum ); \ + norm ); \ } -GENFRONT( absumm, absumm_unb_var1 ) +GENFRONT( norm1m, norm1m_unb_var1 ) // @@ -64,20 +64,22 @@ GENFRONT( absumm, absumm_unb_var1 ) \ void PASTEMAC(chx,opname)( \ doff_t diagoffx, \ + diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype_x* x, inc_t rs_x, inc_t cs_x, \ - ctype_xr* absum \ + ctype_xr* norm \ ) \ { \ PASTEMAC(chx,varname)( diagoffx, \ + diagx, \ uplox, \ m, \ n, \ x, rs_x, cs_x, \ - absum ); \ + norm ); \ } -INSERT_GENTFUNCR_BASIC( absumm, absumm_unb_var1 ) +INSERT_GENTFUNCR_BASIC( norm1m, norm1m_unb_var1 ) diff --git a/frame/1m/fnormm/bli_fnormm.h b/frame/util/norm1m/bli_norm1m.h similarity index 94% rename from frame/1m/fnormm/bli_fnormm.h rename to frame/util/norm1m/bli_norm1m.h index 0f47109bd..f2e53670e 100644 --- a/frame/1m/fnormm/bli_fnormm.h +++ b/frame/util/norm1m/bli_norm1m.h @@ -32,14 +32,14 @@ */ -#include "bli_fnormm_check.h" -#include "bli_fnormm_unb_var1.h" +#include "bli_norm1m_check.h" +#include "bli_norm1m_unb_var1.h" // // Prototype object-based interface. 
// -void bli_fnormm( obj_t* x, +void bli_norm1m( obj_t* x, obj_t* norm ); @@ -59,5 +59,5 @@ void PASTEMAC(chx,opname)( \ ctype_xr* norm \ ); -INSERT_GENTPROTR_BASIC( fnormm ) +INSERT_GENTPROTR_BASIC( norm1m ) diff --git a/frame/util/absumm/bli_absumm_check.c b/frame/util/norm1m/bli_norm1m_check.c similarity index 92% rename from frame/util/absumm/bli_absumm_check.c rename to frame/util/norm1m/bli_norm1m_check.c index afd8793dd..be734ba20 100644 --- a/frame/util/absumm/bli_absumm_check.c +++ b/frame/util/norm1m/bli_norm1m_check.c @@ -34,8 +34,8 @@ #include "blis.h" -void bli_absumm_check( obj_t* x, - obj_t* absum ) +void bli_norm1m_check( obj_t* x, + obj_t* norm ) { err_t e_val; @@ -44,7 +44,7 @@ void bli_absumm_check( obj_t* x, e_val = bli_check_floating_object( x ); bli_check_error_code( e_val ); - e_val = bli_check_nonconstant_object( absum ); + e_val = bli_check_nonconstant_object( norm ); bli_check_error_code( e_val ); // Check object dimensions. @@ -52,7 +52,7 @@ void bli_absumm_check( obj_t* x, e_val = bli_check_matrix_object( x ); bli_check_error_code( e_val ); - e_val = bli_check_scalar_object( absum ); + e_val = bli_check_scalar_object( norm ); bli_check_error_code( e_val ); } diff --git a/frame/1/fnormv/bli_fnormv_check.h b/frame/util/norm1m/bli_norm1m_check.h similarity index 98% rename from frame/1/fnormv/bli_fnormv_check.h rename to frame/util/norm1m/bli_norm1m_check.h index 16bb66378..5b069aa6d 100644 --- a/frame/1/fnormv/bli_fnormv_check.h +++ b/frame/util/norm1m/bli_norm1m_check.h @@ -32,5 +32,5 @@ */ -void bli_fnormv_check( obj_t* x, +void bli_norm1m_check( obj_t* a, obj_t* norm ); diff --git a/frame/util/norm1m/bli_norm1m_unb_var1.c b/frame/util/norm1m/bli_norm1m_unb_var1.c new file mode 100644 index 000000000..afe6a587a --- /dev/null +++ b/frame/util/norm1m/bli_norm1m_unb_var1.c @@ -0,0 +1,231 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +#define FUNCPTR_T norm1m_fp + +typedef void (*FUNCPTR_T)( + doff_t diagoffx, + diag_t diagx, + uplo_t uplox, + dim_t m, + dim_t n, + void* x, inc_t rs_x, inc_t cs_x, + void* norm + ); + +static FUNCPTR_T GENARRAY(ftypes,norm1m_unb_var1); + + +void bli_norm1m_unb_var1( obj_t* x, + obj_t* norm ) +{ + num_t dt_x = bli_obj_datatype( *x ); + + doff_t diagoffx = bli_obj_diag_offset( *x ); + diag_t diagx = bli_obj_diag( *x ); + uplo_t uplox = bli_obj_uplo( *x ); + + dim_t m = bli_obj_length( *x ); + dim_t n = bli_obj_width( *x ); + + void* buf_x = bli_obj_buffer_at_off( *x ); + inc_t rs_x = bli_obj_row_stride( *x ); + inc_t cs_x = bli_obj_col_stride( *x ); + + void* buf_norm = bli_obj_buffer_at_off( *norm ); + + FUNCPTR_T f; + + // Index into the type combination array to extract the correct + // function pointer. + f = ftypes[dt_x]; + + // Invoke the function. + f( diagoffx, + diagx, + uplox, + m, + n, + buf_x, rs_x, cs_x, + buf_norm ); +} + + +#undef GENTFUNCR +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname, kername ) \ +\ +void PASTEMAC(chx,varname)( \ + doff_t diagoffx, \ + diag_t diagx, \ + uplo_t uplox, \ + dim_t m, \ + dim_t n, \ + void* x, inc_t rs_x, inc_t cs_x, \ + void* norm \ + ) \ +{ \ + ctype_x* x_cast = x; \ + ctype_xr* norm_cast = norm; \ + ctype_x* one = PASTEMAC(chx,1); \ + ctype_x* x0; \ + ctype_x* chi1; \ + ctype_x* x2; \ + ctype_xr absum_max; \ + ctype_xr absum_j; \ + ctype_xr abval_chi1; \ + uplo_t uplox_eff; \ + dim_t n_iter; \ + dim_t n_elem, n_elem_max; \ + inc_t ldx, incx; \ + dim_t j, i; \ + dim_t ij0, n_shift; \ +\ + /* Initialize the maximum absolute column sum to zero. */ \ + PASTEMAC(chxr,set0s)( absum_max ); \ +\ + /* If either dimension is zero, return with absum_max equal to zero. */ \ + if ( bli_zero_dim2( m, n ) ) \ + { \ + PASTEMAC2(chxr,chxr,copys)( absum_max, *norm_cast ); \ + return; \ + } \ +\ + /* Set various loop parameters.
*/ \ + bli_set_dims_incs_uplo_1m_noswap( diagoffx, BLIS_NONUNIT_DIAG, \ + uplox, m, n, rs_x, cs_x, \ + uplox_eff, n_elem_max, n_iter, incx, ldx, \ + ij0, n_shift ); \ +\ + /* If the matrix is zeros, return with absum_max equal to zero. */ \ + if ( bli_is_zeros( uplox_eff ) ) \ + { \ + PASTEMAC2(chxr,chxr,copys)( absum_max, *norm_cast ); \ + return; \ + } \ +\ +\ + /* Handle dense and upper/lower storage cases separately. */ \ + if ( bli_is_dense( uplox_eff ) ) \ + { \ + for ( j = 0; j < n_iter; ++j ) \ + { \ + n_elem = n_elem_max; \ +\ + x0 = x_cast + (j )*ldx + (0 )*incx; \ +\ + /* Compute the norm of the current column. */ \ + PASTEMAC(chx,kername)( n_elem, \ + x0, incx, \ + &absum_j ); \ +\ + /* If absum_j is greater than the previous maximum value, + then save it. */ \ + if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ + { \ + PASTEMAC2(chxr,chxr,copys)( absum_j, absum_max ); \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_upper( uplox_eff ) ) \ + { \ + for ( j = 0; j < n_iter; ++j ) \ + { \ + n_elem = bli_min( n_shift + j + 1, n_elem_max ); \ +\ + x0 = x_cast + (ij0+j )*ldx + (0 )*incx; \ + chi1 = x_cast + (ij0+j )*ldx + (n_elem-1)*incx; \ +\ + /* Compute the norm of the super-diagonal elements. */ \ + PASTEMAC(chx,kername)( n_elem - 1, \ + x0, incx, \ + &absum_j ); \ +\ + if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ +\ + /* Handle the diagonal element separately in case it's + unit. */ \ + PASTEMAC2(chx,chxr,abval2s)( *chi1, abval_chi1 ); \ + PASTEMAC2(chxr,chxr,adds)( abval_chi1, absum_j ); \ +\ + /* If absum_j is greater than the previous maximum value, + then save it. 
*/ \ + if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ + { \ + PASTEMAC2(chxr,chxr,copys)( absum_j, absum_max ); \ + } \ + } \ + } \ + else if ( bli_is_lower( uplox_eff ) ) \ + { \ + for ( j = 0; j < n_iter; ++j ) \ + { \ + i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \ + n_elem = n_elem_max - i; \ +\ + chi1 = x_cast + (j )*ldx + (ij0+i )*incx; \ + x2 = x_cast + (j )*ldx + (ij0+i+1)*incx; \ +\ + /* Compute the norm of the sub-diagonal elements. */ \ + PASTEMAC(chx,kername)( n_elem - 1, \ + x2, incx, \ + &absum_j ); \ +\ + if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ +\ + /* Handle the diagonal element separately in case it's + unit. */ \ + PASTEMAC2(chx,chxr,abval2s)( *chi1, abval_chi1 ); \ + PASTEMAC2(chxr,chxr,adds)( abval_chi1, absum_j ); \ +\ + /* If absum_j is greater than the previous maximum value, + then save it. */ \ + if ( absum_max < absum_j || bli_isnan( absum_j ) ) \ + { \ + PASTEMAC2(chxr,chxr,copys)( absum_j, absum_max ); \ + } \ + } \ + } \ + } \ +\ + /* Store final value of absum_max to the output variable. 
*/ \ + PASTEMAC2(chxr,chxr,copys)( absum_max, *norm_cast ); \ +} + + +INSERT_GENTFUNCR_BASIC( norm1m_unb_var1, norm1v_unb_var1 ) + diff --git a/frame/util/absumm/bli_absumm_unb_var1.h b/frame/util/norm1m/bli_norm1m_unb_var1.h similarity index 90% rename from frame/util/absumm/bli_absumm_unb_var1.h rename to frame/util/norm1m/bli_norm1m_unb_var1.h index e24922d1b..2c01984e4 100644 --- a/frame/util/absumm/bli_absumm_unb_var1.h +++ b/frame/util/norm1m/bli_norm1m_unb_var1.h @@ -32,8 +32,8 @@ */ -void bli_absumm_unb_var1( obj_t* x, - obj_t* absum ); +void bli_norm1m_unb_var1( obj_t* x, + obj_t* norm ); #undef GENTPROTR @@ -41,12 +41,13 @@ void bli_absumm_unb_var1( obj_t* x, \ void PASTEMAC(chx,varname)( \ doff_t diagoffx, \ + diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ void* x, inc_t rs_x, inc_t cs_x, \ - void* absum \ + void* norm \ ); -INSERT_GENTPROTR_BASIC( absumm_unb_var1 ) +INSERT_GENTPROTR_BASIC( norm1m_unb_var1 ) diff --git a/frame/util/norm1v/bli_norm1v.c b/frame/util/norm1v/bli_norm1v.c new file mode 100644 index 000000000..8b285ba05 --- /dev/null +++ b/frame/util/norm1v/bli_norm1v.c @@ -0,0 +1,77 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + + +// +// Define object-based interface. +// +#undef GENFRONT +#define GENFRONT( opname, varname ) \ +\ +void PASTEMAC0(opname)( \ + obj_t* x, \ + obj_t* norm \ + ) \ +{ \ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( x, norm ); \ +\ + PASTEMAC0(varname)( x, \ + norm ); \ +} + +GENFRONT( norm1v, norm1v_unb_var1 ) + + +// +// Define BLAS-like interfaces. +// +#undef GENTFUNCR +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +\ +void PASTEMAC(chx,opname)( \ + dim_t n, \ + ctype_x* x, inc_t incx, \ + ctype_xr* norm \ + ) \ +{ \ + PASTEMAC(chx,varname)( n, \ + x, incx, \ + norm ); \ +} + +INSERT_GENTFUNCR_BASIC( norm1v, norm1v_unb_var1 ) + diff --git a/frame/util/norm1v/bli_norm1v.h b/frame/util/norm1v/bli_norm1v.h new file mode 100644 index 000000000..e91576e30 --- /dev/null +++ b/frame/util/norm1v/bli_norm1v.h @@ -0,0 +1,59 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "bli_norm1v_check.h" +#include "bli_norm1v_unb_var1.h" + + +// +// Prototype object-based interface. +// +void bli_norm1v( obj_t* x, + obj_t* norm ); + + +// +// Prototype BLAS-like interfaces. 
+// +#undef GENTPROTR +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, opname ) \ +\ +void PASTEMAC(chx,opname)( \ + dim_t n, \ + ctype_x* x, inc_t incx, \ + ctype_xr* norm \ + ); + +INSERT_GENTPROTR_BASIC( norm1v ) + diff --git a/frame/util/norm1v/bli_norm1v_check.c b/frame/util/norm1v/bli_norm1v_check.c new file mode 100644 index 000000000..13eff8769 --- /dev/null +++ b/frame/util/norm1v/bli_norm1v_check.c @@ -0,0 +1,58 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_norm1v_check( obj_t* x, + obj_t* norm ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_nonconstant_object( norm ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_scalar_object( norm ); + bli_check_error_code( e_val ); +} + diff --git a/frame/1m/fnormm/bli_fnormm_check.h b/frame/util/norm1v/bli_norm1v_check.h similarity index 98% rename from frame/1m/fnormm/bli_fnormm_check.h rename to frame/util/norm1v/bli_norm1v_check.h index 8d900e307..088ec42b9 100644 --- a/frame/1m/fnormm/bli_fnormm_check.h +++ b/frame/util/norm1v/bli_norm1v_check.h @@ -32,5 +32,5 @@ */ -void bli_fnormm_check( obj_t* x, +void bli_norm1v_check( obj_t* x, obj_t* norm ); diff --git a/frame/util/norm1v/bli_norm1v_unb_var1.c b/frame/util/norm1v/bli_norm1v_unb_var1.c new file mode 100644 index 000000000..08e0c6c1d --- /dev/null +++ b/frame/util/norm1v/bli_norm1v_unb_var1.c @@ -0,0 +1,108 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +#define FUNCPTR_T norm1v_fp + +typedef void (*FUNCPTR_T)( + dim_t n, + void* x, inc_t incx, + void* norm + ); + +static FUNCPTR_T GENARRAY(ftypes,norm1v_unb_var1); + + +void bli_norm1v_unb_var1( obj_t* x, + obj_t* norm ) +{ + num_t dt_x = bli_obj_datatype( *x ); + + dim_t n = bli_obj_vector_dim( *x ); + + inc_t inc_x = bli_obj_vector_inc( *x ); + void* buf_x = bli_obj_buffer_at_off( *x ); + + void* buf_norm = bli_obj_buffer_at_off( *norm ); + + FUNCPTR_T f; + + // Index into the type combination array to extract the correct + // function pointer. + f = ftypes[dt_x]; + + // Invoke the function. + f( n, + buf_x, inc_x, + buf_norm ); +} + + +#undef GENTFUNCR +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname ) \ +\ +void PASTEMAC(chx,varname)( \ + dim_t n, \ + void* x, inc_t incx, \ + void* norm \ + ) \ +{ \ + ctype_x* x_cast = x; \ + ctype_xr* norm_cast = norm; \ + ctype_x* chi1; \ + ctype_xr abs_chi1; \ + ctype_xr absum; \ + dim_t i; \ +\ + /* Initialize the absolute sum accumulator to zero. */ \ + PASTEMAC(chxr,set0s)( absum ); \ +\ + for ( i = 0; i < n; ++i ) \ + { \ + chi1 = x_cast + (i )*incx; \ +\ + /* Compute the absolute value (or complex magnitude) of chi1. */ \ + PASTEMAC2(chx,chxr,abval2s)( *chi1, abs_chi1 ); \ +\ + /* Accumulate the absolute value of chi1 into absum. */ \ + PASTEMAC2(chxr,chxr,adds)( abs_chi1, absum ); \ + } \ +\ + /* Store final value of absum to the output variable. */ \ + PASTEMAC2(chxr,chxr,copys)( absum, *norm_cast ); \ +} + +INSERT_GENTFUNCR_BASIC0( norm1v_unb_var1 ) + diff --git a/frame/util/norm1v/bli_norm1v_unb_var1.h b/frame/util/norm1v/bli_norm1v_unb_var1.h new file mode 100644 index 000000000..aefe737c3 --- /dev/null +++ b/frame/util/norm1v/bli_norm1v_unb_var1.h @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_norm1v_unb_var1( obj_t* x, + obj_t* norm ); + + +#undef GENTPROTR +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, varname ) \ +\ +void PASTEMAC(chx,varname)( \ + dim_t n, \ + void* x, inc_t incx, \ + void* norm \ + ); + +INSERT_GENTPROTR_BASIC( norm1v_unb_var1 ) + diff --git a/frame/1m/fnormm/bli_fnormm.c b/frame/util/normfm/bli_normfm.c similarity index 94% rename from frame/1m/fnormm/bli_fnormm.c rename to frame/util/normfm/bli_normfm.c index 3557abac2..b166c5f20 100644 --- a/frame/1m/fnormm/bli_fnormm.c +++ b/frame/util/normfm/bli_normfm.c @@ -38,13 +38,13 @@ // // Define object-based interface. // -void bli_fnormm( obj_t* x, +void bli_normfm( obj_t* x, obj_t* norm ) { if ( bli_error_checking_is_enabled() ) - bli_fnormm_check( x, norm ); + bli_normfm_check( x, norm ); - bli_fnormm_unb_var1( x, norm ); + bli_normfm_unb_var1( x, norm ); } @@ -73,5 +73,5 @@ void PASTEMAC(chx,opname)( \ norm ); \ } -INSERT_GENTFUNCR_BASIC( fnormm, fnormm_unb_var1 ) +INSERT_GENTFUNCR_BASIC( normfm, normfm_unb_var1 ) diff --git a/frame/util/absumm/bli_absumm.h b/frame/util/normfm/bli_normfm.h similarity index 89% rename from frame/util/absumm/bli_absumm.h rename to frame/util/normfm/bli_normfm.h index 9d455f1af..db27c9deb 100644 --- a/frame/util/absumm/bli_absumm.h +++ b/frame/util/normfm/bli_normfm.h @@ -32,15 +32,15 @@ */ -#include "bli_absumm_check.h" -#include "bli_absumm_unb_var1.h" +#include "bli_normfm_check.h" +#include "bli_normfm_unb_var1.h" // // Prototype object-based interface. 
// -void bli_absumm( obj_t* x, - obj_t* absum ); +void bli_normfm( obj_t* x, + obj_t* norm ); // @@ -51,12 +51,13 @@ void bli_absumm( obj_t* x, \ void PASTEMAC(chx,opname)( \ doff_t diagoffx, \ + diag_t diagx, \ uplo_t uplox, \ dim_t m, \ dim_t n, \ ctype_x* x, inc_t rs_x, inc_t cs_x, \ - ctype_xr* absum \ + ctype_xr* norm \ ); -INSERT_GENTPROTR_BASIC( absumm ) +INSERT_GENTPROTR_BASIC( normfm ) diff --git a/frame/1m/fnormm/bli_fnormm_check.c b/frame/util/normfm/bli_normfm_check.c similarity index 98% rename from frame/1m/fnormm/bli_fnormm_check.c rename to frame/util/normfm/bli_normfm_check.c index 5382df59f..fbec6e9a6 100644 --- a/frame/1m/fnormm/bli_fnormm_check.c +++ b/frame/util/normfm/bli_normfm_check.c @@ -34,7 +34,7 @@ #include "blis.h" -void bli_fnormm_check( obj_t* x, +void bli_normfm_check( obj_t* x, obj_t* norm ) { err_t e_val; diff --git a/frame/util/normfm/bli_normfm_check.h b/frame/util/normfm/bli_normfm_check.h new file mode 100644 index 000000000..c8fd08eb4 --- /dev/null +++ b/frame/util/normfm/bli_normfm_check.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_normfm_check( obj_t* x, + obj_t* norm ); diff --git a/frame/1m/fnormm/bli_fnormm_unb_var1.c b/frame/util/normfm/bli_normfm_unb_var1.c similarity index 78% rename from frame/1m/fnormm/bli_fnormm_unb_var1.c rename to frame/util/normfm/bli_normfm_unb_var1.c index d0d7c0e37..16200674a 100644 --- a/frame/1m/fnormm/bli_fnormm_unb_var1.c +++ b/frame/util/normfm/bli_normfm_unb_var1.c @@ -34,7 +34,7 @@ #include "blis.h" -#define FUNCPTR_T fnormm_fp +#define FUNCPTR_T normfm_fp typedef void (*FUNCPTR_T)( doff_t diagoffx, @@ -46,27 +46,13 @@ typedef void (*FUNCPTR_T)( void* norm ); -/* -// If some mixed datatype functions will not be compiled, we initialize -// the corresponding elements of the function array to NULL. 
-#ifdef BLIS_ENABLE_MIXED_PRECISION_SUPPORT -static FUNCPTR_T GENARRAY2_ALL(ftypes,fnormm_unb_var1); -#else -#ifdef BLIS_ENABLE_MIXED_DOMAIN_SUPPORT -static FUNCPTR_T GENARRAY2_EXT(ftypes,fnormm_unb_var1); -#else -static FUNCPTR_T GENARRAY2_MIN(ftypes,fnormm_unb_var1); -#endif -#endif -*/ -static FUNCPTR_T GENARRAY(ftypes,fnormm_unb_var1); +static FUNCPTR_T GENARRAY(ftypes,normfm_unb_var1); -void bli_fnormm_unb_var1( obj_t* x, +void bli_normfm_unb_var1( obj_t* x, obj_t* norm ) { num_t dt_x = bli_obj_datatype( *x ); - //num_t dt_norm = bli_obj_datatype( *norm ); doff_t diagoffx = bli_obj_diag_offset( *x ); diag_t diagx = bli_obj_diag( *x ); @@ -85,7 +71,7 @@ void bli_fnormm_unb_var1( obj_t* x, // Index into the type combination array to extract the correct // function pointer. - f = ftypes[dt_x]; //[dt_norm]; + f = ftypes[dt_x]; // Invoke the function. f( diagoffx, @@ -117,9 +103,8 @@ void PASTEMAC(chx,varname)( \ ctype_xr* one_r = PASTEMAC(chxr,1); \ ctype_xr* zero_r = PASTEMAC(chxr,0); \ ctype_x* x0; \ - ctype_x* x1; \ - ctype_x* x2; \ ctype_x* chi1; \ + ctype_x* x2; \ ctype_xr scale; \ ctype_xr sumsq; \ ctype_xr sqrt_sumsq; \ @@ -162,10 +147,11 @@ void PASTEMAC(chx,varname)( \ { \ n_elem = n_elem_max; \ \ - x1 = x_cast + (j )*ldx + (0 )*incx; \ + x0 = x_cast + (j )*ldx + (0 )*incx; \ \ + /* Compute the norm of the current column. */ \ PASTEMAC(chx,kername)( n_elem, \ - x1, incx, \ + x0, incx, \ &scale, \ &sumsq ); \ } \ @@ -178,23 +164,23 @@ void PASTEMAC(chx,varname)( \ { \ n_elem = bli_min( n_shift + j + 1, n_elem_max ); \ \ - x1 = x_cast + (ij0+j )*ldx + (0 )*incx; \ - x0 = x1; \ - chi1 = one; \ + x0 = x_cast + (ij0+j )*ldx + (0 )*incx; \ + chi1 = x_cast + (ij0+j )*ldx + (n_elem-1)*incx; \ \ - if ( bli_is_unit_diag( diagx ) ) \ - { \ - PASTEMAC(chx,kername)( 1, \ - chi1, incx, \ - &scale, \ - &sumsq ); \ - --n_elem; \ - } \ -\ - PASTEMAC(chx,kername)( n_elem, \ + /* Sum the squares of the super-diagonal elements. 
*/ \ + PASTEMAC(chx,kername)( n_elem - 1, \ x0, incx, \ &scale, \ &sumsq ); \ +\ + if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ +\ + /* Handle the diagonal element separately in case it's + unit. */ \ + PASTEMAC(chx,kername)( 1, \ + chi1, incx, \ + &scale, \ + &sumsq ); \ } \ } \ else if ( bli_is_lower( uplox_eff ) ) \ @@ -204,31 +190,34 @@ void PASTEMAC(chx,varname)( \ i = bli_max( 0, ( doff_t )j - ( doff_t )n_shift ); \ n_elem = n_elem_max - i; \ \ - x1 = x_cast + (j )*ldx + (ij0+i )*incx; \ - x2 = x1 + incx; \ - chi1 = one; \ + chi1 = x_cast + (j )*ldx + (ij0+i )*incx; \ + x2 = x_cast + (j )*ldx + (ij0+i+1)*incx; \ \ - if ( bli_is_unit_diag( diagx ) ) \ - { \ - PASTEMAC(chx,kername)( 1, \ - chi1, incx, \ - &scale, \ - &sumsq ); \ - --n_elem; \ - } \ -\ - PASTEMAC(chx,kername)( n_elem, \ + /* Sum the squares of the sub-diagonal elements. */ \ + PASTEMAC(chx,kername)( n_elem - 1, \ x2, incx, \ &scale, \ &sumsq ); \ +\ + if ( bli_is_unit_diag( diagx ) ) chi1 = one; \ +\ + /* Handle the diagonal element separately in case it's + unit. */ \ + PASTEMAC(chx,kername)( 1, \ + chi1, incx, \ + &scale, \ + &sumsq ); \ } \ } \ } \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ PASTEMAC2(chxr,chxr,sqrt2s)( sumsq, sqrt_sumsq ); \ - PASTEMAC3(chxr,chxr,chxr,scal2s)( scale, sqrt_sumsq, *norm_cast ); \ + PASTEMAC2(chxr,chxr,scals)( scale, sqrt_sumsq ); \ +\ + /* Store the final value to the output variable. 
*/ \ + PASTEMAC2(chxr,chxr,copys)( sqrt_sumsq, *norm_cast ); \ } -INSERT_GENTFUNCR_BASIC( fnormm_unb_var1, sumsqv_unb_var1 ) +INSERT_GENTFUNCR_BASIC( normfm_unb_var1, sumsqv_unb_var1 ) diff --git a/frame/1m/fnormm/bli_fnormm_unb_var1.h b/frame/util/normfm/bli_normfm_unb_var1.h similarity index 95% rename from frame/1m/fnormm/bli_fnormm_unb_var1.h rename to frame/util/normfm/bli_normfm_unb_var1.h index 811608443..819abaf51 100644 --- a/frame/1m/fnormm/bli_fnormm_unb_var1.h +++ b/frame/util/normfm/bli_normfm_unb_var1.h @@ -32,7 +32,7 @@ */ -void bli_fnormm_unb_var1( obj_t* x, obj_t* norm ); +void bli_normfm_unb_var1( obj_t* x, obj_t* norm ); #undef GENTPROTR #define GENTPROTR( ctype_x, ctype_xr, chx, chxr, varname ) \ @@ -47,4 +47,4 @@ void PASTEMAC(chx,varname)( \ void* norm \ ); -INSERT_GENTPROTR_BASIC( fnormm_unb_var1 ) +INSERT_GENTPROTR_BASIC( normfm_unb_var1 ) diff --git a/frame/1/fnormv/bli_fnormv.c b/frame/util/normfv/bli_normfv.c similarity index 94% rename from frame/1/fnormv/bli_fnormv.c rename to frame/util/normfv/bli_normfv.c index db6f29524..457dbb3c1 100644 --- a/frame/1/fnormv/bli_fnormv.c +++ b/frame/util/normfv/bli_normfv.c @@ -38,13 +38,13 @@ // // Define object-based interface. 
// -void bli_fnormv( obj_t* x, +void bli_normfv( obj_t* x, obj_t* norm ) { if ( bli_error_checking_is_enabled() ) - bli_fnormv_check( x, norm ); + bli_normfv_check( x, norm ); - bli_fnormv_unb_var1( x, norm ); + bli_normfv_unb_var1( x, norm ); } @@ -65,5 +65,5 @@ void PASTEMAC(chx,opname)( \ norm ); \ } -INSERT_GENTFUNCR_BASIC( fnormv, fnormv_unb_var1 ) +INSERT_GENTFUNCR_BASIC( normfv, normfv_unb_var1 ) diff --git a/frame/1/fnormv/bli_fnormv.h b/frame/util/normfv/bli_normfv.h similarity index 94% rename from frame/1/fnormv/bli_fnormv.h rename to frame/util/normfv/bli_normfv.h index e83d9ba98..92b174db7 100644 --- a/frame/1/fnormv/bli_fnormv.h +++ b/frame/util/normfv/bli_normfv.h @@ -32,14 +32,14 @@ */ -#include "bli_fnormv_check.h" -#include "bli_fnormv_unb_var1.h" +#include "bli_normfv_check.h" +#include "bli_normfv_unb_var1.h" // // Prototype object-based interface. // -void bli_fnormv( obj_t* x, +void bli_normfv( obj_t* x, obj_t* norm ); @@ -55,5 +55,5 @@ void PASTEMAC(chx,opname)( \ ctype_xr* norm \ ); -INSERT_GENTPROTR_BASIC( fnormv ) +INSERT_GENTPROTR_BASIC( normfv ) diff --git a/frame/1/fnormv/bli_fnormv_check.c b/frame/util/normfv/bli_normfv_check.c similarity index 98% rename from frame/1/fnormv/bli_fnormv_check.c rename to frame/util/normfv/bli_normfv_check.c index 8e1d9df68..34d161b71 100644 --- a/frame/1/fnormv/bli_fnormv_check.c +++ b/frame/util/normfv/bli_normfv_check.c @@ -34,7 +34,7 @@ #include "blis.h" -void bli_fnormv_check( obj_t* x, +void bli_normfv_check( obj_t* x, obj_t* norm ) { err_t e_val; diff --git a/frame/util/normfv/bli_normfv_check.h b/frame/util/normfv/bli_normfv_check.h new file mode 100644 index 000000000..5b0f60ddd --- /dev/null +++ b/frame/util/normfv/bli_normfv_check.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_normfv_check( obj_t* x, + obj_t* norm ); diff --git a/frame/1/fnormv/bli_fnormv_unb_var1.c b/frame/util/normfv/bli_normfv_unb_var1.c similarity index 91% rename from frame/1/fnormv/bli_fnormv_unb_var1.c rename to frame/util/normfv/bli_normfv_unb_var1.c index f0820b4e5..7add1ded7 100644 --- a/frame/1/fnormv/bli_fnormv_unb_var1.c +++ b/frame/util/normfv/bli_normfv_unb_var1.c @@ -34,7 +34,7 @@ #include "blis.h" -#define FUNCPTR_T fnormv_fp +#define FUNCPTR_T normfv_fp typedef void (*FUNCPTR_T)( dim_t m, @@ -42,10 +42,10 @@ typedef void (*FUNCPTR_T)( void* norm ); -static FUNCPTR_T GENARRAY(ftypes,fnormv_unb_var1); +static FUNCPTR_T GENARRAY(ftypes,normfv_unb_var1); -void bli_fnormv_unb_var1( obj_t* x, +void bli_normfv_unb_var1( obj_t* x, obj_t* norm ) { num_t dt_x = bli_obj_datatype( *x ); @@ -106,8 +106,11 @@ void PASTEMAC(chx,varname)( \ \ /* Compute: norm = scale * sqrt( sumsq ) */ \ PASTEMAC2(chxr,chxr,sqrt2s)( sumsq, sqrt_sumsq ); \ - PASTEMAC3(chxr,chxr,chxr,scal2s)( scale, sqrt_sumsq, *norm_cast ); \ + PASTEMAC2(chxr,chxr,scals)( scale, sqrt_sumsq ); \ +\ + /* Store the final value to the output variable. 
*/ \ + PASTEMAC2(chxr,chxr,copys)( sqrt_sumsq, *norm_cast ); \ } -INSERT_GENTFUNCR_BASIC( fnormv_unb_var1, sumsqv_unb_var1 ) +INSERT_GENTFUNCR_BASIC( normfv_unb_var1, sumsqv_unb_var1 ) diff --git a/frame/1/fnormv/bli_fnormv_unb_var1.h b/frame/util/normfv/bli_normfv_unb_var1.h similarity index 95% rename from frame/1/fnormv/bli_fnormv_unb_var1.h rename to frame/util/normfv/bli_normfv_unb_var1.h index e6262585a..3b1739112 100644 --- a/frame/1/fnormv/bli_fnormv_unb_var1.h +++ b/frame/util/normfv/bli_normfv_unb_var1.h @@ -32,7 +32,7 @@ */ -void bli_fnormv_unb_var1( obj_t* x, obj_t* norm ); +void bli_normfv_unb_var1( obj_t* x, obj_t* norm ); #undef GENTPROTR @@ -44,5 +44,5 @@ void PASTEMAC(chx,varname)( \ void* norm \ ); -INSERT_GENTPROTR_BASIC( fnormv_unb_var1 ) +INSERT_GENTPROTR_BASIC( normfv_unb_var1 ) diff --git a/frame/util/normim/bli_normim.c b/frame/util/normim/bli_normim.c new file mode 100644 index 000000000..7df608e60 --- /dev/null +++ b/frame/util/normim/bli_normim.c @@ -0,0 +1,85 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + + +// +// Define object-based interface. +// +#undef GENFRONT +#define GENFRONT( opname, varname ) \ +\ +void PASTEMAC0(opname)( \ + obj_t* x, \ + obj_t* norm \ + ) \ +{ \ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( x, norm ); \ +\ + PASTEMAC0(varname)( x, \ + norm ); \ +} + +GENFRONT( normim, normim_unb_var1 ) + + +// +// Define BLAS-like interfaces. +// +#undef GENTFUNCR +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +\ +void PASTEMAC(chx,opname)( \ + doff_t diagoffx, \ + diag_t diagx, \ + uplo_t uplox, \ + dim_t m, \ + dim_t n, \ + ctype_x* x, inc_t rs_x, inc_t cs_x, \ + ctype_xr* norm \ + ) \ +{ \ + PASTEMAC(chx,varname)( diagoffx, \ + diagx, \ + uplox, \ + m, \ + n, \ + x, rs_x, cs_x, \ + norm ); \ +} + +INSERT_GENTFUNCR_BASIC( normim, normim_unb_var1 ) + diff --git a/frame/util/normim/bli_normim.h b/frame/util/normim/bli_normim.h new file mode 100644 index 000000000..f45153037 --- /dev/null +++ b/frame/util/normim/bli_normim.h @@ -0,0 +1,63 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "bli_normim_check.h" +#include "bli_normim_unb_var1.h" + + +// +// Prototype object-based interface. +// +void bli_normim( obj_t* x, + obj_t* norm ); + + +// +// Prototype BLAS-like interfaces. 
+// +#undef GENTPROTR +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, opname ) \ +\ +void PASTEMAC(chx,opname)( \ + doff_t diagoffx, \ + diag_t diagx, \ + uplo_t uplox, \ + dim_t m, \ + dim_t n, \ + ctype_x* x, inc_t rs_x, inc_t cs_x, \ + ctype_xr* norm \ + ); + +INSERT_GENTPROTR_BASIC( normim ) + diff --git a/frame/util/normim/bli_normim_check.c b/frame/util/normim/bli_normim_check.c new file mode 100644 index 000000000..952ff956a --- /dev/null +++ b/frame/util/normim/bli_normim_check.c @@ -0,0 +1,58 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_normim_check( obj_t* x, + obj_t* norm ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_nonconstant_object( norm ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_matrix_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_scalar_object( norm ); + bli_check_error_code( e_val ); +} + diff --git a/frame/util/normim/bli_normim_check.h b/frame/util/normim/bli_normim_check.h new file mode 100644 index 000000000..6adc77cce --- /dev/null +++ b/frame/util/normim/bli_normim_check.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_normim_check( obj_t* x, + obj_t* norm ); diff --git a/frame/util/normim/bli_normim_unb_var1.c b/frame/util/normim/bli_normim_unb_var1.c new file mode 100644 index 000000000..63e7e7f9d --- /dev/null +++ b/frame/util/normim/bli_normim_unb_var1.c @@ -0,0 +1,120 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution.
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#define FUNCPTR_T normim_fp + +typedef void (*FUNCPTR_T)( + doff_t diagoffx, + diag_t diagx, + uplo_t uplox, + dim_t m, + dim_t n, + void* x, inc_t rs_x, inc_t cs_x, + void* norm + ); + +static FUNCPTR_T GENARRAY(ftypes,normim_unb_var1); + + +void bli_normim_unb_var1( obj_t* x, + obj_t* norm ) +{ + num_t dt_x = bli_obj_datatype( *x ); + + doff_t diagoffx = bli_obj_diag_offset( *x ); + diag_t diagx = bli_obj_diag( *x ); + uplo_t uplox = bli_obj_uplo( *x ); + + dim_t m = bli_obj_length( *x ); + dim_t n = bli_obj_width( *x ); + + void* buf_x = bli_obj_buffer_at_off( *x ); + inc_t rs_x = bli_obj_row_stride( *x ); + inc_t cs_x = bli_obj_col_stride( *x ); + + void* buf_norm = bli_obj_buffer_at_off( *norm ); + + FUNCPTR_T f; + + // Index into the type combination array to extract the correct + // function pointer. + f = ftypes[dt_x]; + + // Invoke the function.
+ f( diagoffx, + diagx, + uplox, + m, + n, + buf_x, rs_x, cs_x, + buf_norm ); +} + + +#undef GENTFUNCR +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname, kername ) \ +\ +void PASTEMAC(chx,varname)( \ + doff_t diagoffx, \ + diag_t diagx, \ + uplo_t uplox, \ + dim_t m, \ + dim_t n, \ + void* x, inc_t rs_x, inc_t cs_x, \ + void* norm \ + ) \ +{ \ + /* Induce a transposition so that rows become columns. */ \ + bli_swap_dims( m, n ); \ + bli_swap_incs( rs_x, cs_x ); \ + bli_toggle_uplo( uplox ); \ + bli_negate_diag_offset( diagoffx ); \ +\ + /* Now we can simply compute the 1-norm of this transposed matrix, + which will be equivalent to the infinity-norm of the original + matrix. */ \ + PASTEMAC(chx,kername)( diagoffx, \ + diagx, \ + uplox, \ + m, \ + n, \ + x, rs_x, cs_x, \ + norm ); \ +} + + +INSERT_GENTFUNCR_BASIC( normim_unb_var1, norm1m_unb_var1 ) + diff --git a/frame/util/normim/bli_normim_unb_var1.h b/frame/util/normim/bli_normim_unb_var1.h new file mode 100644 index 000000000..976128c38 --- /dev/null +++ b/frame/util/normim/bli_normim_unb_var1.h @@ -0,0 +1,53 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_normim_unb_var1( obj_t* x, + obj_t* norm ); + + +#undef GENTPROTR +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, varname ) \ +\ +void PASTEMAC(chx,varname)( \ + doff_t diagoffx, \ + diag_t diagx, \ + uplo_t uplox, \ + dim_t m, \ + dim_t n, \ + void* x, inc_t rs_x, inc_t cs_x, \ + void* norm \ + ); + +INSERT_GENTPROTR_BASIC( normim_unb_var1 ) + diff --git a/frame/util/normiv/bli_normiv.c b/frame/util/normiv/bli_normiv.c new file mode 100644 index 000000000..50712a8bf --- /dev/null +++ b/frame/util/normiv/bli_normiv.c @@ -0,0 +1,77 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + + +// +// Define object-based interface. +// +#undef GENFRONT +#define GENFRONT( opname, varname ) \ +\ +void PASTEMAC0(opname)( \ + obj_t* x, \ + obj_t* norm \ + ) \ +{ \ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( x, norm ); \ +\ + PASTEMAC0(varname)( x, \ + norm ); \ +} + +GENFRONT( normiv, normiv_unb_var1 ) + + +// +// Define BLAS-like interfaces. +// +#undef GENTFUNCR +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +\ +void PASTEMAC(chx,opname)( \ + dim_t n, \ + ctype_x* x, inc_t incx, \ + ctype_xr* norm \ + ) \ +{ \ + PASTEMAC(chx,varname)( n, \ + x, incx, \ + norm ); \ +} + +INSERT_GENTFUNCR_BASIC( normiv, normiv_unb_var1 ) + diff --git a/frame/util/normiv/bli_normiv.h b/frame/util/normiv/bli_normiv.h new file mode 100644 index 000000000..5ec7a42ee --- /dev/null +++ b/frame/util/normiv/bli_normiv.h @@ -0,0 +1,59 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "bli_normiv_check.h" +#include "bli_normiv_unb_var1.h" + + +// +// Prototype object-based interface. +// +void bli_normiv( obj_t* x, + obj_t* norm ); + + +// +// Prototype BLAS-like interfaces. 
+// +#undef GENTPROTR +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, opname ) \ +\ +void PASTEMAC(chx,opname)( \ + dim_t n, \ + ctype_x* x, inc_t incx, \ + ctype_xr* norm \ + ); + +INSERT_GENTPROTR_BASIC( normiv ) + diff --git a/frame/util/normiv/bli_normiv_check.c b/frame/util/normiv/bli_normiv_check.c new file mode 100644 index 000000000..0a7e0c0a6 --- /dev/null +++ b/frame/util/normiv/bli_normiv_check.c @@ -0,0 +1,58 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_normiv_check( obj_t* x, + obj_t* norm ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_nonconstant_object( norm ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_scalar_object( norm ); + bli_check_error_code( e_val ); +} + diff --git a/frame/util/normiv/bli_normiv_check.h b/frame/util/normiv/bli_normiv_check.h new file mode 100644 index 000000000..eb9853e37 --- /dev/null +++ b/frame/util/normiv/bli_normiv_check.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_normiv_check( obj_t* x, + obj_t* norm ); diff --git a/frame/util/normiv/bli_normiv_unb_var1.c b/frame/util/normiv/bli_normiv_unb_var1.c new file mode 100644 index 000000000..ac7b39d7a --- /dev/null +++ b/frame/util/normiv/bli_normiv_unb_var1.c @@ -0,0 +1,115 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#define FUNCPTR_T normiv_fp + +typedef void (*FUNCPTR_T)( + dim_t n, + void* x, inc_t incx, + void* norm + ); + +static FUNCPTR_T GENARRAY(ftypes,normiv_unb_var1); + + +void bli_normiv_unb_var1( obj_t* x, + obj_t* norm ) +{ + num_t dt_x = bli_obj_datatype( *x ); + + dim_t n = bli_obj_vector_dim( *x ); + + inc_t inc_x = bli_obj_vector_inc( *x ); + void* buf_x = bli_obj_buffer_at_off( *x ); + + void* buf_norm = bli_obj_buffer_at_off( *norm ); + + FUNCPTR_T f; + + // Index into the type combination array to extract the correct + // function pointer. + f = ftypes[dt_x]; + + // Invoke the function.
+ f( n, + buf_x, inc_x, + buf_norm ); +} + + +#undef GENTFUNCR +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname ) \ +\ +void PASTEMAC(chx,varname)( \ + dim_t n, \ + void* x, inc_t incx, \ + void* norm \ + ) \ +{ \ + ctype_x* x_cast = x; \ + ctype_xr* norm_cast = norm; \ + ctype_x* chi1; \ + ctype_xr abs_chi1; \ + ctype_xr abs_chi1_max; \ + dim_t i; \ +\ + /* Initialize the maximum absolute value to zero. */ \ + PASTEMAC(chxr,set0s)( abs_chi1_max ); \ +\ + for ( i = 0; i < n; ++i ) \ + { \ + chi1 = x_cast + (i )*incx; \ +\ + /* Compute the absolute value (or complex magnitude) of chi1. */ \ + PASTEMAC2(chx,chxr,abval2s)( *chi1, abs_chi1 ); \ +\ + /* If the absolute value of the current element exceeds that of + the previous largest, save it as the new maximum. If NaN is + encountered, then treat it the same as if it were a valid + value that was larger than any previously seen. This + behavior mimics that of LAPACK's ?lange(). */ \ + if ( abs_chi1_max < abs_chi1 || bli_isnan( abs_chi1 ) ) \ + { \ + PASTEMAC2(chxr,chxr,copys)( abs_chi1, abs_chi1_max ); \ + } \ + } \ +\ + /* Store the final value to the output variable. */ \ + PASTEMAC2(chxr,chxr,copys)( abs_chi1_max, *norm_cast ); \ +} + +INSERT_GENTFUNCR_BASIC0( normiv_unb_var1 ) + diff --git a/frame/util/normiv/bli_normiv_unb_var1.h b/frame/util/normiv/bli_normiv_unb_var1.h new file mode 100644 index 000000000..2aff2d835 --- /dev/null +++ b/frame/util/normiv/bli_normiv_unb_var1.h @@ -0,0 +1,49 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer.
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +void bli_normiv_unb_var1( obj_t* x, + obj_t* norm ); + + +#undef GENTPROTR +#define GENTPROTR( ctype_x, ctype_xr, chx, chxr, varname ) \ +\ +void PASTEMAC(chx,varname)( \ + dim_t n, \ + void* x, inc_t incx, \ + void* norm \ + ); + +INSERT_GENTPROTR_BASIC( normiv_unb_var1 ) + diff --git a/frame/util/randm/bli_randm_unb_var1.c b/frame/util/randm/bli_randm_unb_var1.c index 0cdc3dac8..a47d97ba6 100644 --- a/frame/util/randm/bli_randm_unb_var1.c +++ b/frame/util/randm/bli_randm_unb_var1.c @@ -77,7 +77,7 @@ void bli_randm_unb_var1( obj_t* x ) #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ +#define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ doff_t diagoffx, \ @@ -187,5 +187,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNC_BASIC( randm, randm_unb_var1 ) +INSERT_GENTFUNC_BASIC0( randm_unb_var1 ) diff --git a/frame/util/randv/bli_randv_unb_var1.c b/frame/util/randv/bli_randv_unb_var1.c index 9dd25a181..f89fd9cd9 100644 --- a/frame/util/randv/bli_randv_unb_var1.c +++ b/frame/util/randv/bli_randv_unb_var1.c @@ -66,7 +66,7 @@ void bli_randv_unb_var1( obj_t* x ) #undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ +#define GENTFUNC( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ dim_t n, \ @@ -88,5 +88,5 @@ void PASTEMAC(ch,varname)( \ } \ } -INSERT_GENTFUNC_BASIC( randv, randv_unb_var1 ) +INSERT_GENTFUNC_BASIC0( randv_unb_var1 ) diff --git a/frame/util/sumsqv/bli_sumsqv_unb_var1.c b/frame/util/sumsqv/bli_sumsqv_unb_var1.c index 5004047ef..ee4172451 100644 --- a/frame/util/sumsqv/bli_sumsqv_unb_var1.c +++ b/frame/util/sumsqv/bli_sumsqv_unb_var1.c @@ -90,7 +90,7 @@ void bli_sumsqv_unb_var1( obj_t* x, #undef GENTFUNCR -#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, opname, varname ) \ +#define GENTFUNCR( ctype_x, ctype_xr, chx, chxr, varname ) \ \ void PASTEMAC(chx,varname)( \ dim_t n, \ @@ -99,18 +99,20 @@ void PASTEMAC(chx,varname)( \ void* sumsq \ ) \ { \ - ctype_x* x_cast = x; \ - ctype_xr* 
scale_cast = scale; \ - ctype_xr* sumsq_cast = sumsq; \ - ctype_xr* zero = PASTEMAC(chxr,0); \ - ctype_xr* one = PASTEMAC(chxr,1); \ - ctype_x* chi1; \ - ctype_xr chi1_r; \ - ctype_xr chi1_i; \ - ctype_xr scale_r; \ - ctype_xr sumsq_r; \ - ctype_xr abs_chi1_r; \ - dim_t i; \ + ctype_x* x_cast = x; \ + ctype_xr* scale_cast = scale; \ + ctype_xr* sumsq_cast = sumsq; \ +\ + const ctype_xr zero_r = *PASTEMAC(chxr,0); \ + const ctype_xr one_r = *PASTEMAC(chxr,1); \ +\ + ctype_x* chi1; \ + ctype_xr chi1_r; \ + ctype_xr chi1_i; \ + ctype_xr scale_r; \ + ctype_xr sumsq_r; \ + ctype_xr abs_chi1_r; \ + dim_t i; \ \ /* NOTE: This function attempts to mimic the algorithm for computing the Frobenius norm in netlib LAPACK's ?lassq(). */ \ @@ -128,16 +130,16 @@ void PASTEMAC(chx,varname)( \ { \ /* Get the real and imaginary components of chi1. */ \ PASTEMAC2(chx,chxr,gets)( *chi1, chi1_r, chi1_i ); \ +\ + abs_chi1_r = bli_fabs( chi1_r ); \ \ /* Accumulate real component into sumsq, adjusting scale if needed. */ \ - if ( chi1_r != *zero ) \ + if ( abs_chi1_r > zero_r || bli_isnan( abs_chi1_r) ) \ { \ - abs_chi1_r = bli_fabs( chi1_r ); \ -\ if ( scale_r < abs_chi1_r ) \ { \ - sumsq_r = *one + \ + sumsq_r = one_r + \ sumsq_r * ( scale_r / abs_chi1_r ) * \ ( scale_r / abs_chi1_r ); \ \ @@ -149,16 +151,16 @@ void PASTEMAC(chx,varname)( \ ( abs_chi1_r / scale_r ); \ } \ } \ +\ + abs_chi1_r = bli_fabs( chi1_i ); \ \ /* Accumulate imaginary component into sumsq, adjusting scale if needed. 
*/ \ - if ( chi1_i != *zero ) \ + if ( abs_chi1_r > zero_r || bli_isnan( abs_chi1_r) ) \ { \ - abs_chi1_r = bli_fabs( chi1_i ); \ -\ if ( scale_r < abs_chi1_r ) \ { \ - sumsq_r = *one + \ + sumsq_r = one_r + \ sumsq_r * ( scale_r / abs_chi1_r ) * \ ( scale_r / abs_chi1_r ); \ \ @@ -179,5 +181,5 @@ void PASTEMAC(chx,varname)( \ PASTEMAC2(chxr,chxr,copys)( sumsq_r, *sumsq_cast ); \ } -INSERT_GENTFUNCR_BASIC( sumsqv, sumsqv_unb_var1 ) +INSERT_GENTFUNCR_BASIC0( sumsqv_unb_var1 ) diff --git a/testsuite/src/test_addm.c b/testsuite/src/test_addm.c index 927c6f809..6661c7335 100644 --- a/testsuite/src/test_addm.c +++ b/testsuite/src/test_addm.c @@ -74,7 +74,7 @@ void libblis_test_addm_check( obj_t* alpha, void libblis_test_addm_deps( test_params_t* params, test_op_t* op ) { libblis_test_setm( params, &(op->ops->setm) ); - libblis_test_fnormm( params, &(op->ops->fnormm) ); + libblis_test_normfm( params, &(op->ops->normfm) ); } @@ -234,7 +234,7 @@ void libblis_test_addm_check( obj_t* alpha, // // is functioning correctly if // - // fnormv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m * n ) + // normfv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m * n ) // // is negligible. 
// @@ -247,7 +247,7 @@ void libblis_test_addm_check( obj_t* alpha, bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); - bli_fnormm( y, &norm_r ); + bli_normfm( y, &norm_r ); bli_copysc( beta, &aplusb ); bli_addsc( &alpha_conj, &aplusb ); diff --git a/testsuite/src/test_addv.c b/testsuite/src/test_addv.c index 7a53f6e4e..7b610bc73 100644 --- a/testsuite/src/test_addv.c +++ b/testsuite/src/test_addv.c @@ -73,7 +73,7 @@ void libblis_test_addv_check( obj_t* alpha, void libblis_test_addv_deps( test_params_t* params, test_op_t* op ) { libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); } @@ -229,7 +229,7 @@ void libblis_test_addv_check( obj_t* alpha, // // is functioning correctly if // - // fnormv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m ) + // normfv(y) - sqrt( absqsc( beta + conjx(alpha) ) * m ) // // is negligible. // @@ -241,7 +241,7 @@ void libblis_test_addv_check( obj_t* alpha, bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); - bli_fnormv( y, &norm_r ); + bli_normfv( y, &norm_r ); bli_copysc( beta, &aplusb ); bli_addsc( &alpha_conj, &aplusb ); diff --git a/testsuite/src/test_axpy2v.c b/testsuite/src/test_axpy2v.c index 255593ec7..a7d4aa9af 100644 --- a/testsuite/src/test_axpy2v.c +++ b/testsuite/src/test_axpy2v.c @@ -79,7 +79,7 @@ void libblis_test_axpy2v_check( obj_t* alpha1, void libblis_test_axpy2v_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_addv( params, &(op->ops->addv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); @@ -264,7 +264,7 @@ void libblis_test_axpy2v_check( obj_t* alpha1, // // is functioning correctly if // - // fnorm( z - v ) + // normf( z - v ) // // is negligible, where v contains z 
as computed by two calls to axpyv. // @@ -285,7 +285,7 @@ void libblis_test_axpy2v_check( obj_t* alpha1, bli_addv( &y_temp, &z_temp ); bli_subv( &z_temp, z ); - bli_fnormv( z, &norm ); + bli_normfv( z, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); diff --git a/testsuite/src/test_axpyf.c b/testsuite/src/test_axpyf.c index dd9948682..ec50aa1dd 100644 --- a/testsuite/src/test_axpyf.c +++ b/testsuite/src/test_axpyf.c @@ -78,7 +78,7 @@ void libblis_test_axpyf_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_axpyv( params, &(op->ops->axpyv) ); @@ -269,7 +269,7 @@ void libblis_test_axpyf_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - v ) + // normf( y - v ) // // is negligible, where v contains y as computed by repeated calls to // axpyv. 
@@ -294,7 +294,7 @@ void libblis_test_axpyf_check( obj_t* alpha, } bli_subv( y, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); diff --git a/testsuite/src/test_axpym.c b/testsuite/src/test_axpym.c index e7b922621..45ddc909d 100644 --- a/testsuite/src/test_axpym.c +++ b/testsuite/src/test_axpym.c @@ -75,7 +75,7 @@ void libblis_test_axpym_check( obj_t* alpha, void libblis_test_axpym_deps( test_params_t* params, test_op_t* op ) { libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormm( params, &(op->ops->fnormm) ); + libblis_test_normfm( params, &(op->ops->normfm) ); libblis_test_addm( params, &(op->ops->addm) ); libblis_test_subm( params, &(op->ops->subm) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -248,7 +248,7 @@ void libblis_test_axpym_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - ( y_orig + alpha * conjx(x) ) ) + // normf( y - ( y_orig + alpha * conjx(x) ) ) // // is negligible. 
// @@ -265,7 +265,7 @@ void libblis_test_axpym_check( obj_t* alpha, bli_addm( &x_temp, &y_temp ); bli_subm( &y_temp, y ); - bli_fnormm( y, &norm ); + bli_normfm( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); diff --git a/testsuite/src/test_axpyv.c b/testsuite/src/test_axpyv.c index 18b68edee..e5a1f0599 100644 --- a/testsuite/src/test_axpyv.c +++ b/testsuite/src/test_axpyv.c @@ -75,7 +75,7 @@ void libblis_test_axpyv_check( obj_t* alpha, void libblis_test_axpyv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_addv( params, &(op->ops->addv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); @@ -245,7 +245,7 @@ void libblis_test_axpyv_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - ( y_orig + alpha * conjx(x) ) ) + // normf( y - ( y_orig + alpha * conjx(x) ) ) // // is negligible. // @@ -262,7 +262,7 @@ void libblis_test_axpyv_check( obj_t* alpha, bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); - bli_fnormv( y, &norm ); + bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); diff --git a/testsuite/src/test_copym.c b/testsuite/src/test_copym.c index eb499d348..848d8c0d5 100644 --- a/testsuite/src/test_copym.c +++ b/testsuite/src/test_copym.c @@ -73,7 +73,7 @@ void libblis_test_copym_deps( test_params_t* params, test_op_t* op ) { libblis_test_randm( params, &(op->ops->randm) ); libblis_test_subm( params, &(op->ops->subm) ); - libblis_test_fnormm( params, &(op->ops->fnormm) ); + libblis_test_normfm( params, &(op->ops->normfm) ); } @@ -211,7 +211,7 @@ void libblis_test_copym_check( obj_t* x, // // is functioning correctly if // - // fnormm( y - conjx(x) ) + // normfm( y - conjx(x) ) // // is negligible. 
// @@ -220,7 +220,7 @@ void libblis_test_copym_check( obj_t* x, bli_subm( x, y ); - bli_fnormm( y, &norm_y_r ); + bli_normfm( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); } diff --git a/testsuite/src/test_copyv.c b/testsuite/src/test_copyv.c index 15ab8861c..cfa826cf2 100644 --- a/testsuite/src/test_copyv.c +++ b/testsuite/src/test_copyv.c @@ -73,7 +73,7 @@ void libblis_test_copyv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_subv( params, &(op->ops->subv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); } @@ -208,7 +208,7 @@ void libblis_test_copyv_check( obj_t* x, // // is functioning correctly if // - // fnormv( y - conjx(x) ) + // normfv( y - conjx(x) ) // // is negligible. // @@ -217,7 +217,7 @@ void libblis_test_copyv_check( obj_t* x, bli_subv( x, y ); - bli_fnormv( y, &norm_y_r ); + bli_normfv( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); } diff --git a/testsuite/src/test_dotaxpyv.c b/testsuite/src/test_dotaxpyv.c index e25bd1b6a..788843da0 100644 --- a/testsuite/src/test_dotaxpyv.c +++ b/testsuite/src/test_dotaxpyv.c @@ -81,7 +81,7 @@ void libblis_test_dotaxpyv_check( obj_t* alpha, void libblis_test_dotaxpyv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_dotv( params, &(op->ops->dotv) ); @@ -295,7 +295,7 @@ void libblis_test_dotaxpyv_check( obj_t* alpha, // // and // - // fnorm( z - z_temp ) + // normf( z - z_temp ) // // are negligible, where rho_temp and z_temp contain rho and z as // computed by dotv and axpyv, respectively. 
@@ -316,7 +316,7 @@ void libblis_test_dotaxpyv_check( obj_t* alpha, bli_getsc( &rho_temp, &resid1, &junk ); bli_subv( &z_temp, z ); - bli_fnormv( z, &norm_z ); + bli_normfv( z, &norm_z ); bli_getsc( &norm_z, &resid2, &junk ); *resid = bli_fmaxabs( resid1, resid2 ); diff --git a/testsuite/src/test_dotv.c b/testsuite/src/test_dotv.c index 3d68ed251..a19566083 100644 --- a/testsuite/src/test_dotv.c +++ b/testsuite/src/test_dotv.c @@ -74,7 +74,7 @@ void libblis_test_dotv_check( obj_t* x, void libblis_test_dotv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_copyv( params, &(op->ops->copyv) ); } @@ -237,7 +237,7 @@ void libblis_test_dotv_check( obj_t* x, // // is functioning correctly if // - // sqrtsc( rho.real ) - fnorm( x ) + // sqrtsc( rho.real ) - normf( x ) // // and // @@ -251,7 +251,7 @@ void libblis_test_dotv_check( obj_t* x, bli_obj_scalar_init_detached( dt_real, &norm_x ); bli_obj_scalar_init_detached( dt_real, &norm_xy ); - bli_fnormv( x, &norm_x ); + bli_normfv( x, &norm_x ); bli_unzipsc( rho, &rho_r, &rho_i ); diff --git a/testsuite/src/test_dotxaxpyf.c b/testsuite/src/test_dotxaxpyf.c index ff1ac239a..ddd02273e 100644 --- a/testsuite/src/test_dotxaxpyf.c +++ b/testsuite/src/test_dotxaxpyf.c @@ -87,7 +87,7 @@ void libblis_test_dotxaxpyf_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_axpyv( params, &(op->ops->axpyv) ); @@ -316,11 +316,11 @@ void libblis_test_dotxaxpyf_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - v ) + // normf( y - v ) // // and // - // 
fnorm( z - q ) + // normf( z - q ) // // are negligible, where v and q contain y and z as computed by repeated // calls to dotxv and axpyv, respectively. @@ -358,11 +358,11 @@ void libblis_test_dotxaxpyf_check( obj_t* alpha, bli_subv( y, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, &resid1, &junk ); bli_subv( z, &q ); - bli_fnormv( &q, &norm ); + bli_normfv( &q, &norm ); bli_getsc( &norm, &resid2, &junk ); diff --git a/testsuite/src/test_dotxf.c b/testsuite/src/test_dotxf.c index 7c545e86f..f9d682dc0 100644 --- a/testsuite/src/test_dotxf.c +++ b/testsuite/src/test_dotxf.c @@ -80,7 +80,7 @@ void libblis_test_dotxf_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_dotxv( params, &(op->ops->dotxv) ); @@ -274,7 +274,7 @@ void libblis_test_dotxf_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - v ) + // normf( y - v ) // // is negligible, where v contains y as computed by repeated calls to // dotxv. 
@@ -295,7 +295,7 @@ void libblis_test_dotxf_check( obj_t* alpha, } bli_subv( y, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); diff --git a/testsuite/src/test_dotxv.c b/testsuite/src/test_dotxv.c index b73b0552d..d860d0a1b 100644 --- a/testsuite/src/test_dotxv.c +++ b/testsuite/src/test_dotxv.c @@ -79,7 +79,7 @@ void libblis_test_dotxv_check( obj_t* alpha, void libblis_test_dotxv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_copyv( params, &(op->ops->copyv) ); } @@ -263,7 +263,7 @@ void libblis_test_dotxv_check( obj_t* alpha, // // is functioning correctly if // - // sqrtsc( rho.real ) - sqrtsc( alpha ) * fnorm( x ) + // sqrtsc( rho.real ) - sqrtsc( alpha ) * normf( x ) // // and // @@ -281,7 +281,7 @@ void libblis_test_dotxv_check( obj_t* alpha, bli_copysc( alpha, &temp_r ); bli_sqrtsc( &temp_r, &temp_r ); - bli_fnormv( x, &norm_x_r ); + bli_normfv( x, &norm_x_r ); bli_mulsc( &temp_r, &norm_x_r ); bli_unzipsc( rho, &rho_r, &rho_i ); diff --git a/testsuite/src/test_gemm.c b/testsuite/src/test_gemm.c index f8c75d717..c5c5b3137 100644 --- a/testsuite/src/test_gemm.c +++ b/testsuite/src/test_gemm.c @@ -81,7 +81,7 @@ void libblis_test_gemm_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -285,7 +285,7 @@ void libblis_test_gemm_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where 
// @@ -315,7 +315,7 @@ void libblis_test_gemm_check( obj_t* alpha, bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_gemm_ukr.c b/testsuite/src/test_gemm_ukr.c index 7dbae77e2..3ce1b688f 100644 --- a/testsuite/src/test_gemm_ukr.c +++ b/testsuite/src/test_gemm_ukr.c @@ -81,7 +81,7 @@ void libblis_test_gemm_ukr_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -315,7 +315,7 @@ void libblis_test_gemm_ukr_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -345,7 +345,7 @@ void libblis_test_gemm_ukr_check( obj_t* alpha, bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index f1e05572f..e2701d313 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -92,7 +92,7 @@ void libblis_test_gemmtrsm_ukr_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, 
&(op->ops->copym) ); @@ -358,7 +358,7 @@ void libblis_test_gemmtrsm_ukr_check( side_t side, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -409,7 +409,7 @@ void libblis_test_gemmtrsm_ukr_check( side_t side, } bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_gemv.c b/testsuite/src/test_gemv.c index 0b39cb825..7957de1e8 100644 --- a/testsuite/src/test_gemv.c +++ b/testsuite/src/test_gemv.c @@ -80,7 +80,7 @@ void libblis_test_gemv_check( obj_t* kappa, void libblis_test_gemv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_scalv( params, &(op->ops->scalv) ); @@ -283,7 +283,7 @@ void libblis_test_gemv_check( obj_t* kappa, // // is functioning correctly if // - // fnorm( y - z ) + // normf( y - z ) // // is negligible, where // @@ -311,7 +311,7 @@ void libblis_test_gemv_check( obj_t* kappa, bli_axpyv( alpha, &xT_temp, &yT_temp ); bli_subv( &yT_temp, &yT ); - bli_fnormv( &yT, &norm ); + bli_normfv( &yT, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); diff --git a/testsuite/src/test_ger.c b/testsuite/src/test_ger.c index 1526cde41..c0db8a2d6 100644 --- a/testsuite/src/test_ger.c +++ b/testsuite/src/test_ger.c @@ -77,7 +77,7 @@ void libblis_test_ger_check( obj_t* alpha, void libblis_test_ger_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scal2v( params, &(op->ops->scal2v) ); libblis_test_dotv( params, 
&(op->ops->dotv) ); @@ -262,7 +262,7 @@ void libblis_test_ger_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - w ) + // normf( v - w ) // // is negligible, where // @@ -292,7 +292,7 @@ void libblis_test_ger_check( obj_t* alpha, bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w ); bli_subv( &w, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_hemm.c b/testsuite/src/test_hemm.c index 1685fc2b2..8cd99074e 100644 --- a/testsuite/src/test_hemm.c +++ b/testsuite/src/test_hemm.c @@ -83,7 +83,7 @@ void libblis_test_hemm_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -303,7 +303,7 @@ void libblis_test_hemm_check( side_t side, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -357,7 +357,7 @@ void libblis_test_hemm_check( side_t side, bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_hemv.c b/testsuite/src/test_hemv.c index 8896f83d1..c937afb4d 100644 --- a/testsuite/src/test_hemv.c +++ b/testsuite/src/test_hemv.c @@ -80,7 +80,7 @@ void libblis_test_hemv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); 
libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_scalv( params, &(op->ops->scalv) ); @@ -289,7 +289,7 @@ void libblis_test_hemv_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - v ) + // normf( y - v ) // // is negligible, where // @@ -309,7 +309,7 @@ void libblis_test_hemv_check( obj_t* alpha, bli_gemv( alpha, a, x, beta, &v ); bli_subv( &v, y ); - bli_fnormv( y, &norm ); + bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); diff --git a/testsuite/src/test_her.c b/testsuite/src/test_her.c index 7bdbebe1f..6490303c9 100644 --- a/testsuite/src/test_her.c +++ b/testsuite/src/test_her.c @@ -76,7 +76,7 @@ void libblis_test_her_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copym( params, &(op->ops->copym) ); libblis_test_scal2v( params, &(op->ops->scal2v) ); @@ -260,7 +260,7 @@ void libblis_test_her_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - w ) + // normf( v - w ) // // is negligible, where // @@ -299,7 +299,7 @@ void libblis_test_her_check( obj_t* alpha, bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w ); bli_subv( &w, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_her2.c b/testsuite/src/test_her2.c index b052dfd8c..b75082702 100644 --- a/testsuite/src/test_her2.c +++ b/testsuite/src/test_her2.c @@ -78,7 +78,7 @@ void libblis_test_her2_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, 
&(op->ops->subv) ); libblis_test_copym( params, &(op->ops->copym) ); libblis_test_scal2v( params, &(op->ops->scal2v) ); @@ -270,7 +270,7 @@ void libblis_test_her2_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - w ) + // normf( v - w ) // // is negligible, where // @@ -322,7 +322,7 @@ void libblis_test_her2_check( obj_t* alpha, bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 ); bli_subv( &w1, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_her2k.c b/testsuite/src/test_her2k.c index 217dd24b8..65d797bbb 100644 --- a/testsuite/src/test_her2k.c +++ b/testsuite/src/test_her2k.c @@ -81,7 +81,7 @@ void libblis_test_her2k_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -302,7 +302,7 @@ void libblis_test_her2k_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -341,7 +341,7 @@ void libblis_test_her2k_check( obj_t* alpha, bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_herk.c b/testsuite/src/test_herk.c index 1f849a36d..d35e0d1e4 100644 --- a/testsuite/src/test_herk.c +++ b/testsuite/src/test_herk.c @@ -79,7 +79,7 @@ void libblis_test_herk_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - 
libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -288,7 +288,7 @@ void libblis_test_herk_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -320,7 +320,7 @@ void libblis_test_herk_check( obj_t* alpha, bli_hemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index 4d7575682..f37e7ac43 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -116,7 +116,7 @@ void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) libblis_test_copyv( params, &(ops->copyv) ); libblis_test_dotv( params, &(ops->dotv) ); libblis_test_dotxv( params, &(ops->dotxv) ); - libblis_test_fnormv( params, &(ops->fnormv) ); + libblis_test_normfv( params, &(ops->normfv) ); libblis_test_scalv( params, &(ops->scalv) ); libblis_test_scal2v( params, &(ops->scal2v) ); libblis_test_setv( params, &(ops->setv) ); @@ -130,7 +130,7 @@ void libblis_test_level1m_ops( test_params_t* params, test_ops_t* ops ) libblis_test_addm( params, &(ops->addm) ); libblis_test_axpym( params, &(ops->axpym) ); libblis_test_copym( params, &(ops->copym) ); - libblis_test_fnormm( params, &(ops->fnormm) ); + libblis_test_normfm( params, &(ops->normfm) ); libblis_test_scalm( params, &(ops->scalm) ); libblis_test_scal2m( params, &(ops->scal2m) ); libblis_test_setm( params, &(ops->setm) ); @@ -223,7 +223,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 1, &(ops->copyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 2, 
&(ops->dotv) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 2, &(ops->dotxv) ); - libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 0, &(ops->fnormv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 0, &(ops->normfv) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 1, &(ops->scalv) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 1, &(ops->scal2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_M, 0, &(ops->setv) ); @@ -233,7 +233,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 1, &(ops->addm) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 1, &(ops->axpym) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 1, &(ops->copym) ); - libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 0, &(ops->fnormm) ); + libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 0, &(ops->normfm) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 1, &(ops->scalm) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 1, &(ops->scal2m) ); libblis_test_read_op_info( ops, input_stream, BLIS_TEST_DIMS_MN, 0, &(ops->setm) ); diff --git a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index 00df8e599..81603a876 100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -206,7 +206,7 @@ typedef struct test_ops_s test_op_t copyv; test_op_t dotv; test_op_t dotxv; - test_op_t fnormv; + test_op_t normfv; test_op_t scalv; test_op_t scal2v; test_op_t setv; @@ -216,7 +216,7 @@ typedef struct test_ops_s test_op_t addm; test_op_t axpym; test_op_t copym; - test_op_t fnormm; + test_op_t normfm; test_op_t scalm; test_op_t scal2m; test_op_t setm; @@ -414,7 +414,7 @@ void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); #include "test_copyv.h" #include 
"test_dotv.h" #include "test_dotxv.h" -#include "test_fnormv.h" +#include "test_normfv.h" #include "test_scalv.h" #include "test_scal2v.h" #include "test_setv.h" @@ -424,7 +424,7 @@ void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); #include "test_addm.h" #include "test_axpym.h" #include "test_copym.h" -#include "test_fnormm.h" +#include "test_normfm.h" #include "test_scalm.h" #include "test_scal2m.h" #include "test_setm.h" diff --git a/testsuite/src/test_fnormm.c b/testsuite/src/test_normfm.c similarity index 89% rename from testsuite/src/test_fnormm.c rename to testsuite/src/test_normfm.c index d24421c6e..e0d98c79e 100644 --- a/testsuite/src/test_fnormm.c +++ b/testsuite/src/test_normfm.c @@ -37,7 +37,7 @@ // Static variables. -static char* op_str = "fnormm"; +static char* op_str = "normfm"; static char* o_types = "m"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s @@ -46,10 +46,10 @@ static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. 
-void libblis_test_fnormm_deps( test_params_t* params, +void libblis_test_normfm_deps( test_params_t* params, test_op_t* op ); -void libblis_test_fnormm_experiment( test_params_t* params, +void libblis_test_normfm_experiment( test_params_t* params, test_op_t* op, iface_t iface, num_t datatype, @@ -59,25 +59,25 @@ void libblis_test_fnormm_experiment( test_params_t* params, double* perf, double* resid ); -void libblis_test_fnormm_impl( iface_t iface, +void libblis_test_normfm_impl( iface_t iface, obj_t* x, obj_t* norm ); -void libblis_test_fnormm_check( obj_t* beta, +void libblis_test_normfm_check( obj_t* beta, obj_t* x, obj_t* norm, double* resid ); -void libblis_test_fnormm_deps( test_params_t* params, test_op_t* op ) +void libblis_test_normfm_deps( test_params_t* params, test_op_t* op ) { libblis_test_setm( params, &(op->ops->setm) ); } -void libblis_test_fnormm( test_params_t* params, test_op_t* op ) +void libblis_test_normfm( test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. @@ -88,7 +88,7 @@ void libblis_test_fnormm( test_params_t* params, test_op_t* op ) op->ops->l1m_over == DISABLE_ALL ) return; // Call dependencies first. - if ( TRUE ) libblis_test_fnormm_deps( params, op ); + if ( TRUE ) libblis_test_normfm_deps( params, op ); // Execute the test driver for each implementation requested. 
if ( op->front_seq == ENABLE ) @@ -100,13 +100,13 @@ void libblis_test_fnormm( test_params_t* params, test_op_t* op ) p_types, o_types, thresh, - libblis_test_fnormm_experiment ); + libblis_test_normfm_experiment ); } } -void libblis_test_fnormm_experiment( test_params_t* params, +void libblis_test_normfm_experiment( test_params_t* params, test_op_t* op, iface_t iface, num_t datatype, @@ -156,7 +156,7 @@ void libblis_test_fnormm_experiment( test_params_t* params, { time = bli_clock(); - libblis_test_fnormm_impl( iface, &x, &norm ); + libblis_test_normfm_impl( iface, &x, &norm ); time_min = bli_clock_min_diff( time_min, time ); } @@ -166,7 +166,7 @@ void libblis_test_fnormm_experiment( test_params_t* params, if ( bli_obj_is_complex( x ) ) *perf *= 2.0; // Perform checks. - libblis_test_fnormm_check( &beta, &x, &norm, resid ); + libblis_test_normfm_check( &beta, &x, &norm, resid ); // Zero out performance and residual if input matrix is empty. libblis_test_check_empty_problem( &x, perf, resid ); @@ -177,14 +177,14 @@ void libblis_test_fnormm_experiment( test_params_t* params, -void libblis_test_fnormm_impl( iface_t iface, +void libblis_test_normfm_impl( iface_t iface, obj_t* x, obj_t* norm ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: - bli_fnormm( x, norm ); + bli_normfm( x, norm ); break; default: @@ -194,7 +194,7 @@ void libblis_test_fnormm_impl( iface_t iface, -void libblis_test_fnormm_check( obj_t* beta, +void libblis_test_normfm_check( obj_t* beta, obj_t* x, obj_t* norm, double* resid ) @@ -216,7 +216,7 @@ void libblis_test_fnormm_check( obj_t* beta, // // Under these conditions, we assume that the implementation for // - // norm := fnorm( x ) + // norm := normf( x ) // // is functioning correctly if // diff --git a/testsuite/src/test_fnormm.h b/testsuite/src/test_normfm.h similarity index 96% rename from testsuite/src/test_fnormm.h rename to testsuite/src/test_normfm.h index d558bd7db..d250f20a0 100644 --- a/testsuite/src/test_fnormm.h +++ 
b/testsuite/src/test_normfm.h @@ -32,5 +32,5 @@ */ -void libblis_test_fnormm( test_params_t* params, test_op_t* op ); +void libblis_test_normfm( test_params_t* params, test_op_t* op ); diff --git a/testsuite/src/test_fnormv.c b/testsuite/src/test_normfv.c similarity index 89% rename from testsuite/src/test_fnormv.c rename to testsuite/src/test_normfv.c index 2da4d046a..d9e793d80 100644 --- a/testsuite/src/test_fnormv.c +++ b/testsuite/src/test_normfv.c @@ -37,7 +37,7 @@ // Static variables. -static char* op_str = "fnormv"; +static char* op_str = "normfv"; static char* o_types = "v"; // x static char* p_types = ""; // (no parameters) static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s @@ -46,10 +46,10 @@ static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass { 1e-13, 1e-14 } }; // warn, pass for z // Local prototypes. -void libblis_test_fnormv_deps( test_params_t* params, +void libblis_test_normfv_deps( test_params_t* params, test_op_t* op ); -void libblis_test_fnormv_experiment( test_params_t* params, +void libblis_test_normfv_experiment( test_params_t* params, test_op_t* op, iface_t iface, num_t datatype, @@ -59,25 +59,25 @@ void libblis_test_fnormv_experiment( test_params_t* params, double* perf, double* resid ); -void libblis_test_fnormv_impl( iface_t iface, +void libblis_test_normfv_impl( iface_t iface, obj_t* x, obj_t* norm ); -void libblis_test_fnormv_check( obj_t* beta, +void libblis_test_normfv_check( obj_t* beta, obj_t* x, obj_t* norm, double* resid ); -void libblis_test_fnormv_deps( test_params_t* params, test_op_t* op ) +void libblis_test_normfv_deps( test_params_t* params, test_op_t* op ) { libblis_test_setv( params, &(op->ops->setv) ); } -void libblis_test_fnormv( test_params_t* params, test_op_t* op ) +void libblis_test_normfv( test_params_t* params, test_op_t* op ) { // Return early if this test has already been done. 
@@ -88,7 +88,7 @@ void libblis_test_fnormv( test_params_t* params, test_op_t* op ) op->ops->l1v_over == DISABLE_ALL ) return; // Call dependencies first. - if ( TRUE ) libblis_test_fnormv_deps( params, op ); + if ( TRUE ) libblis_test_normfv_deps( params, op ); // Execute the test driver for each implementation requested. if ( op->front_seq == ENABLE ) @@ -100,13 +100,13 @@ void libblis_test_fnormv( test_params_t* params, test_op_t* op ) p_types, o_types, thresh, - libblis_test_fnormv_experiment ); + libblis_test_normfv_experiment ); } } -void libblis_test_fnormv_experiment( test_params_t* params, +void libblis_test_normfv_experiment( test_params_t* params, test_op_t* op, iface_t iface, num_t datatype, @@ -154,7 +154,7 @@ void libblis_test_fnormv_experiment( test_params_t* params, { time = bli_clock(); - libblis_test_fnormv_impl( iface, &x, &norm ); + libblis_test_normfv_impl( iface, &x, &norm ); time_min = bli_clock_min_diff( time_min, time ); } @@ -164,7 +164,7 @@ void libblis_test_fnormv_experiment( test_params_t* params, if ( bli_obj_is_complex( x ) ) *perf *= 2.0; // Perform checks. - libblis_test_fnormv_check( &beta, &x, &norm, resid ); + libblis_test_normfv_check( &beta, &x, &norm, resid ); // Zero out performance and residual if input vector is empty. 
libblis_test_check_empty_problem( &x, perf, resid ); @@ -175,14 +175,14 @@ void libblis_test_fnormv_experiment( test_params_t* params, -void libblis_test_fnormv_impl( iface_t iface, +void libblis_test_normfv_impl( iface_t iface, obj_t* x, obj_t* norm ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: - bli_fnormv( x, norm ); + bli_normfv( x, norm ); break; default: @@ -192,7 +192,7 @@ void libblis_test_fnormv_impl( iface_t iface, -void libblis_test_fnormv_check( obj_t* beta, +void libblis_test_normfv_check( obj_t* beta, obj_t* x, obj_t* norm, double* resid ) @@ -213,7 +213,7 @@ void libblis_test_fnormv_check( obj_t* beta, // // Under these conditions, we assume that the implementation for // - // norm := fnorm( x ) + // norm := normf( x ) // // is functioning correctly if // diff --git a/testsuite/src/test_fnormv.h b/testsuite/src/test_normfv.h similarity index 96% rename from testsuite/src/test_fnormv.h rename to testsuite/src/test_normfv.h index 33857fb52..a4ada409a 100644 --- a/testsuite/src/test_fnormv.h +++ b/testsuite/src/test_normfv.h @@ -32,5 +32,5 @@ */ -void libblis_test_fnormv( test_params_t* params, test_op_t* op ); +void libblis_test_normfv( test_params_t* params, test_op_t* op ); diff --git a/testsuite/src/test_randm.c b/testsuite/src/test_randm.c index 5d34ff642..b37521ae7 100644 --- a/testsuite/src/test_randm.c +++ b/testsuite/src/test_randm.c @@ -187,17 +187,10 @@ void libblis_test_randm_impl( iface_t iface, void libblis_test_randm_check( obj_t* x, double* resid ) { - doff_t diagoffx = bli_obj_diag_offset( *x ); - uplo_t uplox = bli_obj_uplo( *x ); - - dim_t m_x = bli_obj_length( *x ); - dim_t n_x = bli_obj_width( *x ); - - inc_t rs_x = bli_obj_row_stride( *x ); - inc_t cs_x = bli_obj_col_stride( *x ); - void* buf_x = bli_obj_buffer_at_off( *x ); - - *resid = 0.0; + num_t dt_real = bli_obj_datatype_proj_to_real( *x ); + dim_t m_x = bli_obj_length( *x ); + dim_t n_x = bli_obj_width( *x ); + obj_t sum; // // The two most likely ways that randm 
would fail is if all elements @@ -206,61 +199,103 @@ void libblis_test_randm_check( obj_t* x, // absolute values of the elements of x. // - if ( bli_obj_is_float( *x ) ) + *resid = 0.0; + + bli_obj_scalar_init_detached( dt_real, &sum ); + + bli_absumm( x, &sum ); + + if ( bli_is_float( dt_real ) ) { - float sum_x; + float* sum_x = bli_obj_buffer_at_off( sum ); - bli_sabsumm( diagoffx, - uplox, - m_x, - n_x, - buf_x, rs_x, cs_x, - &sum_x ); - - if ( sum_x == *bli_s0 ) *resid = 1.0; - else if ( sum_x >= 1.0 * m_x * n_x ) *resid = 2.0; + if ( *sum_x == *bli_d0 ) *resid = 1.0; + else if ( *sum_x >= 2.0 * m_x * n_x ) *resid = 2.0; } - else if ( bli_obj_is_double( *x ) ) + else // if ( bli_is_double( dt_real ) ) { - double sum_x; + double* sum_x = bli_obj_buffer_at_off( sum ); - bli_dabsumm( diagoffx, - uplox, - m_x, - n_x, - buf_x, rs_x, cs_x, - &sum_x ); - - if ( sum_x == *bli_d0 ) *resid = 1.0; - else if ( sum_x >= 1.0 * m_x * n_x ) *resid = 2.0; - } - else if ( bli_obj_is_scomplex( *x ) ) - { - float sum_x; - - bli_cabsumm( diagoffx, - uplox, - m_x, - n_x, - buf_x, rs_x, cs_x, - &sum_x ); - - if ( sum_x == *bli_s0 ) *resid = 1.0; - else if ( sum_x >= 2.0 * m_x * n_x ) *resid = 2.0; - } - else // if ( bli_obj_is_dcomplex( *x ) ) - { - double sum_x; - - bli_zabsumm( diagoffx, - uplox, - m_x, - n_x, - buf_x, rs_x, cs_x, - &sum_x ); - - if ( sum_x == *bli_d0 ) *resid = 1.0; - else if ( sum_x >= 2.0 * m_x * n_x ) *resid = 2.0; + if ( *sum_x == *bli_d0 ) *resid = 1.0; + else if ( *sum_x >= 2.0 * m_x * n_x ) *resid = 2.0; } } + + + +#define FUNCPTR_T absumm_fp + +typedef void (*FUNCPTR_T)( + dim_t m, + dim_t n, + void* x, inc_t rs_x, inc_t cs_x, + void* sum_x + ); + +static FUNCPTR_T GENARRAY(ftypes,absumm); + + +void bli_absumm( obj_t* x, + obj_t* sum_x ) +{ + num_t dt = bli_obj_datatype( *x ); + + dim_t m = bli_obj_length( *x ); + dim_t n = bli_obj_width( *x ); + + void* buf_x = bli_obj_buffer_at_off( *x ); + inc_t rs_x = bli_obj_row_stride( *x ); + inc_t cs_x = 
bli_obj_col_stride( *x ); + + void* buf_sum_x = bli_obj_buffer_at_off( *sum_x ); + + FUNCPTR_T f; + + + // Index into the type combination array to extract the correct + // function pointer. + f = ftypes[dt]; + + // Invoke the function. + f( m, + n, + buf_x, rs_x, cs_x, + buf_sum_x ); +} + + +#undef GENTFUNCR +#define GENTFUNCR( ctype, ctype_r, ch, chr, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + dim_t m, \ + dim_t n, \ + void* x, inc_t rs_x, inc_t cs_x, \ + void* sum_x \ + ) \ +{ \ + ctype* x_cast = x; \ + ctype_r* sum_x_cast = sum_x; \ + ctype_r abs_chi1; \ + ctype_r sum; \ + dim_t i, j; \ +\ + PASTEMAC(chr,set0s)( sum ); \ +\ + for ( j = 0; j < n; j++ ) \ + { \ + for ( i = 0; i < m; i++ ) \ + { \ + ctype* chi1 = x_cast + (i )*rs_x + (j )*cs_x; \ +\ + PASTEMAC2(ch,chr,abval2s)( *chi1, abs_chi1 ); \ + PASTEMAC2(chr,chr,adds)( abs_chi1, sum ); \ + } \ + } \ +\ + PASTEMAC2(chr,chr,copys)( sum, *sum_x_cast ); \ +} + +INSERT_GENTFUNCR_BASIC0( absumm ) + diff --git a/testsuite/src/test_randm.h b/testsuite/src/test_randm.h index 059b740e5..1d192774a 100644 --- a/testsuite/src/test_randm.h +++ b/testsuite/src/test_randm.h @@ -34,3 +34,18 @@ void libblis_test_randm( test_params_t* params, test_op_t* op ); + +void bli_absumm( obj_t* x, + obj_t* sum_x ); + +#undef GENTPROTR +#define GENTPROTR( ctype, ctype_r, ch, chr, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + dim_t m, \ + dim_t n, \ + void* x, inc_t rs_x, inc_t cs_x, \ + void* sum_x \ + ); + +INSERT_GENTPROTR_BASIC( absumm ) diff --git a/testsuite/src/test_randv.c b/testsuite/src/test_randv.c index a8638fa9f..7d919d205 100644 --- a/testsuite/src/test_randv.c +++ b/testsuite/src/test_randv.c @@ -187,9 +187,9 @@ void libblis_test_randv_impl( iface_t iface, void libblis_test_randv_check( obj_t* x, double* resid ) { - dim_t m_x = bli_obj_vector_dim( *x ); - inc_t inc_x = bli_obj_vector_inc( *x ); - void* buf_x = bli_obj_buffer_at_off( *x ); + num_t dt_real = bli_obj_datatype_proj_to_real( *x ); + dim_t m_x = 
bli_obj_vector_dim( *x ); + obj_t sum; *resid = 0.0; @@ -200,49 +200,23 @@ void libblis_test_randv_check( obj_t* x, // absolute values of the elements of x. // - if ( bli_obj_is_float( *x ) ) + bli_obj_scalar_init_detached( dt_real, &sum ); + + bli_norm1v( x, &sum ); + + if ( bli_is_float( dt_real ) ) { - float sum_x; + float* sum_x = bli_obj_buffer_at_off( sum ); - bli_sabsumv( m_x, - buf_x, inc_x, - &sum_x ); - - if ( sum_x == *bli_s0 ) *resid = 1.0; - else if ( sum_x >= 1.0 * m_x ) *resid = 2.0; + if ( *sum_x == *bli_d0 ) *resid = 1.0; + else if ( *sum_x >= 2.0 * m_x ) *resid = 2.0; } - else if ( bli_obj_is_double( *x ) ) + else // if ( bli_is_double( dt_real ) ) { - double sum_x; + double* sum_x = bli_obj_buffer_at_off( sum ); - bli_dabsumv( m_x, - buf_x, inc_x, - &sum_x ); - - if ( sum_x == *bli_d0 ) *resid = 1.0; - else if ( sum_x >= 1.0 * m_x ) *resid = 2.0; - } - else if ( bli_obj_is_scomplex( *x ) ) - { - float sum_x; - - bli_cabsumv( m_x, - buf_x, inc_x, - &sum_x ); - - if ( sum_x == *bli_s0 ) *resid = 1.0; - else if ( sum_x >= 2.0 * m_x ) *resid = 2.0; - } - else // if ( bli_obj_is_dcomplex( *x ) ) - { - double sum_x; - - bli_zabsumv( m_x, - buf_x, inc_x, - &sum_x ); - - if ( sum_x == *bli_d0 ) *resid = 1.0; - else if ( sum_x >= 2.0 * m_x ) *resid = 2.0; + if ( *sum_x == *bli_d0 ) *resid = 1.0; + else if ( *sum_x >= 2.0 * m_x ) *resid = 2.0; } } diff --git a/testsuite/src/test_scal2m.c b/testsuite/src/test_scal2m.c index a8419349f..c8135e1ce 100644 --- a/testsuite/src/test_scal2m.c +++ b/testsuite/src/test_scal2m.c @@ -75,7 +75,7 @@ void libblis_test_scal2m_check( obj_t* alpha, void libblis_test_scal2m_deps( test_params_t* params, test_op_t* op ) { libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormm( params, &(op->ops->fnormm) ); + libblis_test_normfm( params, &(op->ops->normfm) ); libblis_test_subm( params, &(op->ops->subm) ); libblis_test_copym( params, &(op->ops->copym) ); libblis_test_scalm( params, &(op->ops->scalm) ); @@ -247,7 
+247,7 @@ void libblis_test_scal2m_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - alpha * conjx(x) ) + // normf( y - alpha * conjx(x) ) // // is negligible. // @@ -261,7 +261,7 @@ void libblis_test_scal2m_check( obj_t* alpha, bli_scalm( alpha, &x_temp ); bli_subm( &x_temp, y ); - bli_fnormm( y, &norm ); + bli_normfm( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); diff --git a/testsuite/src/test_scal2v.c b/testsuite/src/test_scal2v.c index fe3cbdc33..47d37e847 100644 --- a/testsuite/src/test_scal2v.c +++ b/testsuite/src/test_scal2v.c @@ -75,7 +75,7 @@ void libblis_test_scal2v_check( obj_t* alpha, void libblis_test_scal2v_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_scalv( params, &(op->ops->scalv) ); @@ -244,7 +244,7 @@ void libblis_test_scal2v_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - alpha * conjx(x) ) + // normf( y - alpha * conjx(x) ) // // is negligible. 
// @@ -258,7 +258,7 @@ void libblis_test_scal2v_check( obj_t* alpha, bli_scalv( alpha, &x_temp ); bli_subv( &x_temp, y ); - bli_fnormv( y, &norm ); + bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &x_temp ); diff --git a/testsuite/src/test_scalm.c b/testsuite/src/test_scalm.c index d2a4fc3c0..62778ea79 100644 --- a/testsuite/src/test_scalm.c +++ b/testsuite/src/test_scalm.c @@ -73,7 +73,7 @@ void libblis_test_scalm_check( obj_t* beta, void libblis_test_scalm_deps( test_params_t* params, test_op_t* op ) { libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormm( params, &(op->ops->fnormm) ); + libblis_test_normfm( params, &(op->ops->normfm) ); libblis_test_copym( params, &(op->ops->copym) ); } @@ -239,7 +239,7 @@ void libblis_test_scalm_check( obj_t* beta, // // is functioning correctly if // - // fnorm( y + -conjbeta(beta) * y_orig ) + // normf( y + -conjbeta(beta) * y_orig ) // // is negligible. // @@ -256,7 +256,7 @@ void libblis_test_scalm_check( obj_t* beta, bli_scalm( &nbeta, &y2 ); bli_addm( &y2, y ); - bli_fnormm( y, &norm_y_r ); + bli_normfm( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); diff --git a/testsuite/src/test_scalv.c b/testsuite/src/test_scalv.c index 97da48529..3ea6defbb 100644 --- a/testsuite/src/test_scalv.c +++ b/testsuite/src/test_scalv.c @@ -73,7 +73,7 @@ void libblis_test_scalv_check( obj_t* beta, void libblis_test_scalv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_addv( params, &(op->ops->addv) ); libblis_test_copyv( params, &(op->ops->copyv) ); } @@ -235,7 +235,7 @@ void libblis_test_scalv_check( obj_t* beta, // // is functioning correctly if // - // fnorm( y + -conjbeta(beta) * y_orig ) + // normf( y + -conjbeta(beta) * y_orig ) // // is negligible. 
// @@ -252,7 +252,7 @@ void libblis_test_scalv_check( obj_t* beta, bli_scalv( &nbeta, &y2 ); bli_addv( &y2, y ); - bli_fnormv( y, &norm_y_r ); + bli_normfv( y, &norm_y_r ); bli_getsc( &norm_y_r, resid, &junk ); diff --git a/testsuite/src/test_subm.c b/testsuite/src/test_subm.c index 8f270daff..2ad8016cf 100644 --- a/testsuite/src/test_subm.c +++ b/testsuite/src/test_subm.c @@ -74,7 +74,7 @@ void libblis_test_subm_check( obj_t* alpha, void libblis_test_subm_deps( test_params_t* params, test_op_t* op ) { libblis_test_setm( params, &(op->ops->setm) ); - libblis_test_fnormm( params, &(op->ops->fnormm) ); + libblis_test_normfm( params, &(op->ops->normfm) ); } @@ -234,7 +234,7 @@ void libblis_test_subm_check( obj_t* alpha, // // is functioning correctly if // - // fnormv(y) - sqrt( absqsc( beta - conjx(alpha) ) * m * n ) + // normfv(y) - sqrt( absqsc( beta - conjx(alpha) ) * m * n ) // // is negligible. // @@ -247,7 +247,7 @@ void libblis_test_subm_check( obj_t* alpha, bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); - bli_fnormm( y, &norm_r ); + bli_normfm( y, &norm_r ); bli_copysc( beta, &aminusb ); bli_subsc( &alpha_conj, &aminusb ); diff --git a/testsuite/src/test_subv.c b/testsuite/src/test_subv.c index 74ce8ffbc..5c1db1a49 100644 --- a/testsuite/src/test_subv.c +++ b/testsuite/src/test_subv.c @@ -74,7 +74,7 @@ void libblis_test_subv_check( obj_t* alpha, void libblis_test_subv_deps( test_params_t* params, test_op_t* op ) { libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); } @@ -230,7 +230,7 @@ void libblis_test_subv_check( obj_t* alpha, // // is functioning correctly if // - // fnormv(y) - sqrt( absqsc( beta - conjx(alpha) ) * m ) + // normfv(y) - sqrt( absqsc( beta - conjx(alpha) ) * m ) // // is negligible. 
// @@ -242,7 +242,7 @@ void libblis_test_subv_check( obj_t* alpha, bli_obj_scalar_init_detached_copy_of( dt, conjx, alpha, &alpha_conj ); - bli_fnormv( y, &norm_r ); + bli_normfv( y, &norm_r ); bli_copysc( beta, &aminusb ); bli_subsc( &alpha_conj, &aminusb ); diff --git a/testsuite/src/test_symm.c b/testsuite/src/test_symm.c index c26fa2dd5..84da1d9bd 100644 --- a/testsuite/src/test_symm.c +++ b/testsuite/src/test_symm.c @@ -83,7 +83,7 @@ void libblis_test_symm_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -303,7 +303,7 @@ void libblis_test_symm_check( side_t side, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -357,7 +357,7 @@ void libblis_test_symm_check( side_t side, bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_symv.c b/testsuite/src/test_symv.c index 2c7f63fef..634ade8cc 100644 --- a/testsuite/src/test_symv.c +++ b/testsuite/src/test_symv.c @@ -80,7 +80,7 @@ void libblis_test_symv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_scalv( params, &(op->ops->scalv) ); @@ -289,7 +289,7 @@ void libblis_test_symv_check( obj_t* alpha, // // is functioning 
correctly if // - // fnorm( y - v ) + // normf( y - v ) // // is negligible, where // @@ -309,7 +309,7 @@ void libblis_test_symv_check( obj_t* alpha, bli_gemv( alpha, a, x, beta, &v ); bli_subv( &v, y ); - bli_fnormv( y, &norm ); + bli_normfv( y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &v ); diff --git a/testsuite/src/test_syr.c b/testsuite/src/test_syr.c index 1ae7ead15..c722ac8b9 100644 --- a/testsuite/src/test_syr.c +++ b/testsuite/src/test_syr.c @@ -76,7 +76,7 @@ void libblis_test_syr_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copym( params, &(op->ops->copym) ); libblis_test_scal2v( params, &(op->ops->scal2v) ); @@ -260,7 +260,7 @@ void libblis_test_syr_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - w ) + // normf( v - w ) // // is negligible, where // @@ -299,7 +299,7 @@ void libblis_test_syr_check( obj_t* alpha, bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w ); bli_subv( &w, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_syr2.c b/testsuite/src/test_syr2.c index f6fb1b52d..be4e4b220 100644 --- a/testsuite/src/test_syr2.c +++ b/testsuite/src/test_syr2.c @@ -78,7 +78,7 @@ void libblis_test_syr2_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copym( params, &(op->ops->copym) ); libblis_test_scal2v( params, &(op->ops->scal2v) ); @@ -272,7 +272,7 @@ void libblis_test_syr2_check( obj_t* alpha, // // is 
functioning correctly if // - // fnorm( v - w ) + // normf( v - w ) // // is negligible, where // @@ -322,7 +322,7 @@ void libblis_test_syr2_check( obj_t* alpha, bli_gemv( &BLIS_ONE, a_orig, &t, &BLIS_ONE, &w1 ); bli_subv( &w1, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_syr2k.c b/testsuite/src/test_syr2k.c index fb74237d6..99463db79 100644 --- a/testsuite/src/test_syr2k.c +++ b/testsuite/src/test_syr2k.c @@ -81,7 +81,7 @@ void libblis_test_syr2k_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -301,7 +301,7 @@ void libblis_test_syr2k_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -339,7 +339,7 @@ void libblis_test_syr2k_check( obj_t* alpha, bli_symv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_syrk.c b/testsuite/src/test_syrk.c index f83c28c95..e6be7cf79 100644 --- a/testsuite/src/test_syrk.c +++ b/testsuite/src/test_syrk.c @@ -79,7 +79,7 @@ void libblis_test_syrk_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) 
); libblis_test_copym( params, &(op->ops->copym) ); @@ -289,7 +289,7 @@ void libblis_test_syrk_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -321,7 +321,7 @@ void libblis_test_syrk_check( obj_t* alpha, bli_symv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_trmm.c b/testsuite/src/test_trmm.c index 06edcc35e..45925090c 100644 --- a/testsuite/src/test_trmm.c +++ b/testsuite/src/test_trmm.c @@ -79,7 +79,7 @@ void libblis_test_trmm_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -287,7 +287,7 @@ void libblis_test_trmm_check( side_t side, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -339,7 +339,7 @@ void libblis_test_trmm_check( side_t side, } bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_trmm3.c b/testsuite/src/test_trmm3.c index c1a1938b8..8e2409d74 100644 --- a/testsuite/src/test_trmm3.c +++ b/testsuite/src/test_trmm3.c @@ -83,7 +83,7 @@ void libblis_test_trmm3_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( 
params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -304,7 +304,7 @@ void libblis_test_trmm3_check( side_t side, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -360,7 +360,7 @@ void libblis_test_trmm3_check( side_t side, bli_gemv( beta, c_orig, &t, &BLIS_ONE, &z ); bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_trmv.c b/testsuite/src/test_trmv.c index 0c914f092..dd8fd7690 100644 --- a/testsuite/src/test_trmv.c +++ b/testsuite/src/test_trmv.c @@ -76,7 +76,7 @@ void libblis_test_trmv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_scalv( params, &(op->ops->scalv) ); @@ -269,7 +269,7 @@ void libblis_test_trmv_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - x ) + // normf( y - x ) // // is negligible, where // @@ -293,7 +293,7 @@ void libblis_test_trmv_check( obj_t* alpha, bli_gemv( alpha, &a_local, x_orig, &BLIS_ZERO, &y ); bli_subv( x, &y ); - bli_fnormv( &y, &norm ); + bli_normfv( &y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &y ); diff --git a/testsuite/src/test_trsm.c b/testsuite/src/test_trsm.c index 534a26960..b89d70c46 100644 --- a/testsuite/src/test_trsm.c +++ b/testsuite/src/test_trsm.c @@ -79,7 +79,7 @@ void libblis_test_trsm_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); 
+ libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -285,7 +285,7 @@ void libblis_test_trsm_check( side_t side, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -337,7 +337,7 @@ void libblis_test_trsm_check( side_t side, } bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index 86764c7d7..234bec68d 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -78,7 +78,7 @@ void libblis_test_trsm_ukr_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); libblis_test_setv( params, &(op->ops->setv) ); - libblis_test_fnormv( params, &(op->ops->fnormv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_scalv( params, &(op->ops->scalv) ); libblis_test_copym( params, &(op->ops->copym) ); @@ -306,7 +306,7 @@ void libblis_test_trsm_ukr_check( side_t side, // // is functioning correctly if // - // fnorm( v - z ) + // normf( v - z ) // // is negligible, where // @@ -358,7 +358,7 @@ void libblis_test_trsm_ukr_check( side_t side, } bli_subv( &z, &v ); - bli_fnormv( &v, &norm ); + bli_normfv( &v, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &t ); diff --git a/testsuite/src/test_trsv.c b/testsuite/src/test_trsv.c index 1826179e8..e8d5c8c97 100644 --- a/testsuite/src/test_trsv.c +++ b/testsuite/src/test_trsv.c @@ -76,7 +76,7 @@ void libblis_test_trsv_deps( test_params_t* params, test_op_t* op ) { libblis_test_randv( params, &(op->ops->randv) ); libblis_test_randm( params, &(op->ops->randm) ); - libblis_test_fnormv( params, &(op->ops->fnormv) 
); + libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); libblis_test_scalv( params, &(op->ops->scalv) ); @@ -270,7 +270,7 @@ void libblis_test_trsv_check( obj_t* alpha, // // is functioning correctly if // - // fnorm( y - x_orig ) + // normf( y - x_orig ) // // is negligible, where // @@ -298,7 +298,7 @@ void libblis_test_trsv_check( obj_t* alpha, bli_gemv( &alpha_inv, &a_local, x, &BLIS_ZERO, &y ); bli_subv( x_orig, &y ); - bli_fnormv( &y, &norm ); + bli_normfv( &y, &norm ); bli_getsc( &norm, resid, &junk ); bli_obj_free( &y );