diff --git a/frame/1/bli_l1v_check.c b/frame/1/bli_l1v_check.c index 737fbaceb..b998a65fb 100644 --- a/frame/1/bli_l1v_check.c +++ b/frame/1/bli_l1v_check.c @@ -62,31 +62,15 @@ GENFRONT( swapv ) void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ - obj_t* x, \ - obj_t* y \ - ) \ -{ \ - bli_l1v_axy_check( alpha, x, y ); \ -} - -GENFRONT( axpyv ) -GENFRONT( scal2v ) - - -#undef GENFRONT -#define GENFRONT( opname ) \ -\ -void PASTEMAC(opname,_check) \ - ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ) \ { \ - bli_l1v_xby_check( x, beta, y ); \ + bli_l1v_axby_check( alpha, x, beta, y ); \ } -GENFRONT( xpbyv ) +GENFRONT( axpbyv ) #undef GENFRONT @@ -96,14 +80,14 @@ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ - obj_t* beta, \ obj_t* y \ ) \ { \ - bli_l1v_axby_check( alpha, x, beta, y ); \ + bli_l1v_axy_check( alpha, x, y ); \ } -GENFRONT( axpbyv ) +GENFRONT( axpyv ) +GENFRONT( scal2v ) #undef GENFRONT @@ -170,6 +154,22 @@ GENFRONT( scalv ) GENFRONT( setv ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ) \ +{ \ + bli_l1v_xby_check( x, beta, y ); \ +} + +GENFRONT( xpbyv ) + + // ----------------------------------------------------------------------------- void bli_l1v_xy_check @@ -215,43 +215,43 @@ void bli_l1v_axy_check obj_t* y ) { - err_t e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. - e_val = bli_check_noninteger_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. + // Check object dimensions. 
- e_val = bli_check_scalar_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). - e_val = bli_check_object_buffer( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_xby_check @@ -261,43 +261,43 @@ void bli_l1v_xby_check obj_t* y ) { - err_t e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. - e_val = bli_check_noninteger_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. + // Check object dimensions. 
- e_val = bli_check_scalar_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). - e_val = bli_check_object_buffer( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_axby_check @@ -308,52 +308,52 @@ void bli_l1v_axby_check obj_t* y ) { - err_t e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. - e_val = bli_check_noninteger_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_noninteger_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. 
+ // Check object dimensions. - e_val = bli_check_scalar_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_scalar_object( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). - e_val = bli_check_object_buffer( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( beta ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_dot_check diff --git a/frame/1/bli_l1v_check.h b/frame/1/bli_l1v_check.h index 1c87f5f51..d4a1e9ff9 100644 --- a/frame/1/bli_l1v_check.h +++ b/frame/1/bli_l1v_check.h @@ -58,25 +58,12 @@ GENTPROT( swapv ) void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ - obj_t* x, \ - obj_t* y \ - ); - -GENTPROT( axpyv ) -GENTPROT( scal2v ) - - -#undef GENTPROT -#define GENTPROT( opname ) \ -\ -void PASTEMAC(opname,_check) \ - ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ ); -GENTPROT( xpbyv ) +GENTPROT( axpbyv ) 
#undef GENTPROT @@ -86,11 +73,11 @@ void PASTEMAC(opname,_check) \ ( \ obj_t* alpha, \ obj_t* x, \ - obj_t* beta, \ obj_t* y \ - ); + ); -GENTPROT( axpbyv ) +GENTPROT( axpyv ) +GENTPROT( scal2v ) #undef GENTPROT @@ -145,6 +132,20 @@ GENTPROT( scalv ) GENTPROT( setv ) +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ); + +GENTPROT( xpbyv ) + + + // ----------------------------------------------------------------------------- void bli_l1v_xy_check diff --git a/frame/1/bli_l1v_cntx.c b/frame/1/bli_l1v_cntx.c index d1c504528..a1bba0354 100644 --- a/frame/1/bli_l1v_cntx.c +++ b/frame/1/bli_l1v_cntx.c @@ -64,23 +64,48 @@ GENFRONT( subv, BLIS_SUBV_KER ) GENFRONT( swapv, BLIS_SWAPV_KER ) +#undef GENFRONT +#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \ +\ +void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_create( cntx ); \ +\ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ + PASTEMAC(dep3,_cntx_init)( cntx ); \ + PASTEMAC(dep4,_cntx_init)( cntx ); \ +\ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ +} \ +void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_free( cntx ); \ +} + +GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv ) + + #undef GENFRONT #define GENFRONT( opname, kertype, depname ) \ \ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ { \ - bli_cntx_obj_create( cntx ); \ + bli_cntx_obj_create( cntx ); \ \ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(depname,_cntx_init)( cntx ); \ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(depname,_cntx_init)( cntx ); \ \ - /* Initialize the context with the kernel associated with the current - operation. 
*/ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ } \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ { \ - bli_cntx_obj_free( cntx ); \ + bli_cntx_obj_free( cntx ); \ } GENFRONT( axpyv, BLIS_AXPYV_KER, addv ) @@ -92,46 +117,21 @@ GENFRONT( scalv, BLIS_SCALV_KER, setv ) \ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ { \ - bli_cntx_obj_create( cntx ); \ + bli_cntx_obj_create( cntx ); \ \ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(dep1,_cntx_init)( cntx ); \ - PASTEMAC(dep2,_cntx_init)( cntx ); \ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ \ - /* Initialize the context with the kernel associated with the current - operation. */ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ } \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ { \ - bli_cntx_obj_free( cntx ); \ + bli_cntx_obj_free( cntx ); \ } GENFRONT( scal2v, BLIS_SCAL2V_KER, setv, copyv ) GENFRONT( xpbyv, BLIS_XPBYV_KER, addv, copyv ) - -#undef GENFRONT -#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \ -\ -void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ -{ \ - bli_cntx_obj_create( cntx ); \ -\ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(dep1,_cntx_init)( cntx ); \ - PASTEMAC(dep2,_cntx_init)( cntx ); \ - PASTEMAC(dep3,_cntx_init)( cntx ); \ - PASTEMAC(dep4,_cntx_init)( cntx ); \ -\ - /* Initialize the context with the kernel associated with the current - operation. 
*/ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ -} \ -void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ -{ \ - bli_cntx_obj_free( cntx ); \ -} - -GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv ) - diff --git a/frame/1/bli_l1v_ft.h b/frame/1/bli_l1v_ft.h index 051ca0f6c..c4e206df7 100644 --- a/frame/1/bli_l1v_ft.h +++ b/frame/1/bli_l1v_ft.h @@ -49,8 +49,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -58,41 +58,6 @@ INSERT_GENTDEF( addv ) INSERT_GENTDEF( copyv ) INSERT_GENTDEF( subv ) -// axpyv, scal2v - -#undef GENTDEF -#define GENTDEF( ctype, ch, opname, tsuf ) \ -\ -typedef void (*PASTECH2(ch,opname,tsuf)) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTDEF( axpyv ) -INSERT_GENTDEF( scal2v ) - -// xpybv - -#undef GENTDEF -#define GENTDEF( ctype, ch, opname, tsuf ) \ -\ -typedef void (*PASTECH2(ch,opname,tsuf)) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTDEF( xpbyv ) - // axpbyv #undef GENTDEF @@ -102,15 +67,33 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( axpbyv ) +// axpyv, scal2v + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( axpyv ) +INSERT_GENTDEF( scal2v ) + // dotv 
#undef GENTDEF @@ -121,9 +104,9 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -139,11 +122,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -157,7 +140,7 @@ INSERT_GENTDEF( dotxv ) typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ -172,8 +155,8 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ -188,14 +171,29 @@ INSERT_GENTDEF( setv ) typedef void (*PASTECH2(ch,opname,tsuf)) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); INSERT_GENTDEF( swapv ) +// xpybv +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( xpbyv ) #endif diff --git a/frame/1/bli_l1v_ker.h b/frame/1/bli_l1v_ker.h index 13c675215..cf80eda46 100644 --- a/frame/1/bli_l1v_ker.h +++ b/frame/1/bli_l1v_ker.h @@ -42,11 +42,11 @@ \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjx, \ + dim_t n, \ + ctype* 
restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( addv_ker_name ) @@ -59,12 +59,29 @@ INSERT_GENTPROT_BASIC( subv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); \ + +INSERT_GENTPROT_BASIC( axpbyv_ker_name ) + + +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( axpyv_ker_name ) @@ -76,46 +93,13 @@ INSERT_GENTPROT_BASIC( scal2v_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); \ - -INSERT_GENTPROT_BASIC( xpbyv_ker_name ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, opname ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); \ - -INSERT_GENTPROT_BASIC( axpbyv_ker_name ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, opname ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - cntx_t* cntx \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( dotv_ker_name ) @@ -126,15 +110,15 @@ INSERT_GENTPROT_BASIC( dotv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, 
inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ - cntx_t* cntx \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( dotxv_ker_name ) @@ -145,9 +129,9 @@ INSERT_GENTPROT_BASIC( dotxv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - dim_t n, \ - ctype* x, inc_t incx, \ - cntx_t* cntx \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( invertv_ker_name ) @@ -158,11 +142,11 @@ INSERT_GENTPROT_BASIC( invertv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjalpha, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - cntx_t* cntx \ + conj_t conjalpha, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( scalv_ker_name ) @@ -174,11 +158,27 @@ INSERT_GENTPROT_BASIC( setv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); \ INSERT_GENTPROT_BASIC( swapv_ker_name ) + +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); \ + +INSERT_GENTPROT_BASIC( xpbyv_ker_name ) + diff --git a/frame/1/bli_l1v_oapi.c b/frame/1/bli_l1v_oapi.c index c43551236..cebc3bfb5 100644 --- a/frame/1/bli_l1v_oapi.c +++ b/frame/1/bli_l1v_oapi.c @@ -82,6 +82,64 @@ GENFRONT( copyv ) GENFRONT( subv ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + 
num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_alpha; \ + void* buf_beta; \ +\ + obj_t alpha_local; \ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( alpha, x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + alpha, &alpha_local ); \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. */ \ + bli_call_ft_9 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_alpha, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( axpbyv ) + #undef GENFRONT #define GENFRONT( opname ) \ @@ -136,119 +194,6 @@ GENFRONT( axpyv ) GENFRONT( scal2v ) - -#undef GENFRONT -#define GENFRONT( opname ) \ -\ -void PASTEMAC(opname,EX_SUF) \ - ( \ - obj_t* x, \ - obj_t* beta, \ - obj_t* y \ - BLIS_OAPI_CNTX_PARAM \ - ) \ -{ \ - BLIS_OAPI_CNTX_DECL \ -\ - num_t dt = bli_obj_datatype( *x ); \ -\ - conj_t conjx = bli_obj_conj_status( *x ); \ - dim_t n = bli_obj_vector_dim( *x ); \ - void* buf_x = bli_obj_buffer_at_off( *x ); \ - inc_t inc_x = bli_obj_vector_inc( *x ); \ - void* buf_y = bli_obj_buffer_at_off( *y ); \ - inc_t inc_y = bli_obj_vector_inc( *y ); \ -\ - void* buf_beta; \ -\ - obj_t beta_local; \ -\ - if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( x, beta, y ); \ -\ - /* Create local copy-casts of scalars (and apply internal conjugation - as needed). 
*/ \ - bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ - beta, &beta_local ); \ - buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ -\ - /* Invoke the void pointer-based function. */ \ - bli_call_ft_8 \ - ( \ - dt, \ - opname, \ - conjx, \ - n, \ - buf_x, inc_x, \ - buf_beta, \ - buf_y, inc_y, \ - cntx \ - ); \ -} - -GENFRONT( xpbyv ) - - - -#undef GENFRONT -#define GENFRONT( opname ) \ -\ -void PASTEMAC(opname,EX_SUF) \ - ( \ - obj_t* alpha, \ - obj_t* x, \ - obj_t* beta, \ - obj_t* y \ - BLIS_OAPI_CNTX_PARAM \ - ) \ -{ \ - BLIS_OAPI_CNTX_DECL \ -\ - num_t dt = bli_obj_datatype( *x ); \ -\ - conj_t conjx = bli_obj_conj_status( *x ); \ - dim_t n = bli_obj_vector_dim( *x ); \ - void* buf_x = bli_obj_buffer_at_off( *x ); \ - inc_t inc_x = bli_obj_vector_inc( *x ); \ - void* buf_y = bli_obj_buffer_at_off( *y ); \ - inc_t inc_y = bli_obj_vector_inc( *y ); \ -\ - void* buf_alpha; \ - void* buf_beta; \ -\ - obj_t alpha_local; \ - obj_t beta_local; \ -\ - if ( bli_error_checking_is_enabled() ) \ - PASTEMAC(opname,_check)( alpha, x, beta, y ); \ -\ - /* Create local copy-casts of scalars (and apply internal conjugation - as needed). */ \ - bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ - alpha, &alpha_local ); \ - bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ - beta, &beta_local ); \ - buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ - buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ -\ - /* Invoke the void pointer-based function. 
*/ \ - bli_call_ft_9 \ - ( \ - dt, \ - opname, \ - conjx, \ - n, \ - buf_alpha, \ - buf_x, inc_x, \ - buf_beta, \ - buf_y, inc_y, \ - cntx \ - ); \ -} - -GENFRONT( axpbyv ) - - #undef GENFRONT #define GENFRONT( opname ) \ \ @@ -479,5 +424,57 @@ void PASTEMAC(opname,EX_SUF) \ GENFRONT( swapv ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_beta; \ +\ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. 
*/ \ + bli_call_ft_8 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( xpbyv ) + + #endif diff --git a/frame/1/bli_l1v_oapi.h b/frame/1/bli_l1v_oapi.h index b6ec5094d..ff277421c 100644 --- a/frame/1/bli_l1v_oapi.h +++ b/frame/1/bli_l1v_oapi.h @@ -58,27 +58,13 @@ GENTPROT( subv ) void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ - obj_t* x, \ - obj_t* y \ - BLIS_OAPI_CNTX_PARAM \ - ); - -GENTPROT( axpyv ) -GENTPROT( scal2v ) - - -#undef GENTPROT -#define GENTPROT( opname ) \ -\ -void PASTEMAC(opname,EX_SUF) \ - ( \ obj_t* x, \ obj_t* beta, \ obj_t* y \ BLIS_OAPI_CNTX_PARAM \ ); -GENTPROT( xpbyv ) +GENTPROT( axpbyv ) #undef GENTPROT @@ -88,12 +74,12 @@ void PASTEMAC(opname,EX_SUF) \ ( \ obj_t* alpha, \ obj_t* x, \ - obj_t* beta, \ obj_t* y \ BLIS_OAPI_CNTX_PARAM \ - ); + ); -GENTPROT( axpbyv ) +GENTPROT( axpyv ) +GENTPROT( scal2v ) #undef GENTPROT @@ -164,3 +150,17 @@ void PASTEMAC(opname,EX_SUF) \ GENTPROT( swapv ) + +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ); + +GENTPROT( xpbyv ) + diff --git a/frame/1/bli_l1v_tapi.c b/frame/1/bli_l1v_tapi.c index 551a41d18..4cf6be24e 100644 --- a/frame/1/bli_l1v_tapi.c +++ b/frame/1/bli_l1v_tapi.c @@ -74,6 +74,44 @@ INSERT_GENTFUNC_BASIC( copyv, BLIS_COPYV_KER ) INSERT_GENTFUNC_BASIC( subv, BLIS_SUBV_KER ) +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ 
+\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER ) + + #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ @@ -111,80 +149,6 @@ INSERT_GENTFUNC_BASIC( axpyv, BLIS_AXPYV_KER ) INSERT_GENTFUNC_BASIC( scal2v, BLIS_SCAL2V_KER ) -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, kerid ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ) \ -{ \ - const num_t dt = PASTEMAC(ch,type); \ - cntx_t* cntx_p; \ -\ - bli_cntx_init_local_if( opname, cntx, cntx_p ); \ -\ - PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ -\ - f \ - ( \ - conjx, \ - n, \ - x, incx, \ - beta, \ - y, incy, \ - cntx_p \ - ); \ -\ - bli_cntx_finalize_local_if( opname, cntx ); \ -} - -INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER ) - - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, kerid ) \ -\ -void PASTEMAC(ch,opname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ) \ -{ \ - const num_t dt = PASTEMAC(ch,type); \ - cntx_t* cntx_p; \ -\ - bli_cntx_init_local_if( opname, cntx, cntx_p ); \ -\ - PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ -\ - f \ - ( \ - conjx, \ - n, \ - alpha, \ - x, incx, \ - beta, \ - y, incy, \ - cntx_p \ - ); \ -\ - bli_cntx_finalize_local_if( opname, cntx ); \ -} - -INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER ) - - #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ @@ -361,3 +325,39 @@ void PASTEMAC(ch,opname) \ INSERT_GENTFUNC_BASIC( swapv, BLIS_SWAPV_KER ) +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + 
cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ +\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER ) + + diff --git a/frame/1/bli_l1v_tapi.h b/frame/1/bli_l1v_tapi.h index b50fe8eb9..b4b36b059 100644 --- a/frame/1/bli_l1v_tapi.h +++ b/frame/1/bli_l1v_tapi.h @@ -41,7 +41,7 @@ #define addv_ker_name addv #undef axpbyv_ker_name -#define axpbyv_ker_name axpbyv +#define axpbyv_ker_name axpbyv #undef axpyv_ker_name #define axpyv_ker_name axpyv diff --git a/frame/1/kernels/bli_addv_ref.c b/frame/1/kernels/bli_addv_ref.c index c18748ae9..aaf47859c 100644 --- a/frame/1/kernels/bli_addv_ref.c +++ b/frame/1/kernels/bli_addv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -57,43 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if (incx == 1 && incy == 1) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if (incx == 1 && incy == 1) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,adds)( *chi1, *psi1 ); \ - \ - chi1 += 
incx; \ - psi1 += incy; \ - } \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_axpbyv_ref.c b/frame/1/kernels/bli_axpbyv_ref.c index 91d92c733..2751640af 100644 --- a/frame/1/kernels/bli_axpbyv_ref.c +++ b/frame/1/kernels/bli_axpbyv_ref.c @@ -54,193 +54,189 @@ void PASTEMAC(ch,varname) \ \ if ( bli_zero_dim1( n ) ) return; \ \ - if ( PASTEMAC(ch,eq0)( *alpha ) ) \ - { \ + if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + { \ + /* If alpha is zero and beta is zero, set to zero. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + ctype* zero = PASTEMAC(ch,0); \ \ - /* If alpha is zero and beta is zero, set to zero. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - ctype* zero = PASTEMAC(ch,0); \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ + setv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + zero, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is zero and beta is one, return. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + return; \ + } \ + /* If alpha is zero, scale by beta. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \ \ - setv_p \ - ( \ - BLIS_NO_CONJUGATE, \ - n, \ - zero, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If alpha is zero and beta is one, return. 
*/ \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ - { \ - return; \ - } \ - /* If alpha is zero, scale by beta. */ \ - else \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \ + scalv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ \ - scalv_p \ - ( \ - BLIS_NO_CONJUGATE, \ - n, \ - beta, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ + } \ + else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + { \ + /* If alpha is one and beta is zero, copy. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ - } \ - else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ - { \ - \ - /* If alpha is one and beta is zero, copy. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ - \ - copyv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If alpha is one and beta is one, add. */ \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ - \ - addv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If alpha is one, call xpby. */ \ - else \ - { \ - /* Query the context for the kernel function pointer. 
*/ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \ - \ - xpbyv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - beta, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - \ - } \ - else \ - { \ - \ - /* If beta is zero, call scal2. */ \ - if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \ - \ - scal2v_p \ - ( \ - conjx, \ - n, \ - alpha, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - /* If beta is one, call axpy. */ \ - else if ( PASTEMAC(ch,eq1)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ - \ - axpyv_p \ - ( \ - conjx, \ - n, \ - alpha, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ - \ - } \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one and beta is one, add. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ +\ + addv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one, call xpby. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \ +\ + xpbyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + } \ + else \ + { \ + /* If beta is zero, call scal2. 
*/ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \ +\ + scal2v_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If beta is one, call axpy. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ +\ + axpyv_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + \ + } \ \ chi1 = x; \ psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); 
\ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_axpyv_ref.c b/frame/1/kernels/bli_axpyv_ref.c index a6dd49976..f2733d7bc 100644 --- a/frame/1/kernels/bli_axpyv_ref.c +++ b/frame/1/kernels/bli_axpyv_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -79,43 +79,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_copyv_ref.c b/frame/1/kernels/bli_copyv_ref.c index 28d22c7b2..e364de57f 100644 --- a/frame/1/kernels/bli_copyv_ref.c +++ b/frame/1/kernels/bli_copyv_ref.c @@ -39,11 +39,11 
@@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -57,43 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_dotv_ref.c b/frame/1/kernels/bli_dotv_ref.c index 6f790c81c..61a4784da 100644 --- a/frame/1/kernels/bli_dotv_ref.c +++ b/frame/1/kernels/bli_dotv_ref.c @@ -39,13 +39,13 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict rho, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -75,43 +75,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( 
conjx_use ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_dotxv_ref.c b/frame/1/kernels/bli_dotxv_ref.c index 89dedcc48..3e26f4ee8 100644 --- a/frame/1/kernels/bli_dotxv_ref.c +++ b/frame/1/kernels/bli_dotxv_ref.c @@ -39,15 +39,15 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ ctype* restrict beta, \ ctype* restrict rho, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -83,43 +83,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx_use ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else 
\ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_invertv_ref.c b/frame/1/kernels/bli_invertv_ref.c index a79d8c9f0..9585a970c 100644 --- a/frame/1/kernels/bli_invertv_ref.c +++ b/frame/1/kernels/bli_invertv_ref.c @@ -39,9 +39,9 @@ \ void PASTEMAC(ch,varname) \ ( \ - dim_t n, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -51,22 +51,22 @@ void PASTEMAC(ch,varname) \ \ chi1 = x; \ \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,inverts)( chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,inverts)( *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( invertv_ref ) 
diff --git a/frame/1/kernels/bli_l1v_ref.h b/frame/1/kernels/bli_l1v_ref.h index 60cdbd2ee..51eb5b6c4 100644 --- a/frame/1/kernels/bli_l1v_ref.h +++ b/frame/1/kernels/bli_l1v_ref.h @@ -32,149 +32,48 @@ */ +// Redefine level-1v kernel API names to induce prototypes. -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef addv_ker_name +#define addv_ker_name addv_ref -INSERT_GENTPROT_BASIC( addv_ref ) -INSERT_GENTPROT_BASIC( copyv_ref ) -INSERT_GENTPROT_BASIC( subv_ref ) +#undef axpbyv_ker_name +#define axpbyv_ker_name axpbyv_ref +#undef axpyv_ker_name +#define axpyv_ker_name axpyv_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef copyv_ker_name +#define copyv_ker_name copyv_ref -INSERT_GENTPROT_BASIC( axpyv_ref ) -INSERT_GENTPROT_BASIC( scal2v_ref ) +#undef dotv_ker_name +#define dotv_ker_name dotv_ref +#undef dotxv_ker_name +#define dotxv_ker_name dotxv_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict beta, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef invertv_ker_name +#define invertv_ker_name invertv_ref -INSERT_GENTPROT_BASIC( xpbyv_ref ) +#undef scalv_ker_name +#define scalv_ker_name scalv_ref +#undef scal2v_ker_name +#define scal2v_ker_name scal2v_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict beta, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef 
setv_ker_name +#define setv_ker_name setv_ref -INSERT_GENTPROT_BASIC( axpbyv_ref ) +#undef subv_ker_name +#define subv_ker_name subv_ref +#undef swapv_ker_name +#define swapv_ker_name swapv_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - ctype* restrict rho, \ - cntx_t* cntx \ - ); +#undef xpbyv_ker_name +#define xpbyv_ker_name xpbyv_ref -INSERT_GENTPROT_BASIC( dotv_ref ) +// Include the level-1v kernel API template. - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - ctype* restrict beta, \ - ctype* restrict rho, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( invertv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjalpha, \ - dim_t n, \ - ctype* restrict alpha, \ - ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( scalv_ref ) -INSERT_GENTPROT_BASIC( setv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - dim_t n, \ - ctype* restrict x, inc_t incx, \ - ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( swapv_ref ) +#include "bli_l1v_ker.h" diff --git a/frame/1/kernels/bli_scal2v_ref.c b/frame/1/kernels/bli_scal2v_ref.c index f4bc0d541..cb874d4ba 100644 --- a/frame/1/kernels/bli_scal2v_ref.c +++ b/frame/1/kernels/bli_scal2v_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t 
n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -72,66 +72,66 @@ void PASTEMAC(ch,varname) \ ); \ return; \ } \ - /* If alpha is one, use copyv. */ \ - else if ( PASTEMAC(ch,eq0)( *alpha ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ + /* If alpha is one, use copyv. */ \ + else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ - copyv_p \ - ( \ - BLIS_NO_CONJUGATE, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ \ chi1 = x; \ psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if (
incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_scalv_ref.c b/frame/1/kernels/bli_scalv_ref.c index 7d238c6f4..cc6817a43 100644 --- a/frame/1/kernels/bli_scalv_ref.c +++ b/frame/1/kernels/bli_scalv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjalpha, \ - dim_t n, \ + conj_t conjalpha, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -79,22 +79,22 @@ void PASTEMAC(ch,varname) \ \ chi1 = x; \ \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ +\ + chi1 += incx; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( scalv_ref ) diff --git a/frame/1/kernels/bli_setv_ref.c b/frame/1/kernels/bli_setv_ref.c index 4f7d1db2d..19b5c4ffb 100644 --- a/frame/1/kernels/bli_setv_ref.c +++ b/frame/1/kernels/bli_setv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjalpha, \ - dim_t n, \ + conj_t conjalpha, \ + dim_t n, \ ctype* restrict alpha, \ ctype* restrict x, inc_t incx, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -56,43 +56,43 @@ void PASTEMAC(ch,varname) \ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,set0s)( chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < 
n; ++i ) \ - { \ - PASTEMAC(ch,set0s)( *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( *chi1 ); \ +\ + chi1 += incx; \ + } \ + } \ } \ else \ { \ PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ \ - if ( incx == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ - \ - chi1 += incx; \ - } \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ +\ + chi1 += incx; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_subv_ref.c b/frame/1/kernels/bli_subv_ref.c index bc59b01b6..cde01638a 100644 --- a/frame/1/kernels/bli_subv_ref.c +++ b/frame/1/kernels/bli_subv_ref.c @@ -39,11 +39,11 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -57,43 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 
) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_swapv_ref.c b/frame/1/kernels/bli_swapv_ref.c index 6a43fb0d0..495ffd57b 100644 --- a/frame/1/kernels/bli_swapv_ref.c +++ b/frame/1/kernels/bli_swapv_ref.c @@ -39,10 +39,10 @@ \ void PASTEMAC(ch,varname) \ ( \ - dim_t n, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* cntx \ ) \ { \ ctype* restrict chi1; \ @@ -54,23 +54,23 @@ void PASTEMAC(ch,varname) \ chi1 = x; \ psi1 = y; \ \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( swapv_ref ) diff --git a/frame/1/kernels/bli_xpbyv_ref.c b/frame/1/kernels/bli_xpbyv_ref.c index 508e06ce8..e6d1b1ad5 100644 --- a/frame/1/kernels/bli_xpbyv_ref.c +++ b/frame/1/kernels/bli_xpbyv_ref.c @@ -39,12 +39,12 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - dim_t n, \ + conj_t conjx, \ + dim_t n, \ ctype* restrict x, inc_t incx, \ ctype* restrict beta, \ ctype* restrict y, inc_t incy, \ - cntx_t* cntx \ + cntx_t* 
cntx \ ) \ { \ ctype* restrict chi1; \ @@ -55,21 +55,21 @@ void PASTEMAC(ch,varname) \ \ /* If beta is zero, use copyv. */ \ if ( PASTEMAC(ch,eq0)( *beta ) ) \ - { \ - /* Query the context for the kernel function pointer. */ \ - const num_t dt = PASTEMAC(ch,type); \ - PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ \ - copyv_p \ - ( \ - conjx, \ - n, \ - x, incx, \ - y, incy, \ - cntx \ - ); \ - return; \ - } \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ /* If alpha is one, use addv. */ \ else if ( PASTEMAC(ch,eq1)( *beta ) ) \ { \ @@ -93,43 +93,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - if ( incx == 1 && incy == 1 ) \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \ - } \ - } \ - else \ - { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \ - \ - chi1 += incx; \ - psi1 += incy; \ - } \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + 
PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \ +\ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1f/bli_l1f_ft.h b/frame/1f/bli_l1f_ft.h index f8d15fc3c..5fa688b5d 100644 --- a/frame/1f/bli_l1f_ft.h +++ b/frame/1f/bli_l1f_ft.h @@ -50,11 +50,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha1, \ - ctype* alpha2, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ + ctype* restrict alpha1, \ + ctype* restrict alpha2, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); @@ -71,10 +71,10 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -91,11 +91,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ conj_t conjy, \ dim_t m, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); @@ -112,11 +112,11 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -135,13 +135,13 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ conj_t conjx, \ dim_t m, \ dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - 
ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ cntx_t* cntx \ ); diff --git a/frame/1f/bli_l1f_ker.h b/frame/1f/bli_l1f_ker.h index 953aaf0af..9c040490e 100644 --- a/frame/1f/bli_l1f_ker.h +++ b/frame/1f/bli_l1f_ker.h @@ -42,15 +42,15 @@ \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alphax, \ - ctype* alphay, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alphax, \ + ctype* restrict alphay, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( axpy2v_ker_name ) @@ -61,15 +61,15 @@ INSERT_GENTPROT_BASIC( axpy2v_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conja, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( axpyf_ker_name ) @@ -80,16 +80,16 @@ INSERT_GENTPROT_BASIC( axpyf_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjxt, \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjxt, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t 
incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( dotaxpyv_ker_name ) @@ -100,20 +100,20 @@ INSERT_GENTPROT_BASIC( dotaxpyv_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conja, \ + conj_t conjw, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name ) @@ -124,16 +124,16 @@ INSERT_GENTPROT_BASIC( dotxaxpyf_ker_name ) \ void PASTEMAC(ch,opname) \ ( \ - conj_t conjat, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ); INSERT_GENTPROT_BASIC( dotxf_ker_name ) diff --git a/frame/1f/kernels/bli_axpy2v_ref.c b/frame/1f/kernels/bli_axpy2v_ref.c index e91a510cb..a65558540 100644 --- a/frame/1f/kernels/bli_axpy2v_ref.c +++ b/frame/1f/kernels/bli_axpy2v_ref.c @@ -40,15 +40,15 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alphax, \ - ctype* alphay, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + 
conj_t conjx, \ + conj_t conjy, \ + dim_t n, \ + ctype* restrict alphax, \ + ctype* restrict alphay, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ /* Query the context for the kernel function pointer. */ \ diff --git a/frame/1f/kernels/bli_axpyf_ref.c b/frame/1f/kernels/bli_axpyf_ref.c index 228d53823..72caaf695 100644 --- a/frame/1f/kernels/bli_axpyf_ref.c +++ b/frame/1f/kernels/bli_axpyf_ref.c @@ -40,15 +40,15 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conja, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ) \ { \ ctype* a1; \ diff --git a/frame/1f/kernels/bli_dotaxpyv_ref.c b/frame/1f/kernels/bli_dotaxpyv_ref.c index 22893a5d4..bae1183c5 100644 --- a/frame/1f/kernels/bli_dotaxpyv_ref.c +++ b/frame/1f/kernels/bli_dotaxpyv_ref.c @@ -40,16 +40,16 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjxt, \ - conj_t conjx, \ - conj_t conjy, \ - dim_t m, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjxt, \ + conj_t conjx, \ + conj_t conjy, \ + dim_t m, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ ctype* one = PASTEMAC(ch,1); \ diff --git a/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c b/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c index 4d2851fed..0de91f862 100644 --- a/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c +++ b/frame/1f/kernels/bli_dotxaxpyf_ref_var1.c @@ -40,20 +40,20 @@ \ void PASTEMAC(ch,varname) \ ( \ - 
conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conja, \ + conj_t conjw, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ ctype* a1; \ diff --git a/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c b/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c index 051e86f01..f0123c94b 100644 --- a/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c +++ b/frame/1f/kernels/bli_dotxaxpyf_ref_var2.c @@ -40,20 +40,20 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conja, \ + conj_t conjw, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict w, inc_t incw, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict z, inc_t incz, \ + cntx_t* cntx \ ) \ { \ /* A is m x n. 
*/ \ diff --git a/frame/1f/kernels/bli_dotxf_ref.c b/frame/1f/kernels/bli_dotxf_ref.c index 5e50847db..41866b87a 100644 --- a/frame/1f/kernels/bli_dotxf_ref.c +++ b/frame/1f/kernels/bli_dotxf_ref.c @@ -40,16 +40,16 @@ \ void PASTEMAC(ch,varname) \ ( \ - conj_t conjat, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ + conj_t conjat, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype* restrict alpha, \ + ctype* restrict a, inc_t inca, inc_t lda, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ ) \ { \ ctype* a1; \ diff --git a/frame/1f/kernels/bli_l1f_ref.h b/frame/1f/kernels/bli_l1f_ref.h index 6a73ac5d1..899a4ba29 100644 --- a/frame/1f/kernels/bli_l1f_ref.h +++ b/frame/1f/kernels/bli_l1f_ref.h @@ -32,129 +32,24 @@ */ +// Redefine level-1f kernel API names to induce prototypes. -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjx, \ - conj_t conjy, \ - dim_t n, \ - ctype* alpha1, \ - ctype* alpha2, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); +#undef axpy2v_ker_name +#define axpy2v_ker_name axpy2v_ref -INSERT_GENTPROT_BASIC( axpy2v_ref ) +#undef dotaxpyv_ker_name +#define dotaxpyv_ker_name dotaxpyv_ref +#undef axpyf_ker_name +#define axpyf_ker_name axpyf_ref -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); +#undef dotxf_ker_name +#define dotxf_ker_name dotxf_ref -INSERT_GENTPROT_BASIC( axpyf_ref ) +#undef dotxaxpy_ker_name +#define dotxaxpy_ker_name dotxaxpyf_ref +// Include the level-1f kernel API template. 
-#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjxt, \ - conj_t conjx, \ - conj_t conjy, \ - dim_t m, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotaxpyv_ref ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var1 ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* w, inc_t incw, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - ctype* z, inc_t incz, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxaxpyf_ref_var2 ) - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname) \ - ( \ - conj_t conjat, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - ctype* alpha, \ - ctype* a, inc_t inca, inc_t lda, \ - ctype* x, inc_t incx, \ - ctype* beta, \ - ctype* y, inc_t incy, \ - cntx_t* cntx \ - ); - -INSERT_GENTPROT_BASIC( dotxf_ref ) +#include "bli_l1v_ker.h" diff --git a/frame/include/bli_kernel_pre_macro_defs.h b/frame/include/bli_kernel_pre_macro_defs.h index 10f27ac54..98e4c3928 100644 --- a/frame/include/bli_kernel_pre_macro_defs.h +++ b/frame/include/bli_kernel_pre_macro_defs.h @@ -262,10 +262,10 @@ // axpbyv kernels -#define BLIS_SAXPBYV_KERNEL_REF bli_saxpbyv_ref -#define BLIS_DAXPBYV_KERNEL_REF bli_daxpbyv_ref -#define 
BLIS_CAXPBYV_KERNEL_REF bli_caxpbyv_ref -#define BLIS_ZAXPBYV_KERNEL_REF bli_zaxpbyv_ref +#define BLIS_SAXPBYV_KERNEL_REF bli_saxpbyv_ref +#define BLIS_DAXPBYV_KERNEL_REF bli_daxpbyv_ref +#define BLIS_CAXPBYV_KERNEL_REF bli_caxpbyv_ref +#define BLIS_ZAXPBYV_KERNEL_REF bli_zaxpbyv_ref // axpyv kernels diff --git a/frame/include/bli_kernel_prototypes.h b/frame/include/bli_kernel_prototypes.h index 6a61f484d..d3524358c 100644 --- a/frame/include/bli_kernel_prototypes.h +++ b/frame/include/bli_kernel_prototypes.h @@ -108,6 +108,11 @@ #define bli_caddv_ker_name BLIS_CADDV_KERNEL #define bli_zaddv_ker_name BLIS_ZADDV_KERNEL +#define bli_saxpbyv_ker_name BLIS_SAXPBYV_KERNEL +#define bli_daxpbyv_ker_name BLIS_DAXPBYV_KERNEL +#define bli_caxpbyv_ker_name BLIS_CAXPBYV_KERNEL +#define bli_zaxpbyv_ker_name BLIS_ZAXPBYV_KERNEL + #define bli_saxpyv_ker_name BLIS_SAXPYV_KERNEL #define bli_daxpyv_ker_name BLIS_DAXPYV_KERNEL #define bli_caxpyv_ker_name BLIS_CAXPYV_KERNEL @@ -158,6 +163,11 @@ #define bli_cswapv_ker_name BLIS_CSWAPV_KERNEL #define bli_zswapv_ker_name BLIS_ZSWAPV_KERNEL +#define bli_sxpbyv_ker_name BLIS_SXPBYV_KERNEL +#define bli_dxpbyv_ker_name BLIS_DXPBYV_KERNEL +#define bli_cxpbyv_ker_name BLIS_CXPBYV_KERNEL +#define bli_zxpbyv_ker_name BLIS_ZXPBYV_KERNEL + #include "bli_l1v_ker.h" diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 5f921b79d..ffdcba56b 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -785,7 +785,7 @@ typedef enum typedef enum { BLIS_ADDV_KER = 0, - BLIS_AXPBYV_KER, + BLIS_AXPBYV_KER, BLIS_AXPYV_KER, BLIS_COPYV_KER, BLIS_DOTV_KER, @@ -796,7 +796,7 @@ typedef enum BLIS_SETV_KER, BLIS_SUBV_KER, BLIS_SWAPV_KER, - BLIS_XPBYV_KER, + BLIS_XPBYV_KER, } l1vkr_t; #define BLIS_NUM_LEVEL1V_KERS 13 diff --git a/testsuite/src/test_axpbyv.c b/testsuite/src/test_axpbyv.c index d9e3c18a0..805e092ec 100644 --- a/testsuite/src/test_axpbyv.c +++ b/testsuite/src/test_axpbyv.c @@ -79,12 +79,12 @@ void 
libblis_test_axpbyv_deps( test_params_t* params, test_op_t* op ) libblis_test_randv( params, &(op->ops->randv) ); libblis_test_normfv( params, &(op->ops->normfv) ); libblis_test_addv( params, &(op->ops->addv) ); - libblis_test_axpyv( params, &(op->ops->axpyv) ); + libblis_test_axpyv( params, &(op->ops->axpyv) ); libblis_test_subv( params, &(op->ops->subv) ); libblis_test_copyv( params, &(op->ops->copyv) ); - libblis_test_scalv( params, &(op->ops->scalv) ); - libblis_test_scal2v( params, &(op->ops->scal2v) ); - libblis_test_xpbyv( params, &(op->ops->xpbyv) ); + libblis_test_scalv( params, &(op->ops->scalv) ); + libblis_test_scal2v( params, &(op->ops->scal2v) ); + libblis_test_xpbyv( params, &(op->ops->xpbyv) ); } @@ -149,8 +149,8 @@ void libblis_test_axpbyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_scalar_init_detached( datatype, &alpha ); - bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -165,7 +165,7 @@ void libblis_test_axpbyv_experiment( test_params_t* params, else bli_setsc( 0.0, -2.0, &alpha ); - bli_setsc( -1.0, 0.0, &beta ); + bli_setsc( -1.0, 0.0, &beta ); // Randomize x and y, and save y. 
bli_randv( &x ); @@ -268,8 +268,8 @@ void libblis_test_axpbyv_check( obj_t* alpha, bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); - bli_scalv( alpha, &x_temp ); - bli_scalv( beta, &y_temp ); + bli_scalv( alpha, &x_temp ); + bli_scalv( beta, &y_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y ); diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index d48516a12..644c1c2d6 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -113,8 +113,8 @@ void libblis_test_utility_ops( test_params_t* params, test_ops_t* ops ) void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) { libblis_test_addv( params, &(ops->addv) ); - libblis_test_axpbyv( params, &(ops->axpbyv) ); - libblis_test_axpyv( params, &(ops->axpyv) ); + libblis_test_axpbyv( params, &(ops->axpbyv) ); + libblis_test_axpyv( params, &(ops->axpyv) ); libblis_test_copyv( params, &(ops->copyv) ); libblis_test_dotv( params, &(ops->dotv) ); libblis_test_dotxv( params, &(ops->dotxv) ); @@ -123,7 +123,7 @@ void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) libblis_test_scal2v( params, &(ops->scal2v) ); libblis_test_setv( params, &(ops->setv) ); libblis_test_subv( params, &(ops->subv) ); - libblis_test_xpbyv( params, &(ops->xpbyv) ); + libblis_test_xpbyv( params, &(ops->xpbyv) ); } @@ -222,7 +222,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) // Level-1v libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->addv) ); - libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpbyv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpbyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->copyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, 
BLIS_TEST_DIMS_M, 2, &(ops->dotv) ); @@ -232,7 +232,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scal2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->setv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->subv) ); - libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->xpbyv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->xpbyv) ); // Level-1m libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->addm) ); diff --git a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index 8a84a2d9b..86d90b99b 100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -204,8 +204,8 @@ typedef struct test_ops_s // level-1v test_op_t addv; - test_op_t axpbyv; - test_op_t axpyv; + test_op_t axpbyv; + test_op_t axpyv; test_op_t copyv; test_op_t dotv; test_op_t dotxv; @@ -214,8 +214,8 @@ typedef struct test_ops_s test_op_t scal2v; test_op_t setv; test_op_t subv; - test_op_t xpbyv; - + test_op_t xpbyv; + // level-1m test_op_t addm; test_op_t axpym; diff --git a/testsuite/src/test_xpbyv.c b/testsuite/src/test_xpbyv.c index a6610905b..684fedf23 100644 --- a/testsuite/src/test_xpbyv.c +++ b/testsuite/src/test_xpbyv.c @@ -143,7 +143,7 @@ void libblis_test_xpbyv_experiment( test_params_t* params, bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); // Create test scalars. - bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &beta ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); @@ -255,7 +255,7 @@ void libblis_test_xpbyv_check( obj_t* x, bli_copyv( x, &x_temp ); bli_copyv( y_orig, &y_temp ); - bli_scalv( beta, &y_temp ); + bli_scalv( beta, &y_temp ); bli_addv( &x_temp, &y_temp ); bli_subv( &y_temp, y );