From bdbda6e6acc682ab1b6ca680edebd09ae12a832c Mon Sep 17 00:00:00 2001 From: Devin Matthews Date: Mon, 25 Apr 2016 11:05:57 -0500 Subject: [PATCH] Give the level1v operations some love: - Add missing axpby and xpby operations (plus test cases). - Add special case for scal2v with alpha=1. - Add restrict qualifiers. - Add special-case algorithms for incx=incy=1. --- Makefile | 4 +- frame/1/bli_l1v_check.c | 185 +++++++-- frame/1/bli_l1v_check.h | 42 ++ frame/1/bli_l1v_cntx.c | 64 ++- frame/1/bli_l1v_cntx.h | 2 + frame/1/bli_l1v_ft.h | 35 ++ frame/1/bli_l1v_ker.h | 33 ++ frame/1/bli_l1v_oapi.c | 113 +++++ frame/1/bli_l1v_oapi.h | 29 ++ frame/1/bli_l1v_tapi.c | 74 ++++ frame/1/bli_l1v_tapi.h | 6 + frame/1/kernels/bli_addv_ref.c | 56 ++- frame/1/kernels/bli_axpbyv_ref.c | 248 +++++++++++ frame/1/kernels/bli_axpyv_ref.c | 58 ++- frame/1/kernels/bli_copyv_ref.c | 56 ++- frame/1/kernels/bli_dotv_ref.c | 58 ++- frame/1/kernels/bli_dotxv_ref.c | 62 ++- frame/1/kernels/bli_invertv_ref.c | 26 +- frame/1/kernels/bli_l1v_ref.h | 69 +++- frame/1/kernels/bli_scal2v_ref.c | 75 +++- frame/1/kernels/bli_scalv_ref.c | 28 +- frame/1/kernels/bli_setv_ref.c | 50 ++- frame/1/kernels/bli_subv_ref.c | 52 ++- frame/1/kernels/bli_swapv_ref.c | 32 +- frame/1/kernels/bli_xpbyv_ref.c | 137 ++++++ frame/base/bli_gks.c | 12 + frame/include/bli_kernel_macro_defs.h | 36 ++ frame/include/bli_kernel_pre_macro_defs.h | 14 + frame/include/bli_scalar_macro_defs.h | 8 + frame/include/bli_type_defs.h | 4 +- frame/include/level0/bli_axpbyjs.h | 481 ++++++++++++++++++++++ frame/include/level0/bli_axpbys.h | 481 ++++++++++++++++++++++ frame/include/level0/bli_xpbyjs.h | 192 +++++++++ frame/include/level0/ri/bli_axpbyjris.h | 163 ++++++++ frame/include/level0/ri/bli_axpbyris.h | 104 ++++- frame/include/level0/ri/bli_xpbyjris.h | 79 ++++ testsuite/input.operations | 10 + testsuite/src/test_axpbyv.c | 282 +++++++++++++ testsuite/src/test_axpbyv.h | 36 ++ testsuite/src/test_libblis.c | 6 +- 
testsuite/src/test_libblis.h | 6 +- testsuite/src/test_xpbyv.c | 268 ++++++++++++ testsuite/src/test_xpbyv.h | 36 ++ 43 files changed, 3579 insertions(+), 233 deletions(-) create mode 100644 frame/1/kernels/bli_axpbyv_ref.c create mode 100644 frame/1/kernels/bli_xpbyv_ref.c create mode 100644 frame/include/level0/bli_axpbyjs.h create mode 100644 frame/include/level0/bli_axpbys.h create mode 100644 frame/include/level0/bli_xpbyjs.h create mode 100644 frame/include/level0/ri/bli_axpbyjris.h create mode 100644 frame/include/level0/ri/bli_xpbyjris.h create mode 100644 testsuite/src/test_axpbyv.c create mode 100644 testsuite/src/test_axpbyv.h create mode 100644 testsuite/src/test_xpbyv.c create mode 100644 testsuite/src/test_xpbyv.h diff --git a/Makefile b/Makefile index e52cebd57..5ac386fec 100644 --- a/Makefile +++ b/Makefile @@ -678,11 +678,11 @@ endif cleantest: check-env ifeq ($(BLIS_ENABLE_VERBOSE_MAKE_OUTPUT),yes) - - $(FIND) $(BASE_OBJ_TESTSUITE_PATH) -name "*.o" -name "*.pexe" | $(XARGS) $(RM_F) + - $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F) - $(RM_RF) $(TESTSUITE_BIN) else @echo "Removing object files from $(BASE_OBJ_TESTSUITE_PATH)." - @- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) -name "*.o" -name "*.pexe" | $(XARGS) $(RM_F) + @- $(FIND) $(BASE_OBJ_TESTSUITE_PATH) \( -name "*.o" -o -name "*.pexe" \) | $(XARGS) $(RM_F) @echo "Removing $(TESTSUITE_BIN) binary." 
@- $(RM_RF) $(TESTSUITE_BIN) endif diff --git a/frame/1/bli_l1v_check.c b/frame/1/bli_l1v_check.c index b3ac34397..737fbaceb 100644 --- a/frame/1/bli_l1v_check.c +++ b/frame/1/bli_l1v_check.c @@ -66,13 +66,46 @@ void PASTEMAC(opname,_check) \ obj_t* y \ ) \ { \ - bli_l1v_axy_check( alpha, x, y ); \ + bli_l1v_axy_check( alpha, x, y ); \ } GENFRONT( axpyv ) GENFRONT( scal2v ) +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ) \ +{ \ + bli_l1v_xby_check( x, beta, y ); \ +} + +GENFRONT( xpbyv ) + + +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ) \ +{ \ + bli_l1v_axby_check( alpha, x, beta, y ); \ +} + +GENFRONT( axpbyv ) + + #undef GENFRONT #define GENFRONT( opname ) \ \ @@ -182,43 +215,145 @@ void bli_l1v_axy_check obj_t* y ) { - err_t e_val; + err_t e_val; - // Check object datatypes. + // Check object datatypes. - e_val = bli_check_noninteger_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_floating_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); - // Check object dimensions. + // Check object dimensions. 
- e_val = bli_check_scalar_object( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( x ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); - e_val = bli_check_vector_object( y ); - bli_check_error_code( e_val ); + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); - e_val = bli_check_equal_vector_lengths( x, y ); - bli_check_error_code( e_val ); + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); - // Check object buffers (for non-NULLness). + // Check object buffers (for non-NULLness). - e_val = bli_check_object_buffer( alpha ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( x ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); - e_val = bli_check_object_buffer( y ); - bli_check_error_code( e_val ); + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + +void bli_l1v_xby_check + ( + obj_t* x, + obj_t* beta, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). 
+ + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + +void bli_l1v_axby_check + ( + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_noninteger_object( alpha ); + bli_check_error_code( e_val ); + + e_val = bli_check_noninteger_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_scalar_object( alpha ); + bli_check_error_code( e_val ); + + e_val = bli_check_scalar_object( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). 
+ + e_val = bli_check_object_buffer( alpha ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( beta ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); } void bli_l1v_dot_check diff --git a/frame/1/bli_l1v_check.h b/frame/1/bli_l1v_check.h index ab3cfeee9..1c87f5f51 100644 --- a/frame/1/bli_l1v_check.h +++ b/frame/1/bli_l1v_check.h @@ -66,6 +66,33 @@ GENTPROT( axpyv ) GENTPROT( scal2v ) +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ); + +GENTPROT( xpbyv ) + + +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,_check) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + ); + +GENTPROT( axpbyv ) + + #undef GENTPROT #define GENTPROT( opname ) \ \ @@ -133,6 +160,21 @@ void bli_l1v_axy_check obj_t* y ); +void bli_l1v_xby_check + ( + obj_t* x, + obj_t* beta, + obj_t* y + ); + +void bli_l1v_axby_check + ( + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y + ); + void bli_l1v_dot_check ( obj_t* alpha, diff --git a/frame/1/bli_l1v_cntx.c b/frame/1/bli_l1v_cntx.c index 482441451..d1c504528 100644 --- a/frame/1/bli_l1v_cntx.c +++ b/frame/1/bli_l1v_cntx.c @@ -69,21 +69,69 @@ GENFRONT( swapv, BLIS_SWAPV_KER ) \ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ { \ - bli_cntx_obj_create( cntx ); \ + bli_cntx_obj_create( cntx ); \ \ - /* Initialize the context with kernel dependencies. */ \ - PASTEMAC(depname,_cntx_init)( cntx ); \ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(depname,_cntx_init)( cntx ); \ \ - /* Initialize the context with the kernel associated with the current - operation. */ \ - bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ + /* Initialize the context with the kernel associated with the current + operation. 
*/ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ } \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ { \ - bli_cntx_obj_free( cntx ); \ + bli_cntx_obj_free( cntx ); \ } GENFRONT( axpyv, BLIS_AXPYV_KER, addv ) -GENFRONT( scal2v, BLIS_SCAL2V_KER, setv ) GENFRONT( scalv, BLIS_SCALV_KER, setv ) + +#undef GENFRONT +#define GENFRONT( opname, kertype, dep1, dep2 ) \ +\ +void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_create( cntx ); \ +\ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ +\ + /* Initialize the context with the kernel associated with the current + operation. */ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ +} \ +void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_free( cntx ); \ +} + +GENFRONT( scal2v, BLIS_SCAL2V_KER, setv, copyv ) +GENFRONT( xpbyv, BLIS_XPBYV_KER, addv, copyv ) + + +#undef GENFRONT +#define GENFRONT( opname, kertype, dep1, dep2, dep3, dep4 ) \ +\ +void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_create( cntx ); \ +\ + /* Initialize the context with kernel dependencies. */ \ + PASTEMAC(dep1,_cntx_init)( cntx ); \ + PASTEMAC(dep2,_cntx_init)( cntx ); \ + PASTEMAC(dep3,_cntx_init)( cntx ); \ + PASTEMAC(dep4,_cntx_init)( cntx ); \ +\ + /* Initialize the context with the kernel associated with the current + operation. 
*/ \ + bli_gks_cntx_set_l1v_ker( kertype, cntx ); \ +} \ +void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ) \ +{ \ + bli_cntx_obj_free( cntx ); \ +} + +GENFRONT( axpbyv, BLIS_AXPBYV_KER, axpyv, xpbyv, scal2v, scalv ) + diff --git a/frame/1/bli_l1v_cntx.h b/frame/1/bli_l1v_cntx.h index 6db0a29c1..a8c16d342 100644 --- a/frame/1/bli_l1v_cntx.h +++ b/frame/1/bli_l1v_cntx.h @@ -44,6 +44,7 @@ void PASTEMAC(opname,_cntx_init)( cntx_t* cntx ); \ void PASTEMAC(opname,_cntx_finalize)( cntx_t* cntx ); GENPROT( addv ) +GENPROT( axpbyv ) GENPROT( axpyv ) GENPROT( copyv ) GENPROT( dotv ) @@ -54,4 +55,5 @@ GENPROT( scal2v ) GENPROT( setv ) GENPROT( subv ) GENPROT( swapv ) +GENPROT( xpbyv ) diff --git a/frame/1/bli_l1v_ft.h b/frame/1/bli_l1v_ft.h index e206938ce..051ca0f6c 100644 --- a/frame/1/bli_l1v_ft.h +++ b/frame/1/bli_l1v_ft.h @@ -76,6 +76,41 @@ typedef void (*PASTECH2(ch,opname,tsuf)) \ INSERT_GENTDEF( axpyv ) INSERT_GENTDEF( scal2v ) +// xpbyv + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( xpbyv ) + +// axpbyv + +#undef GENTDEF +#define GENTDEF( ctype, ch, opname, tsuf ) \ +\ +typedef void (*PASTECH2(ch,opname,tsuf)) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTDEF( axpbyv ) + // dotv #undef GENTDEF diff --git a/frame/1/bli_l1v_ker.h b/frame/1/bli_l1v_ker.h index 33cc7e6ae..13c675215 100644 --- a/frame/1/bli_l1v_ker.h +++ b/frame/1/bli_l1v_ker.h @@ -71,6 +71,39 @@ INSERT_GENTPROT_BASIC( axpyv_ker_name ) INSERT_GENTPROT_BASIC( scal2v_ker_name ) +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t*
cntx \ + ); \ + +INSERT_GENTPROT_BASIC( xpbyv_ker_name ) + + +#undef GENTPROT +#define GENTPROT( ctype, ch, opname ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ); \ + +INSERT_GENTPROT_BASIC( axpbyv_ker_name ) + + #undef GENTPROT #define GENTPROT( ctype, ch, opname ) \ \ diff --git a/frame/1/bli_l1v_oapi.c b/frame/1/bli_l1v_oapi.c index 6482d5cdf..c43551236 100644 --- a/frame/1/bli_l1v_oapi.c +++ b/frame/1/bli_l1v_oapi.c @@ -136,6 +136,119 @@ GENFRONT( axpyv ) GENFRONT( scal2v ) + +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_beta; \ +\ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. 
*/ \ + bli_call_ft_8 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( xpbyv ) + + + +#undef GENFRONT +#define GENFRONT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ) \ +{ \ + BLIS_OAPI_CNTX_DECL \ +\ + num_t dt = bli_obj_datatype( *x ); \ +\ + conj_t conjx = bli_obj_conj_status( *x ); \ + dim_t n = bli_obj_vector_dim( *x ); \ + void* buf_x = bli_obj_buffer_at_off( *x ); \ + inc_t inc_x = bli_obj_vector_inc( *x ); \ + void* buf_y = bli_obj_buffer_at_off( *y ); \ + inc_t inc_y = bli_obj_vector_inc( *y ); \ +\ + void* buf_alpha; \ + void* buf_beta; \ +\ + obj_t alpha_local; \ + obj_t beta_local; \ +\ + if ( bli_error_checking_is_enabled() ) \ + PASTEMAC(opname,_check)( alpha, x, beta, y ); \ +\ + /* Create local copy-casts of scalars (and apply internal conjugation + as needed). */ \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + alpha, &alpha_local ); \ + bli_obj_scalar_init_detached_copy_of( dt, BLIS_NO_CONJUGATE, \ + beta, &beta_local ); \ + buf_alpha = bli_obj_buffer_for_1x1( dt, alpha_local ); \ + buf_beta = bli_obj_buffer_for_1x1( dt, beta_local ); \ +\ + /* Invoke the void pointer-based function. 
*/ \ + bli_call_ft_9 \ + ( \ + dt, \ + opname, \ + conjx, \ + n, \ + buf_alpha, \ + buf_x, inc_x, \ + buf_beta, \ + buf_y, inc_y, \ + cntx \ + ); \ +} + +GENFRONT( axpbyv ) + + #undef GENFRONT #define GENFRONT( opname ) \ \ diff --git a/frame/1/bli_l1v_oapi.h b/frame/1/bli_l1v_oapi.h index 2f4da57d8..b6ec5094d 100644 --- a/frame/1/bli_l1v_oapi.h +++ b/frame/1/bli_l1v_oapi.h @@ -67,6 +67,35 @@ GENTPROT( axpyv ) GENTPROT( scal2v ) +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ); + +GENTPROT( xpbyv ) + + +#undef GENTPROT +#define GENTPROT( opname ) \ +\ +void PASTEMAC(opname,EX_SUF) \ + ( \ + obj_t* alpha, \ + obj_t* x, \ + obj_t* beta, \ + obj_t* y \ + BLIS_OAPI_CNTX_PARAM \ + ); + +GENTPROT( axpbyv ) + + #undef GENTPROT #define GENTPROT( opname ) \ \ diff --git a/frame/1/bli_l1v_tapi.c b/frame/1/bli_l1v_tapi.c index af92aa92d..551a41d18 100644 --- a/frame/1/bli_l1v_tapi.c +++ b/frame/1/bli_l1v_tapi.c @@ -111,6 +111,80 @@ INSERT_GENTFUNC_BASIC( axpyv, BLIS_AXPYV_KER ) INSERT_GENTFUNC_BASIC( scal2v, BLIS_SCAL2V_KER ) +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ +\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( xpbyv, BLIS_XPBYV_KER ) + + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, kerid ) \ +\ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* alpha, \ + ctype* x, inc_t incx, \ + ctype* beta, \ + ctype* y, inc_t incy, \ + cntx_t* cntx 
\ + ) \ +{ \ + const num_t dt = PASTEMAC(ch,type); \ + cntx_t* cntx_p; \ +\ + bli_cntx_init_local_if( opname, cntx, cntx_p ); \ +\ + PASTECH2(ch,opname,_ft) f = bli_cntx_get_l1v_ker_dt( dt, kerid, cntx_p ); \ +\ + f \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + beta, \ + y, incy, \ + cntx_p \ + ); \ +\ + bli_cntx_finalize_local_if( opname, cntx ); \ +} + +INSERT_GENTFUNC_BASIC( axpbyv, BLIS_AXPBYV_KER ) + + #undef GENTFUNC #define GENTFUNC( ctype, ch, opname, kerid ) \ \ diff --git a/frame/1/bli_l1v_tapi.h b/frame/1/bli_l1v_tapi.h index 618d9a280..b50fe8eb9 100644 --- a/frame/1/bli_l1v_tapi.h +++ b/frame/1/bli_l1v_tapi.h @@ -40,6 +40,9 @@ #undef addv_ker_name #define addv_ker_name addv +#undef axpbyv_ker_name +#define axpbyv_ker_name axpbyv + #undef axpyv_ker_name #define axpyv_ker_name axpyv @@ -70,6 +73,9 @@ #undef swapv_ker_name #define swapv_ker_name swapv +#undef xpbyv_ker_name +#define xpbyv_ker_name xpbyv + // Include the level-1v kernel API template. diff --git a/frame/1/kernels/bli_addv_ref.c b/frame/1/kernels/bli_addv_ref.c index 4a91667a2..c18748ae9 100644 --- a/frame/1/kernels/bli_addv_ref.c +++ b/frame/1/kernels/bli_addv_ref.c @@ -41,13 +41,13 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -57,23 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,addjs)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } 
\ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,adds)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if (incx == 1 && incy == 1) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,adds)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_axpbyv_ref.c b/frame/1/kernels/bli_axpbyv_ref.c new file mode 100644 index 000000000..91d92c733 --- /dev/null +++ b/frame/1/kernels/bli_axpbyv_ref.c @@ -0,0 +1,248 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ + dim_t i; \ +\ + if ( bli_zero_dim1( n ) ) return; \ +\ + if ( PASTEMAC(ch,eq0)( *alpha ) ) \ + { \ +\ + /* If alpha is zero and beta is zero, set to zero. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + ctype* zero = PASTEMAC(ch,0); \ +\ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,setv_ft) setv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SETV_KER, cntx ); \ +\ + setv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + zero, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is zero and beta is one, return. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + return; \ + } \ + /* If alpha is zero, scale by beta. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. 
*/ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scalv_ft) scalv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCALV_KER, cntx ); \ +\ + scalv_p \ + ( \ + BLIS_NO_CONJUGATE, \ + n, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ +\ + } \ + else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + { \ + \ + /* If alpha is one and beta is zero, copy. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ + \ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one and beta is one, add. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ + \ + addv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If alpha is one, call xpby. */ \ + else \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,xpbyv_ft) xpbyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_XPBYV_KER, cntx ); \ + \ + xpbyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + beta, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + \ + } \ + else \ + { \ + \ + /* If beta is zero, call scal2. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,scal2v_ft) scal2v_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_SCAL2V_KER, cntx ); \ + \ + scal2v_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If beta is one, call axpy. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. 
*/ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,axpyv_ft) axpyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_AXPYV_KER, cntx ); \ + \ + axpyv_p \ + ( \ + conjx, \ + n, \ + alpha, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + \ + } \ +\ + chi1 = x; \ + psi1 = y; \ +\ + if ( bli_is_conj( conjx ) ) \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbyjs)( *alpha, *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ + else \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpbys)( *alpha, *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC0( axpbyv_ref ) + diff --git a/frame/1/kernels/bli_axpyv_ref.c b/frame/1/kernels/bli_axpyv_ref.c index 4b29505cf..a6dd49976 100644 --- a/frame/1/kernels/bli_axpyv_ref.c +++ b/frame/1/kernels/bli_axpyv_ref.c @@ -41,14 +41,14 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -79,23 +79,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; 
++i ) \ + { \ + PASTEMAC(ch,axpyjs)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,axpys)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_copyv_ref.c b/frame/1/kernels/bli_copyv_ref.c index b852f76e7..28d22c7b2 100644 --- a/frame/1/kernels/bli_copyv_ref.c +++ b/frame/1/kernels/bli_copyv_ref.c @@ -41,13 +41,13 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -57,23 +57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copyjs)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } 
\ } diff --git a/frame/1/kernels/bli_dotv_ref.c b/frame/1/kernels/bli_dotv_ref.c index b17480b07..6f790c81c 100644 --- a/frame/1/kernels/bli_dotv_ref.c +++ b/frame/1/kernels/bli_dotv_ref.c @@ -42,14 +42,14 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ ctype dotxy; \ dim_t i; \ conj_t conjx_use; \ @@ -75,23 +75,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx_use ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_dotxv_ref.c b/frame/1/kernels/bli_dotxv_ref.c index b611533d4..89dedcc48 100644 --- a/frame/1/kernels/bli_dotxv_ref.c +++ b/frame/1/kernels/bli_dotxv_ref.c @@ -42,16 +42,16 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ + ctype* restrict alpha, \ + ctype* restrict x, 
inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ ctype dotxy; \ dim_t i; \ conj_t conjx_use; \ @@ -83,23 +83,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx_use ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dotjs)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( chi1[i], psi1[i], dotxy ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,dots)( *chi1, *psi1, dotxy ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ \ if ( bli_is_conj( conjy ) ) \ diff --git a/frame/1/kernels/bli_invertv_ref.c b/frame/1/kernels/bli_invertv_ref.c index c7f3dbcb7..a79d8c9f0 100644 --- a/frame/1/kernels/bli_invertv_ref.c +++ b/frame/1/kernels/bli_invertv_ref.c @@ -40,23 +40,33 @@ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ + ctype* restrict chi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ \ chi1 = x; \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,inverts)( *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,inverts)( *chi1 ); \ + \ + chi1 += incx; \ + } 
\ + } \ } INSERT_GENTFUNC_BASIC0( invertv_ref ) diff --git a/frame/1/kernels/bli_l1v_ref.h b/frame/1/kernels/bli_l1v_ref.h index f3857d841..60cdbd2ee 100644 --- a/frame/1/kernels/bli_l1v_ref.h +++ b/frame/1/kernels/bli_l1v_ref.h @@ -40,8 +40,8 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -57,9 +57,9 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); @@ -67,6 +67,39 @@ INSERT_GENTPROT_BASIC( axpyv_ref ) INSERT_GENTPROT_BASIC( scal2v_ref ) +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTPROT_BASIC( xpbyv_ref ) + + +#undef GENTPROT +#define GENTPROT( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ); + +INSERT_GENTPROT_BASIC( axpbyv_ref ) + + #undef GENTPROT #define GENTPROT( ctype, ch, varname ) \ \ @@ -75,9 +108,9 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* rho, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -92,11 +125,11 @@ void PASTEMAC(ch,varname) \ conj_t conjx, \ conj_t conjy, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ - ctype* beta, \ - ctype* rho, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* 
restrict y, inc_t incy, \ + ctype* restrict beta, \ + ctype* restrict rho, \ cntx_t* cntx \ ); @@ -109,7 +142,7 @@ INSERT_GENTPROT_BASIC( dotxv_ref ) void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ -123,8 +156,8 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ); @@ -138,8 +171,8 @@ INSERT_GENTPROT_BASIC( setv_ref ) void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ); diff --git a/frame/1/kernels/bli_scal2v_ref.c b/frame/1/kernels/bli_scal2v_ref.c index 3f739cd90..f4bc0d541 100644 --- a/frame/1/kernels/bli_scal2v_ref.c +++ b/frame/1/kernels/bli_scal2v_ref.c @@ -41,14 +41,14 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -72,29 +72,66 @@ void PASTEMAC(ch,varname) \ ); \ return; \ } \ + /* If alpha is one, no scaling is needed: use copyv, forwarding conjx so a requested conjugation of x is still applied. */ \ + else if ( PASTEMAC(ch,eq1)( *alpha ) ) \ + { \ + /* Query the context for the kernel function pointer.
*/ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ +\ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ \ chi1 = x; \ psi1 = y; \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2js)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scal2s)( *alpha, *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_scalv_ref.c b/frame/1/kernels/bli_scalv_ref.c index 982313c9b..7d238c6f4 100644 --- a/frame/1/kernels/bli_scalv_ref.c +++ b/frame/1/kernels/bli_scalv_ref.c @@ -41,12 +41,12 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ + ctype* restrict chi1; \ ctype alpha_conj; \ dim_t i; \ \ @@ -79,12 +79,22 @@ void PASTEMAC(ch,varname) \ \ chi1 = x; \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,scals)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ +
PASTEMAC(ch,scals)( alpha_conj, *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( scalv_ref ) diff --git a/frame/1/kernels/bli_setv_ref.c b/frame/1/kernels/bli_setv_ref.c index f01364339..4f7d1db2d 100644 --- a/frame/1/kernels/bli_setv_ref.c +++ b/frame/1/kernels/bli_setv_ref.c @@ -41,12 +41,12 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjalpha, \ dim_t n, \ - ctype* alpha, \ - ctype* x, inc_t incx, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ + ctype* restrict chi1; \ ctype alpha_conj; \ dim_t i; \ \ @@ -56,23 +56,43 @@ void PASTEMAC(ch,varname) \ \ if ( PASTEMAC(ch,eq0)( *alpha ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,set0s)( *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,set0s)( *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } \ else \ { \ PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ -\ - chi1 += incx; \ - } \ + if ( incx == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, chi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,copys)( alpha_conj, *chi1 ); \ + \ + chi1 += incx; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_subv_ref.c b/frame/1/kernels/bli_subv_ref.c index eca8f36dc..bc59b01b6 100644 --- a/frame/1/kernels/bli_subv_ref.c +++ b/frame/1/kernels/bli_subv_ref.c @@ -41,13 +41,13 @@ void PASTEMAC(ch,varname) \ ( \ conj_t conjx, \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -57,23 
+57,43 @@ void PASTEMAC(ch,varname) \ \ if ( bli_is_conj( conjx ) ) \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subjs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ else \ { \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,subs)( *chi1, *psi1 ); \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,subs)( *chi1, *psi1 ); \ \ - chi1 += incx; \ - psi1 += incy; \ - } \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } \ } diff --git a/frame/1/kernels/bli_swapv_ref.c b/frame/1/kernels/bli_swapv_ref.c index 8fe4a4b9a..6a43fb0d0 100644 --- a/frame/1/kernels/bli_swapv_ref.c +++ b/frame/1/kernels/bli_swapv_ref.c @@ -40,13 +40,13 @@ void PASTEMAC(ch,varname) \ ( \ dim_t n, \ - ctype* x, inc_t incx, \ - ctype* y, inc_t incy, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict y, inc_t incy, \ cntx_t* cntx \ ) \ { \ - ctype* chi1; \ - ctype* psi1; \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ dim_t i; \ \ if ( bli_zero_dim1( n ) ) return; \ @@ -54,13 +54,23 @@ void PASTEMAC(ch,varname) \ chi1 = x; \ psi1 = y; \ \ - for ( i = 0; i < n; ++i ) \ - { \ - PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ -\ - chi1 += incx; \ - psi1 += incy; \ - } \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( chi1[i], psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,swaps)( *chi1, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ } INSERT_GENTFUNC_BASIC0( swapv_ref ) diff --git a/frame/1/kernels/bli_xpbyv_ref.c b/frame/1/kernels/bli_xpbyv_ref.c new 
file mode 100644 index 000000000..508e06ce8 --- /dev/null +++ b/frame/1/kernels/bli_xpbyv_ref.c @@ -0,0 +1,137 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include "blis.h" + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, varname ) \ +\ +void PASTEMAC(ch,varname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + ctype* restrict x, inc_t incx, \ + ctype* restrict beta, \ + ctype* restrict y, inc_t incy, \ + cntx_t* cntx \ + ) \ +{ \ + ctype* restrict chi1; \ + ctype* restrict psi1; \ + dim_t i; \ +\ + if ( bli_zero_dim1( n ) ) return; \ +\ + /* If beta is zero, use copyv. */ \ + if ( PASTEMAC(ch,eq0)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,copyv_ft) copyv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_COPYV_KER, cntx ); \ +\ + copyv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ + /* If beta is one, use addv. */ \ + else if ( PASTEMAC(ch,eq1)( *beta ) ) \ + { \ + /* Query the context for the kernel function pointer. */ \ + const num_t dt = PASTEMAC(ch,type); \ + PASTECH(ch,addv_ft) addv_p = bli_cntx_get_l1v_ker_dt( dt, BLIS_ADDV_KER, cntx ); \ +\ + addv_p \ + ( \ + conjx, \ + n, \ + x, incx, \ + y, incy, \ + cntx \ + ); \ + return; \ + } \ +\ + chi1 = x; \ + psi1 = y; \ +\ + if ( bli_is_conj( conjx ) ) \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbyjs)( *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ + else \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbys)( chi1[i], *beta, psi1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,xpbys)( *chi1, *beta, *psi1 ); \ + \ + chi1 += incx; \ + psi1 += incy; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC0( xpbyv_ref ) + diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 1368d8846..74e9dde11 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@
-754,6 +754,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] = /* addv */ { { BLIS_SADDV_KERNEL, BLIS_CADDV_KERNEL, BLIS_DADDV_KERNEL, BLIS_ZADDV_KERNEL, } }, +/* axpbyv */ { { BLIS_SAXPBYV_KERNEL, BLIS_CAXPBYV_KERNEL, + BLIS_DAXPBYV_KERNEL, BLIS_ZAXPBYV_KERNEL, } + }, /* axpyv */ { { BLIS_SAXPYV_KERNEL, BLIS_CAXPYV_KERNEL, BLIS_DAXPYV_KERNEL, BLIS_ZAXPYV_KERNEL, } }, @@ -784,6 +787,9 @@ static func_t bli_gks_l1v_kers[BLIS_NUM_LEVEL1V_KERS] = /* swapv */ { { BLIS_SSWAPV_KERNEL, BLIS_CSWAPV_KERNEL, BLIS_DSWAPV_KERNEL, BLIS_ZSWAPV_KERNEL, } }, +/* xpbyv */ { { BLIS_SXPBYV_KERNEL, BLIS_CXPBYV_KERNEL, + BLIS_DXPBYV_KERNEL, BLIS_ZXPBYV_KERNEL, } + }, }; static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] = @@ -792,6 +798,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] = /* addv */ { { BLIS_SADDV_KERNEL_REF, BLIS_CADDV_KERNEL_REF, BLIS_DADDV_KERNEL_REF, BLIS_ZADDV_KERNEL_REF, } }, +/* axpbyv */ { { BLIS_SAXPBYV_KERNEL_REF, BLIS_CAXPBYV_KERNEL_REF, + BLIS_DAXPBYV_KERNEL_REF, BLIS_ZAXPBYV_KERNEL_REF, } + }, /* axpyv */ { { BLIS_SAXPYV_KERNEL_REF, BLIS_CAXPYV_KERNEL_REF, BLIS_DAXPYV_KERNEL_REF, BLIS_ZAXPYV_KERNEL_REF, } }, @@ -822,6 +831,9 @@ static func_t bli_gks_l1v_ref_kers[BLIS_NUM_LEVEL1V_KERS] = /* swapv */ { { BLIS_SSWAPV_KERNEL_REF, BLIS_CSWAPV_KERNEL_REF, BLIS_DSWAPV_KERNEL_REF, BLIS_ZSWAPV_KERNEL_REF, } }, +/* xpbyv */ { { BLIS_SXPBYV_KERNEL_REF, BLIS_CXPBYV_KERNEL_REF, + BLIS_DXPBYV_KERNEL_REF, BLIS_ZXPBYV_KERNEL_REF, } + }, }; // ----------------------------------------------------------------------------- diff --git a/frame/include/bli_kernel_macro_defs.h b/frame/include/bli_kernel_macro_defs.h index 9f3643a90..1c599a20e 100644 --- a/frame/include/bli_kernel_macro_defs.h +++ b/frame/include/bli_kernel_macro_defs.h @@ -629,6 +629,24 @@ #define BLIS_ZADDV_KERNEL BLIS_ZADDV_KERNEL_REF #endif +// axpbyv kernels + +#ifndef BLIS_SAXPBYV_KERNEL +#define BLIS_SAXPBYV_KERNEL BLIS_SAXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_DAXPBYV_KERNEL 
+#define BLIS_DAXPBYV_KERNEL BLIS_DAXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_CAXPBYV_KERNEL +#define BLIS_CAXPBYV_KERNEL BLIS_CAXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_ZAXPBYV_KERNEL +#define BLIS_ZAXPBYV_KERNEL BLIS_ZAXPBYV_KERNEL_REF +#endif + // axpyv kernels #ifndef BLIS_SAXPYV_KERNEL @@ -809,6 +827,24 @@ #define BLIS_ZSWAPV_KERNEL BLIS_ZSWAPV_KERNEL_REF #endif +// xpbyv kernels + +#ifndef BLIS_SXPBYV_KERNEL +#define BLIS_SXPBYV_KERNEL BLIS_SXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_DXPBYV_KERNEL +#define BLIS_DXPBYV_KERNEL BLIS_DXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_CXPBYV_KERNEL +#define BLIS_CXPBYV_KERNEL BLIS_CXPBYV_KERNEL_REF +#endif + +#ifndef BLIS_ZXPBYV_KERNEL +#define BLIS_ZXPBYV_KERNEL BLIS_ZXPBYV_KERNEL_REF +#endif + // -- Define default blocksize macros ------------------------------------------ diff --git a/frame/include/bli_kernel_pre_macro_defs.h b/frame/include/bli_kernel_pre_macro_defs.h index 703f8c54f..10f27ac54 100644 --- a/frame/include/bli_kernel_pre_macro_defs.h +++ b/frame/include/bli_kernel_pre_macro_defs.h @@ -260,6 +260,13 @@ #define BLIS_CADDV_KERNEL_REF bli_caddv_ref #define BLIS_ZADDV_KERNEL_REF bli_zaddv_ref +// axpbyv kernels + +#define BLIS_SAXPBYV_KERNEL_REF bli_saxpbyv_ref +#define BLIS_DAXPBYV_KERNEL_REF bli_daxpbyv_ref +#define BLIS_CAXPBYV_KERNEL_REF bli_caxpbyv_ref +#define BLIS_ZAXPBYV_KERNEL_REF bli_zaxpbyv_ref + // axpyv kernels #define BLIS_SAXPYV_KERNEL_REF bli_saxpyv_ref @@ -330,6 +337,13 @@ #define BLIS_CSWAPV_KERNEL_REF bli_cswapv_ref #define BLIS_ZSWAPV_KERNEL_REF bli_zswapv_ref +// xpbyv kernels + +#define BLIS_SXPBYV_KERNEL_REF bli_sxpbyv_ref +#define BLIS_DXPBYV_KERNEL_REF bli_dxpbyv_ref +#define BLIS_CXPBYV_KERNEL_REF bli_cxpbyv_ref +#define BLIS_ZXPBYV_KERNEL_REF bli_zxpbyv_ref + #endif diff --git a/frame/include/bli_scalar_macro_defs.h b/frame/include/bli_scalar_macro_defs.h index 32258e86e..8104e5d4f 100644 --- a/frame/include/bli_scalar_macro_defs.h +++ b/frame/include/bli_scalar_macro_defs.h @@ -75,6 
+75,9 @@ #include "bli_add3ris.h" +#include "bli_axpbyris.h" +#include "bli_axpbyjris.h" + #include "bli_axpyris.h" #include "bli_axpyjris.h" @@ -112,6 +115,7 @@ #include "bli_swapris.h" #include "bli_xpbyris.h" +#include "bli_xpbyjris.h" // Inlined scalar macros in loops #include "bli_scalris_mxn_uplo.h" @@ -128,6 +132,9 @@ #include "bli_add3s.h" +#include "bli_axpbys.h" +#include "bli_axpbyjs.h" + #include "bli_axpys.h" #include "bli_axpyjs.h" @@ -178,6 +185,7 @@ #include "bli_swaps.h" #include "bli_xpbys.h" +#include "bli_xpbyjs.h" // Inlined scalar macros in loops #include "bli_adds_mxn.h" diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 2efaedf9e..5f921b79d 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -785,6 +785,7 @@ typedef enum typedef enum { BLIS_ADDV_KER = 0, + BLIS_AXPBYV_KER, BLIS_AXPYV_KER, BLIS_COPYV_KER, BLIS_DOTV_KER, @@ -795,9 +796,10 @@ typedef enum BLIS_SETV_KER, BLIS_SUBV_KER, BLIS_SWAPV_KER, + BLIS_XPBYV_KER, } l1vkr_t; -#define BLIS_NUM_LEVEL1V_KERS 11 +#define BLIS_NUM_LEVEL1V_KERS 13 typedef enum { diff --git a/frame/include/level0/bli_axpbyjs.h b/frame/include/level0/bli_axpbyjs.h new file mode 100644 index 000000000..3d0a663b2 --- /dev/null +++ b/frame/include/level0/bli_axpbyjs.h @@ -0,0 +1,481 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_AXPBYJS_H +#define BLIS_AXPBYJS_H + +// axpbyjs + +// Notes: +// - The first char encodes the type of a. +// - The second char encodes the type of x. +// - The third char encodes the type of b. +// - The fourth char encodes the type of y. 
+ + +// -- (axby) = (???s) ---------------------------------------------------------- + +#define bli_ssssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcssaxpbyjs( a, x, b, y ) bli_saxpbyjris( 
bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzssaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_ssdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sddsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dddsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cddsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zddsaxpbyjs( a, x, b, y 
) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzdsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define 
bli_zscsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zccsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), 
bli_simag(y) ) +#define bli_zzcsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zszsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), 
bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zczsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzzsaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) + +// -- (axby) = (???d) ---------------------------------------------------------- +// Naming: bli_<a><x><b><y>axpbyjs, where each type letter (s, d, c, z) selects +// the bli_?real/bli_?imag accessors applied to a, x, b and y respectively. +// In this group y is accessed through bli_dreal/bli_dimag and every macro +// forwards its eight real/imag arguments to bli_daxpbyjris(). + +#define bli_sssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zssdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define
bli_cdsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_scsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dcsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ccsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zcsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzsdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_ssddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dsddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y),
bli_dimag(y) ) +#define bli_csddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zsddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_scddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dcddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ccddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zcddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b),
bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzddaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zscdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x),
bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zccdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzcdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zszdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a),
bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zczdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzzdaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) + +#ifndef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c) ---------------------------------------------------------- +// This group is compiled only when BLIS_ENABLE_C99_COMPLEX is not defined. +// y is accessed through bli_creal/bli_cimag. The kernel is picked from the +// type letters of a, x and b: s/d count as "s" and c/z count as "c", giving +// bli_<a><x><b>axpbyjris(); the all-"s" case collapses to bli_saxpbyjris() +// and the all-"c" case to bli_caxpbyjris(). +// NOTE(review): the all-"s" case hands both the real and imaginary part of y +// to bli_saxpbyjris(); confirm that kernel also scales the imaginary part +// of y by b. + +#define bli_ssscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x),
bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddscaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdscaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris(
bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzscaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzscaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssdcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsdcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csdcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsdcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sddcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dddcaxpbyjs( a, x, b, y ) bli_saxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cddcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zddcaxpbyjs( a, x, b, y ) bli_cssaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define
bli_scdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzdcaxpbyjs( a, x, b, y ) bli_scsaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzdcaxpbyjs( a, x, b, y ) bli_ccsaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b),
bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddccaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdccaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzccaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzccaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_zreal(a), bli_zimag(a),
bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_sszcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dszcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cszcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zszcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdzcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddzcaxpbyjs( a, x, b, y ) bli_sscaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdzcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdzcaxpbyjs( a, x, b, y ) bli_cscaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sczcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dczcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cczcaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zczcaxpbyjs( a, x, b, y )
bli_caxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szzcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzzcaxpbyjs( a, x, b, y ) bli_sccaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czzcaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzzcaxpbyjs( a, x, b, y ) bli_caxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) + +// -- (axby) = (???z) ---------------------------------------------------------- +// Still inside the #ifndef BLIS_ENABLE_C99_COMPLEX branch. y is accessed +// through bli_zreal/bli_zimag. The kernel is picked from the type letters of +// a, x and b: s/d count as "d" and c/z count as "z", giving +// bli_<a><x><b>axpbyjris(); the all-"d" case collapses to bli_daxpbyjris() +// and the all-"z" case to bli_zaxpbyjris(). +// NOTE(review): the all-"d" case hands both the real and imaginary part of y +// to bli_daxpbyjris(); confirm that kernel also scales the imaginary part +// of y by b. + +#define bli_ssszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddszaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_creal(a), bli_cimag(a),
bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdszaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzszaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzszaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssdzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsdzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csdzaxpbyjs( a, x, b, y )
bli_zddaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsdzaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sddzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dddzaxpbyjs( a, x, b, y ) bli_daxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cddzaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zddzaxpbyjs( a, x, b, y ) bli_zddaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzdzaxpbyjs( a, x, b, y ) bli_dzdaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_czdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzdzaxpbyjs( a, x, b, y ) bli_zzdaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddczaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdczaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b),
bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzczaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzczaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_sszzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dszzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cszzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zszzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdzzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddzzaxpbyjs( a, x, b, y ) bli_ddzaxpbyjris( bli_dreal(a), bli_dimag(a),
bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdzzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdzzaxpbyjs( a, x, b, y ) bli_zdzaxpbyjris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sczzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dczzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cczzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zczzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szzzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzzzaxpbyjs( a, x, b, y ) bli_dzzaxpbyjris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czzzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzzzaxpbyjs( a, x, b, y ) bli_zaxpbyjris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) + +#else // ifdef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c) ---------------------------------------------------------- +// With BLIS_ENABLE_C99_COMPLEX defined, the macros visible here assign +// y = a * conj(x) + b * y directly on the operands instead of calling a +// bli_*axpbyjris() kernel. x goes through conjf() when its type letter (the +// second letter of the macro name) is c, through conj() when it is z, and is +// used unchanged when it is s or d. +// NOTE(review): conjf()/conj() are presumably the <complex.h> functions; +// confirm the header is included wherever these macros are expanded. + +#define bli_ssscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define
bli_dsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdscaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcscaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzscaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcdcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); }
+#define bli_szdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzdcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdccaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcccaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzccaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_sszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); }
+#define bli_zdzcaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_cczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zczcaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzzcaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +// -- (axby) = (???z) ---------------------------------------------------------- + +#define bli_ssszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdszaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcszaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzszaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssdzaxpbyjs( a, x, b, y ) 
{ (y) = (a) * (x) + (b) * (y); } +#define bli_dsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcdzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzdzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_ssczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdczaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dcczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_ccczaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zcczaxpbyjs( a, x, b, y ) { 
(y) = (a) * conjf(x) + (b) * (y); } +#define bli_szczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzczaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#define bli_sszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdzzaxpbyjs( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_dczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_cczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_zczzaxpbyjs( a, x, b, y ) { (y) = (a) * conjf(x) + (b) * (y); } +#define bli_szzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_dzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_czzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } +#define bli_zzzzaxpbyjs( a, x, b, y ) { (y) = (a) * conj(x) + (b) * (y); } + +#endif // BLIS_ENABLE_C99_COMPLEX + + +#define bli_saxpbyjs( a, x, b, y ) bli_ssssaxpbyjs( a, x, b, y ) +#define bli_daxpbyjs( a, x, b, y ) bli_ddddaxpbyjs( a, x, b, y ) +#define bli_caxpbyjs( a, x, b, y ) bli_ccccaxpbyjs( a, x, b, y ) +#define bli_zaxpbyjs( a, x, b, y ) bli_zzzzaxpbyjs( a, x, b, y ) + + +#endif + diff --git a/frame/include/level0/bli_axpbys.h b/frame/include/level0/bli_axpbys.h new file mode 100644 index 000000000..6b9bffd87 --- /dev/null +++ 
b/frame/include/level0/bli_axpbys.h @@ -0,0 +1,481 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_AXPBYS_H +#define BLIS_AXPBYS_H + +// axpbys + +// Notes: +// - The first char encodes the type of a. +// - The second char encodes the type of x. +// - The third char encodes the type of b. +// - The fourth char encodes the type of y. 
+ + +// -- (axby) = (???s) ---------------------------------------------------------- /* Every macro in this group forwards to bli_saxpbyris, passing a, x, b and y each as a (real, imag) accessor pair. The accessor prefix (bli_s*, bli_d*, bli_c*, bli_z*) used for an operand is the type character at that operand's position in the macro name; y is always read with bli_sreal/bli_simag here. */ + +#define bli_ssssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a),
bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szssaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzssaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czssaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzssaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_ssdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dsdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_csdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zsdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zddsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a),
bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_scdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzdsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zscsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a),
bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zccsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzcsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a),
bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zszsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sdzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zdzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_sczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zczsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a),
bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_szzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzzsaxpbys( a, x, b, y ) bli_saxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) + +// -- (axby) = (???d) ---------------------------------------------------------- /* Same layout as the (???s) group, except that y is read with bli_dreal/bli_dimag and every macro forwards to bli_daxpbyris. */ + +#define bli_sssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zssdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zdsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) /* (???d) group, continued: every entry forwards to bli_daxpbyris, with the accessor prefix for each of a, x and b taken from the matching type character in the macro name. */ +#define bli_scsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dcsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ccsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zcsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzsdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_ssddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dsddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_csddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zsddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_scddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dcddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ccddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zcddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szddaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzddaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czddaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zzddaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zscdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zdcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zccdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzcdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zszdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sdzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_ddzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cdzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zdzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_sczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_cczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zczdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_szzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dzzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_czzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zzzdaxpbys( a, x, b, y ) bli_daxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) ) + +#ifndef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c) ---------------------------------------------------------- /* Compiled only when BLIS_ENABLE_C99_COMPLEX is not defined. y is read with bli_creal/bli_cimag. For the entries whose b is s or d, the helper depends on which of a and x are complex (c or z): neither -> bli_saxpbyris, a only -> bli_cssaxpbyris, x only -> bli_scsaxpbyris, both -> bli_ccsaxpbyris. NOTE(review): the all-real case hands a complex y to bli_saxpbyris; that helper is not visible here, so confirm it also scales the imaginary part of y by b. */ + +#define bli_ssscaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsscaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csscaxpbys( a, x, b, y )
bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsscaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdscaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddscaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdscaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdscaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccscaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcscaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzscaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czscaxpbys( 
a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzscaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssdcaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsdcaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csdcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsdcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sddcaxpbys( a, x, b, y ) bli_saxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dddcaxpbys( a, x, b, y ) bli_saxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cddcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zddcaxpbys( a, x, b, y ) bli_cssaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define 
bli_ccdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzdcaxpbys( a, x, b, y ) bli_scsaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzdcaxpbys( a, x, b, y ) bli_ccsaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_ssccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dsccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_csccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zsccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddccaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), 
bli_cimag(y) ) +#define bli_cdccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdccaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_scccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dcccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ccccaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zcccaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzccaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czccaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzccaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) ) + +#define bli_sszcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dszcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), 
bli_creal(y), bli_cimag(y) ) +#define bli_cszcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zszcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sdzcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_ddzcaxpbys( a, x, b, y ) bli_sscaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cdzcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zdzcaxpbys( a, x, b, y ) bli_cscaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_sczcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dczcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_cczcaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zczcaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_szzcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_dzzcaxpbys( a, x, b, y ) bli_sccaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), 
bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_czzcaxpbys( a, x, b, y ) bli_caxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) +#define bli_zzzcaxpbys( a, x, b, y ) bli_caxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) ) + +// -- (axby) = (???z) ---------------------------------------------------------- + +#define bli_ssszaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsszaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdszaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddszaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdszaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scszaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcszaxpbys( a, x, b, 
y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szszaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzszaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzszaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssdzaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dsdzaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csdzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsdzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sddzaxpbys( a, x, b, y ) bli_daxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define 
bli_dddzaxpbys( a, x, b, y ) bli_daxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cddzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zddzaxpbys( a, x, b, y ) bli_zddaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzdzaxpbys( a, x, b, y ) bli_dzdaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzdzaxpbys( a, x, b, y ) bli_zzdaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_ssczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), 
bli_zimag(y) ) +#define bli_dsczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_csczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zsczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddczaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdczaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_scczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dcczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ccczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zcczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), 
bli_zreal(y), bli_zimag(y) ) +#define bli_dzczaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzczaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) ) + +#define bli_sszzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dszzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cszzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zszzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sdzzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_sreal(a), bli_simag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_ddzzaxpbys( a, x, b, y ) bli_ddzaxpbyris( bli_dreal(a), bli_dimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cdzzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_creal(a), bli_cimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zdzzaxpbys( a, x, b, y ) bli_zdzaxpbyris( bli_zreal(a), bli_zimag(a), bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_sczzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), 
bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dczzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_cczzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zczzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_szzzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_sreal(a), bli_simag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_dzzzaxpbys( a, x, b, y ) bli_dzzaxpbyris( bli_dreal(a), bli_dimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_czzzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_creal(a), bli_cimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) +#define bli_zzzzaxpbys( a, x, b, y ) bli_zaxpbyris( bli_zreal(a), bli_zimag(a), bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) ) + +#else // ifdef BLIS_ENABLE_C99_COMPLEX + +// -- (axby) = (???c) ---------------------------------------------------------- + +#define bli_ssscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define 
bli_dcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzscaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_ssdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzdcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_ssccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define 
bli_ddccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzccaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_sszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zczcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzzcaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +// -- (axby) = (???z) ---------------------------------------------------------- + 
+#define bli_ssszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzszaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +// (axby) = (??dz): like every macro in this branch (labelled BLIS_ENABLE_C99_COMPLEX by its #else/#endif comments), expands to the native expression y = a*x + b*y. +#define bli_ssdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zddzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define
bli_szdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzdzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +// NOTE(review): these bodies are bare { ... } blocks, so "if (c) bli_...axpbys( a, x, b, y ); else ..." does not compile (the ';' ends the if before the else); confirm no call site uses that form. +#define bli_ssczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_csczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zsczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_scczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ccczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zcczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzczaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#define bli_sszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zszzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_sdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_ddzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zdzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define
bli_sczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_cczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zczzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_szzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_dzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_czzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } +#define bli_zzzzaxpbys( a, x, b, y ) { (y) = (a) * (x) + (b) * (y); } + +#endif // BLIS_ENABLE_C99_COMPLEX + +// Same-type shorthands: each one-letter name forwards to the variant with that letter repeated for a, x, b and y. +#define bli_saxpbys( a, x, b, y ) bli_ssssaxpbys( a, x, b, y ) +#define bli_daxpbys( a, x, b, y ) bli_ddddaxpbys( a, x, b, y ) +#define bli_caxpbys( a, x, b, y ) bli_ccccaxpbys( a, x, b, y ) +#define bli_zaxpbys( a, x, b, y ) bli_zzzzaxpbys( a, x, b, y ) + + +#endif + diff --git a/frame/include/level0/bli_xpbyjs.h b/frame/include/level0/bli_xpbyjs.h new file mode 100644 index 000000000..f59b42af3 --- /dev/null +++ b/frame/include/level0/bli_xpbyjs.h @@ -0,0 +1,192 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission.
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_XPBYJS_H +#define BLIS_XPBYJS_H + +// xpbyjs + +// Notes: +// - The first char encodes the type of x. +// - The second char encodes the type of b. +// - The third char encodes the type of y. +// - For y of type s or d, every (x, b) type pairing below forwards to a helper selected by y's type alone (bli_sxpbyjris / bli_dxpbyjris). + +// -- (xby) = (??s) ------------------------------------------------------------ + +#define bli_sssxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dssxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cssxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zssxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_sdsxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ddsxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_cdsxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b),
bli_sreal(y), bli_simag(y) ) +#define bli_zdsxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_scsxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dcsxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_ccsxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zcsxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_sreal(y), bli_simag(y) ) + +#define bli_szsxpbyjs( x, b, y ) bli_sxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_dzsxpbyjs( x, b, y ) bli_sxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_czsxpbyjs( x, b, y ) bli_sxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) +#define bli_zzsxpbyjs( x, b, y ) bli_sxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_sreal(y), bli_simag(y) ) + +// -- (xby) = (??d) ------------------------------------------------------------ + +#define bli_ssdxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_dsdxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_csdxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) +#define bli_zsdxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_dreal(y), bli_dimag(y) ) + +#define bli_sddxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dddxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_cddxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zddxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_dreal(y), bli_dimag(y) )
+
+#define bli_scdxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dcdxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_ccdxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zcdxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_dreal(y), bli_dimag(y) )
+
+#define bli_szdxpbyjs( x, b, y ) bli_dxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_dzdxpbyjs( x, b, y ) bli_dxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_czdxpbyjs( x, b, y ) bli_dxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+#define bli_zzdxpbyjs( x, b, y ) bli_dxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_dreal(y), bli_dimag(y) )
+
+#ifndef BLIS_ENABLE_C99_COMPLEX
+
+// Complex y without C99 complex support: each macro splits x, b and y into
+// real/imaginary parts and forwards them to a kernel from bli_xpbyjris.h,
+// which updates y in place as y := conj(x) + b * y.
+//
+// Kernel selection for complex y:
+// - b real (s or d): bli_scxpbyjris / bli_dzxpbyjris. These scale BOTH
+//   components of y by real(b) and ignore imag(b).
+// - b complex (c or z): bli_cxpbyjris / bli_zxpbyjris (full complex product).
+//
+// The real-only kernels bli_sxpbyjris / bli_dxpbyjris must not be used here,
+// even when x is real: they write only the real component, so imag(y) would
+// be left unscaled by b. That would disagree with the C99 branch below, which
+// evaluates (y) = (x) + (b) * (y) on the whole complex value.
+
+// -- (xby) = (??c) ------------------------------------------------------------
+
+#define bli_sscxpbyjs( x, b, y ) bli_scxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+#define bli_dscxpbyjs( x, b, y ) bli_scxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+#define bli_cscxpbyjs( x, b, y ) bli_scxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zscxpbyjs( x, b, y ) bli_scxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_creal(y), bli_cimag(y) )
+
+#define bli_sdcxpbyjs( x, b, y ) bli_scxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_ddcxpbyjs( x, b, y ) bli_scxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_cdcxpbyjs( x, b, y ) bli_scxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zdcxpbyjs( x, b, y ) bli_scxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_creal(y), bli_cimag(y) )
+
+#define bli_sccxpbyjs( x, b, y ) bli_cxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_dccxpbyjs( x, b, y ) bli_cxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_cccxpbyjs( x, b, y ) bli_cxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zccxpbyjs( x, b, y ) bli_cxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_creal(y), bli_cimag(y) )
+
+#define bli_szcxpbyjs( x, b, y ) bli_cxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_dzcxpbyjs( x, b, y ) bli_cxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_czcxpbyjs( x, b, y ) bli_cxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+#define bli_zzcxpbyjs( x, b, y ) bli_cxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_creal(y), bli_cimag(y) )
+
+// -- (xby) = (??z) ------------------------------------------------------------
+
+#define bli_sszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_sreal(x), bli_simag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_dszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_dreal(x), bli_dimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_cszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_creal(x), bli_cimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zszxpbyjs( x, b, y ) bli_dzxpbyjris( bli_zreal(x), bli_zimag(x), bli_sreal(b), bli_simag(b), bli_zreal(y), bli_zimag(y) )
+
+#define bli_sdzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_sreal(x), bli_simag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_ddzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_dreal(x), bli_dimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_cdzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_creal(x), bli_cimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zdzxpbyjs( x, b, y ) bli_dzxpbyjris( bli_zreal(x), bli_zimag(x), bli_dreal(b), bli_dimag(b), bli_zreal(y), bli_zimag(y) )
+
+#define bli_sczxpbyjs( x, b, y ) bli_zxpbyjris( bli_sreal(x), bli_simag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_dczxpbyjs( x, b, y ) bli_zxpbyjris( bli_dreal(x), bli_dimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_cczxpbyjs( x, b, y ) bli_zxpbyjris( bli_creal(x), bli_cimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zczxpbyjs( x, b, y ) bli_zxpbyjris( bli_zreal(x), bli_zimag(x), bli_creal(b), bli_cimag(b), bli_zreal(y), bli_zimag(y) )
+
+#define bli_szzxpbyjs( x, b, y ) bli_zxpbyjris( bli_sreal(x), bli_simag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_dzzxpbyjs( x, b, y ) bli_zxpbyjris( bli_dreal(x), bli_dimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_czzxpbyjs( x, b, y ) bli_zxpbyjris( bli_creal(x), bli_cimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+#define bli_zzzxpbyjs( x, b, y ) bli_zxpbyjris( bli_zreal(x), bli_zimag(x), bli_zreal(b), bli_zimag(b), bli_zreal(y), bli_zimag(y) )
+
+#else // ifdef BLIS_ENABLE_C99_COMPLEX
+
+// Complex y when BLIS_ENABLE_C99_COMPLEX is defined: y is updated in place
+// with a single expression, y := conj(x) + b * y, evaluated on the whole
+// value of y rather than on separate real/imaginary parts.
+//
+// - When the first type char (x) is s or d, x is added as-is; no conjugate is
+//   taken, since none of these macros apply conjf()/conj() to a real x.
+// - When the first type char is c, x is conjugated with conjf(); when it is
+//   z, with conj().
+// - The second type char (b) does not change the expression: b multiplies y
+//   directly in every variant.
+//
+// NOTE(review): presumably scomplex/dcomplex are the native C99
+// float complex / double complex types in this configuration, so that
+// conjf()/conj() and the mixed real/complex arithmetic apply -- confirm
+// against the type definitions.
+
+// -- (xby) = (??c) ------------------------------------------------------------
+
+#define bli_sscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dscxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cscxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zscxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sdcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_ddcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cdcxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zdcxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dccxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cccxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zccxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_szcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dzcxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_czcxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zzcxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+// -- (xby) = (??z) ------------------------------------------------------------
+
+#define bli_sszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dszxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cszxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zszxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sdzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_ddzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cdzxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zdzxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_sczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_dczxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); }
+#define bli_cczxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); }
+#define bli_zczxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); }
+
+#define bli_szzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } +#define bli_dzzxpbyjs( x, b, y ) { (y) = (x) + (b) * (y); } +#define bli_czzxpbyjs( x, b, y ) { (y) = conjf(x) + (b) * (y); } +#define bli_zzzxpbyjs( x, b, y ) { (y) = conj(x) + (b) * (y); } + +#endif // BLIS_ENABLE_C99_COMPLEX + + +#define bli_sxpbyjs( x, b, y ) bli_sssxpbyjs( x, b, y ) +#define bli_dxpbyjs( x, b, y ) bli_dddxpbyjs( x, b, y ) +#define bli_cxpbyjs( x, b, y ) bli_cccxpbyjs( x, b, y ) +#define bli_zxpbyjs( x, b, y ) bli_zzzxpbyjs( x, b, y ) + + +#endif + diff --git a/frame/include/level0/ri/bli_axpbyjris.h b/frame/include/level0/ri/bli_axpbyjris.h new file mode 100644 index 000000000..24512d223 --- /dev/null +++ b/frame/include/level0/ri/bli_axpbyjris.h @@ -0,0 +1,163 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#ifndef BLIS_AXPBYJRIS_H +#define BLIS_AXPBYJRIS_H + +// axpbyjris + +#define bli_saxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (ar) * (xr) + (br) * (yr); \ +} + +#define bli_daxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (ar) * (xr) + (br) * (yr); \ +} + +#define bli_caxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) - (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sccaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = -(ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ccsaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr); \ + float yt_i = (ai) * (xr) - (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cscaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sscaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cssaxpbyjris( ar, ai, xr, xi, br, bi, 
yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_scsaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = -(ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (ai) * (xr) - (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_dzzaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = -(ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zzdaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (ai) * (xi) + (br) * (yr); \ + double yt_i = (ai) * (xr) - (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zdzaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ddzaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zddaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_dzdaxpbyjris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = -(ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#endif + diff --git a/frame/include/level0/ri/bli_axpbyris.h b/frame/include/level0/ri/bli_axpbyris.h index 2303db350..fbc49f0fa 
100644 --- a/frame/include/level0/ri/bli_axpbyris.h +++ b/frame/include/level0/ri/bli_axpbyris.h @@ -49,10 +49,58 @@ #define bli_caxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ { \ - float yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ - float yt_i = (ai) * (xr) + (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ - (yr) = yt_r; \ - (yi) = yt_i; \ + float yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) + (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sccaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ccsaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr); \ + float yt_i = (ai) * (xr) + (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cscaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_sscaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_cssaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_scsaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (ar) * (xr) + (br) * (yr); \ + float yt_i = (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ } #define bli_zaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ @@ -63,5 +111,53 @@ (yi) = yt_i; \ } +#define bli_dzzaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - 
(bi) * (yi); \ + double yt_i = (ar) * (xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zzdaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) - (ai) * (xi) + (br) * (yr); \ + double yt_i = (ai) * (xr) + (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zdzaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (ai) * (xr) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_ddzaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zddaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = (ai) * (xr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_dzdaxpbyris( ar, ai, xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (ar) * (xr) + (br) * (yr); \ + double yt_i = (ar) * (xi) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + #endif diff --git a/frame/include/level0/ri/bli_xpbyjris.h b/frame/include/level0/ri/bli_xpbyjris.h new file mode 100644 index 000000000..fe3cf6767 --- /dev/null +++ b/frame/include/level0/ri/bli_xpbyjris.h @@ -0,0 +1,79 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#ifndef BLIS_XPBYJRIS_H +#define BLIS_XPBYJRIS_H + +// xpbyjris + +#define bli_sxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ +} + +#define bli_dxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ +} + +#define bli_cxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + float yt_r = (xr) + (br) * (yr) - (bi) * (yi); \ + float yt_i = -(xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_zxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + double yt_r = (xr) + (br) * (yr) - (bi) * (yi); \ + double yt_i = -(xi) + (bi) * (yr) + (br) * (yi); \ + (yr) = yt_r; \ + (yi) = yt_i; \ +} + +#define bli_scxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ + (yi) = -(xi) + (br) * (yi); \ +} + +#define bli_dzxpbyjris( xr, xi, br, bi, yr, yi ) \ +{ \ + (yr) = (xr) + (br) * (yr); \ + (yi) = -(xi) + (br) * (yi); \ +} + +#endif + diff --git a/testsuite/input.operations b/testsuite/input.operations index 87b1090b0..058721632 100644 --- a/testsuite/input.operations +++ b/testsuite/input.operations @@ -107,6 +107,11 @@ -1 # dimensions: m ? # parameters: conjx +1 # axpbyv +1 # test sequential front-end +-1 # dimensions: m +? # parameters: conjx + 1 # axpyv 1 # test sequential front-end -1 # dimensions: m @@ -150,6 +155,11 @@ -1 # dimensions: m ? # parameters: conjx +1 # xpbyv +1 # test sequential front-end +-1 # dimensions: m +? # parameters: conjx + # --- Level-1m ------------------------------------------------------------- diff --git a/testsuite/src/test_axpbyv.c b/testsuite/src/test_axpbyv.c new file mode 100644 index 000000000..d9e3c18a0 --- /dev/null +++ b/testsuite/src/test_axpbyv.c @@ -0,0 +1,282 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "test_libblis.h" + + +// Static variables. +static char* op_str = "axpbyv"; +static char* o_types = "vv"; // x y +static char* p_types = "c"; // conjx +static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s + { 1e-04, 1e-05 }, // warn, pass for c + { 1e-13, 1e-14 }, // warn, pass for d + { 1e-13, 1e-14 } }; // warn, pass for z + +// Local prototypes. 
+void libblis_test_axpbyv_deps( test_params_t* params, + test_op_t* op ); + +void libblis_test_axpbyv_experiment( test_params_t* params, + test_op_t* op, + iface_t iface, + num_t datatype, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid ); + +void libblis_test_axpbyv_impl( iface_t iface, + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y ); + +void libblis_test_axpbyv_check( obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y, + obj_t* y_orig, + double* resid ); + + + +void libblis_test_axpbyv_deps( test_params_t* params, test_op_t* op ) +{ + libblis_test_randv( params, &(op->ops->randv) ); + libblis_test_normfv( params, &(op->ops->normfv) ); + libblis_test_addv( params, &(op->ops->addv) ); + libblis_test_axpyv( params, &(op->ops->axpyv) ); + libblis_test_subv( params, &(op->ops->subv) ); + libblis_test_copyv( params, &(op->ops->copyv) ); + libblis_test_scalv( params, &(op->ops->scalv) ); + libblis_test_scal2v( params, &(op->ops->scal2v) ); + libblis_test_xpbyv( params, &(op->ops->xpbyv) ); +} + + + +void libblis_test_axpbyv( test_params_t* params, test_op_t* op ) +{ + + // Return early if this test has already been done. + if ( op->test_done == TRUE ) return; + + // Return early if operation is disabled. + if ( op->op_switch == DISABLE_ALL || + op->ops->l1v_over == DISABLE_ALL ) return; + + // Call dependencies first. + if ( TRUE ) libblis_test_axpbyv_deps( params, op ); + + // Execute the test driver for each implementation requested. 
+ if ( op->front_seq == ENABLE ) + { + libblis_test_op_driver( params, + op, + BLIS_TEST_SEQ_FRONT_END, + op_str, + p_types, + o_types, + thresh, + libblis_test_axpbyv_experiment ); + } +} + + + +void libblis_test_axpbyv_experiment( test_params_t* params, + test_op_t* op, + iface_t iface, + num_t datatype, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid ) +{ + unsigned int n_repeats = params->n_repeats; + unsigned int i; + + double time_min = 1e9; + double time; + + dim_t m; + + conj_t conjx; + + obj_t alpha, beta, x, y; + obj_t y_save; + + + // Map the dimension specifier to an actual dimension. + m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); + + // Map parameter characters to BLIS constants. + bli_param_map_char_to_blis_conj( pc_str[0], &conjx ); + + // Create test scalars. + bli_obj_scalar_init_detached( datatype, &alpha ); + bli_obj_scalar_init_detached( datatype, &beta ); + + // Create test operands (vectors and/or matrices). + libblis_test_vobj_create( params, datatype, sc_str[0], m, &x ); + libblis_test_vobj_create( params, datatype, sc_str[1], m, &y ); + libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save ); + + // Set alpha. + //bli_setsc( sqrt(2.0)/2.0, sqrt(2.0)/2.0, &alpha ); + //bli_copysc( &BLIS_TWO, &alpha ); + if ( bli_obj_is_real( y ) ) + bli_setsc( -2.0, 0.0, &alpha ); + else + bli_setsc( 0.0, -2.0, &alpha ); + + bli_setsc( -1.0, 0.0, &beta ); + + // Randomize x and y, and save y. + bli_randv( &x ); + bli_randv( &y ); + bli_copyv( &y, &y_save ); + + // Apply the parameters. + bli_obj_set_conj( conjx, x ); + + // Repeat the experiment n_repeats times and record results. + for ( i = 0; i < n_repeats; ++i ) + { + bli_copyv( &y_save, &y ); + + time = bli_clock(); + + libblis_test_axpbyv_impl( iface, &alpha, &x, &beta, &y ); + + time_min = bli_clock_min_diff( time_min, time ); + } + + // Estimate the performance of the best experiment repeat. 
+ *perf = ( 3.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( y ) ) *perf *= 14.0 / 3.0; + + // Perform checks. + libblis_test_axpbyv_check( &alpha, &x, &beta, &y, &y_save, resid ); + + // Zero out performance and residual if output vector is empty. + libblis_test_check_empty_problem( &y, perf, resid ); + + // Free the test objects. + bli_obj_free( &x ); + bli_obj_free( &y ); + bli_obj_free( &y_save ); +} + + + +void libblis_test_axpbyv_impl( iface_t iface, + obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y ) +{ + switch ( iface ) + { + case BLIS_TEST_SEQ_FRONT_END: + bli_axpbyv( alpha, x, beta, y ); + break; + + default: + libblis_test_printf_error( "Invalid interface type.\n" ); + } +} + + + +void libblis_test_axpbyv_check( obj_t* alpha, + obj_t* x, + obj_t* beta, + obj_t* y, + obj_t* y_orig, + double* resid ) +{ + num_t dt = bli_obj_datatype( *y ); + num_t dt_real = bli_obj_datatype_proj_to_real( *y ); + + dim_t m = bli_obj_vector_dim( *y ); + + obj_t x_temp, y_temp; + obj_t norm; + + double junk; + + // + // Pre-conditions: + // - x is randomized. + // - y_orig is randomized. + // Note: + // - alpha should have a non-zero imaginary component in the complex + // cases in order to more fully exercise the implementation. + // + // Under these conditions, we assume that the implementation for + // + // y := beta * y_orig + alpha * conjx(x) + // + // is functioning correctly if + // + // normf( y - ( beta * y_orig + alpha * conjx(x) ) ) + // + // is negligible. 
+ // + + bli_obj_scalar_init_detached( dt_real, &norm ); + + bli_obj_create( dt, m, 1, 0, 0, &x_temp ); + bli_obj_create( dt, m, 1, 0, 0, &y_temp ); + + bli_copyv( x, &x_temp ); + bli_copyv( y_orig, &y_temp ); + + bli_scalv( alpha, &x_temp ); + bli_scalv( beta, &y_temp ); + bli_addv( &x_temp, &y_temp ); + + bli_subv( &y_temp, y ); + bli_normfv( y, &norm ); + bli_getsc( &norm, resid, &junk ); + + bli_obj_free( &x_temp ); + bli_obj_free( &y_temp ); +} + diff --git a/testsuite/src/test_axpbyv.h b/testsuite/src/test_axpbyv.h new file mode 100644 index 000000000..d0ff10644 --- /dev/null +++ b/testsuite/src/test_axpbyv.h @@ -0,0 +1,36 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void libblis_test_axpbyv( test_params_t* params, test_op_t* op ); + diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index d07eade90..d48516a12 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -113,7 +113,8 @@ void libblis_test_utility_ops( test_params_t* params, test_ops_t* ops ) void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) { libblis_test_addv( params, &(ops->addv) ); - libblis_test_axpyv( params, &(ops->axpyv) ); + libblis_test_axpbyv( params, &(ops->axpbyv) ); + libblis_test_axpyv( params, &(ops->axpyv) ); libblis_test_copyv( params, &(ops->copyv) ); libblis_test_dotv( params, &(ops->dotv) ); libblis_test_dotxv( params, &(ops->dotxv) ); @@ -122,6 +123,7 @@ void libblis_test_level1v_ops( test_params_t* params, test_ops_t* ops ) libblis_test_scal2v( params, &(ops->scal2v) ); libblis_test_setv( params, &(ops->setv) ); libblis_test_subv( params, &(ops->subv) ); + libblis_test_xpbyv( params, &(ops->xpbyv) ); } @@ -220,6 +222,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) // Level-1v libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->addv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpbyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->axpyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, 
BLIS_TEST_DIMS_M, 1, &(ops->copyv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->dotv) ); @@ -229,6 +232,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scal2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->setv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->subv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->xpbyv) ); // Level-1m libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->addm) ); diff --git a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index 0725dd400..8a84a2d9b 100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -204,7 +204,8 @@ typedef struct test_ops_s // level-1v test_op_t addv; - test_op_t axpyv; + test_op_t axpbyv; + test_op_t axpyv; test_op_t copyv; test_op_t dotv; test_op_t dotxv; @@ -213,6 +214,7 @@ typedef struct test_ops_s test_op_t scal2v; test_op_t setv; test_op_t subv; + test_op_t xpbyv; // level-1m test_op_t addm; @@ -415,6 +417,7 @@ void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); // Level-1v #include "test_addv.h" +#include "test_axpbyv.h" #include "test_axpyv.h" #include "test_copyv.h" #include "test_dotv.h" @@ -424,6 +427,7 @@ void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); #include "test_scal2v.h" #include "test_setv.h" #include "test_subv.h" +#include "test_xpbyv.h" // Level-1m #include "test_addm.h" diff --git a/testsuite/src/test_xpbyv.c b/testsuite/src/test_xpbyv.c new file mode 100644 index 000000000..a6610905b --- /dev/null +++ b/testsuite/src/test_xpbyv.c @@ -0,0 +1,268 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+
+   Copyright (C) 2014, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name of The University of Texas at Austin nor the names
+      of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+#include "blis.h"
+#include "test_libblis.h"
+
+// Static variables: the operation's name, its operand and parameter type
+// strings, and the per-datatype warn/pass thresholds given to the test driver.
+static char*     op_str                    = "xpbyv";
+static char*     o_types                   = "vv";  // x y
+static char*     p_types                   = "c";   // conjx
+static thresh_t  thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 },   // warn, pass for s
+                                               { 1e-04, 1e-05 },   // warn, pass for c
+                                               { 1e-13, 1e-14 },   // warn, pass for d
+                                               { 1e-13, 1e-14 } }; // warn, pass for z
+
+// Local prototypes.
+void libblis_test_xpbyv_deps( test_params_t* params,
+                              test_op_t*     op );
+
+void libblis_test_xpbyv_experiment( test_params_t* params,
+                                    test_op_t*     op,
+                                    iface_t        iface,
+                                    num_t          datatype,
+                                    char*          pc_str,
+                                    char*          sc_str,
+                                    unsigned int   p_cur,
+                                    double*        perf,
+                                    double*        resid );
+
+void libblis_test_xpbyv_impl( iface_t   iface,
+                              obj_t*    x,
+                              obj_t*    beta,
+                              obj_t*    y );
+
+void libblis_test_xpbyv_check( obj_t*  x,
+                               obj_t*  beta,
+                               obj_t*  y,
+                               obj_t*  y_orig,
+                               double* resid );
+
+// Runs the tests of the operations this test uses, before xpbyv itself.
+// NOTE(review): the check also calls bli_scalv(); scalv is not listed -- confirm.
+void libblis_test_xpbyv_deps( test_params_t* params, test_op_t* op )
+{
+	libblis_test_randv( params, &(op->ops->randv) );
+	libblis_test_normfv( params, &(op->ops->normfv) );
+	libblis_test_addv( params, &(op->ops->addv) );
+	libblis_test_subv( params, &(op->ops->subv) );
+	libblis_test_copyv( params, &(op->ops->copyv) );
+}
+
+// Entry point for the xpbyv test: returns early if the test is already done or
+// disabled, runs the dependencies, then hands the experiment to the driver.
+void libblis_test_xpbyv( test_params_t* params, test_op_t* op )
+{
+
+	// Return early if this test has already been done.
+	if ( op->test_done == TRUE ) return;
+
+	// Return early if operation is disabled (individually or via the level-1v override).
+	if ( op->op_switch == DISABLE_ALL ||
+	     op->ops->l1v_over == DISABLE_ALL ) return;
+
+	// Call dependencies first (unconditionally; the condition is the constant TRUE).
+	if ( TRUE ) libblis_test_xpbyv_deps( params, op );
+
+	// Execute the test driver for each implementation requested.
+	if ( op->front_seq == ENABLE )
+	{
+		libblis_test_op_driver( params,
+		                        op,
+		                        BLIS_TEST_SEQ_FRONT_END,
+		                        op_str,
+		                        p_types,
+		                        o_types,
+		                        thresh,
+		                        libblis_test_xpbyv_experiment );
+	}
+}
+
+// Runs xpbyv n_repeats times on one problem size, storing a performance
+// estimate from the minimum time in *perf and the check's residual in *resid.
+void libblis_test_xpbyv_experiment( test_params_t* params,
+                                    test_op_t*     op,
+                                    iface_t        iface,
+                                    num_t          datatype,
+                                    char*          pc_str,
+                                    char*          sc_str,
+                                    unsigned int   p_cur,
+                                    double*        perf,
+                                    double*        resid )
+{
+	unsigned int n_repeats = params->n_repeats;
+	unsigned int i;
+
+	double       time_min  = 1e9; // updated by bli_clock_min_diff() on every repeat
+	double       time;
+
+	dim_t        m;
+
+	conj_t       conjx;
+
+	obj_t        beta, x, y;
+	obj_t        y_save;
+
+
+	// Map the dimension specifier to an actual dimension.
+	m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur );
+
+	// Map parameter characters to BLIS constants.
+	bli_param_map_char_to_blis_conj( pc_str[0], &conjx );
+
+	// Create test scalars.
+	bli_obj_scalar_init_detached( datatype, &beta );
+
+	// Create test operands (vectors and/or matrices).
+	libblis_test_vobj_create( params, datatype, sc_str[0], m, &x );
+	libblis_test_vobj_create( params, datatype, sc_str[1], m, &y );
+	libblis_test_vobj_create( params, datatype, sc_str[1], m, &y_save );
+
+	// Set beta: (-2, 0) when y is real, (0, -2) otherwise (presumably real/imaginary parts).
+	if ( bli_obj_is_real( y ) )
+		bli_setsc( -2.0,  0.0, &beta );
+	else
+		bli_setsc(  0.0, -2.0, &beta );
+
+	// Randomize x and y, and save y.
+	bli_randv( &x );
+	bli_randv( &y );
+	bli_copyv( &y, &y_save );
+
+	// Apply the parameters.
+	bli_obj_set_conj( conjx, x );
+
+	// Repeat the experiment n_repeats times and record results.
+	for ( i = 0; i < n_repeats; ++i )
+	{
+		bli_copyv( &y_save, &y ); // restore y so every repeat starts from the saved input
+
+		time = bli_clock();
+
+		libblis_test_xpbyv_impl( iface, &x, &beta, &y );
+
+		time_min = bli_clock_min_diff( time_min, time );
+	}
+
+	// Estimate the performance of the best experiment repeat (2*m operations, times 4 if y is complex).
+	*perf = ( 2.0 * m ) / time_min / FLOPS_PER_UNIT_PERF;
+	if ( bli_obj_is_complex( y ) ) *perf *= 4.0;
+
+	// Perform checks.
+	libblis_test_xpbyv_check( &x, &beta, &y, &y_save, resid );
+
+	// Zero out performance and residual if output vector is empty.
+	libblis_test_check_empty_problem( &y, perf, resid );
+
+	// Free the test objects.
+	bli_obj_free( &x );
+	bli_obj_free( &y );
+	bli_obj_free( &y_save );
+}
+
+// Dispatches on iface: the sequential front-end case calls bli_xpbyv(); any
+// other value is reported through libblis_test_printf_error().
+void libblis_test_xpbyv_impl( iface_t   iface,
+                              obj_t*    x,
+                              obj_t*    beta,
+                              obj_t*    y )
+{
+	switch ( iface )
+	{
+		case BLIS_TEST_SEQ_FRONT_END:
+		bli_xpbyv( x, beta, y );
+		break;
+
+		default:
+		libblis_test_printf_error( "Invalid interface type.\n" );
+	}
+}
+
+// Stores in *resid the residual of a completed xpbyv call (criterion below).
+// NOTE(review): y is passed to bli_subv(), so it appears to be modified here.
+void libblis_test_xpbyv_check( obj_t*  x,
+                               obj_t*  beta,
+                               obj_t*  y,
+                               obj_t*  y_orig,
+                               double* resid )
+{
+	num_t  dt      = bli_obj_datatype( *y );
+	num_t  dt_real = bli_obj_datatype_proj_to_real( *y );
+
+	dim_t  m       = bli_obj_vector_dim( *y );
+
+	obj_t  x_temp, y_temp;
+	obj_t  norm;
+
+	double junk; // receives the second output of bli_getsc(); not used afterwards
+
+	//
+	// Pre-conditions:
+	// - x is randomized.
+	// - y_orig is randomized.
+	// Note:
+	// - beta should have a non-zero imaginary component in the complex
+	//   cases in order to more fully exercise the implementation.
+	//
+	// Under these conditions, we assume that the implementation for
+	//
+	//   y := beta * y_orig + conjx(x)
+	//
+	// is functioning correctly if
+	//
+	//   normf( y - ( beta * y_orig + conjx(x) ) )
+	//
+	// is negligible.
+	//
+
+	bli_obj_scalar_init_detached( dt_real, &norm );
+
+	bli_obj_create( dt, m, 1, 0, 0, &x_temp );
+	bli_obj_create( dt, m, 1, 0, 0, &y_temp );
+
+	bli_copyv( x, &x_temp );
+	bli_copyv( y_orig, &y_temp );
+
+	bli_scalv( beta, &y_temp );
+	bli_addv( &x_temp, &y_temp );
+
+	bli_subv( &y_temp, y );
+	bli_normfv( y, &norm );
+	bli_getsc( &norm, resid, &junk );
+
+	bli_obj_free( &x_temp );
+	bli_obj_free( &y_temp );
+}
+
diff --git a/testsuite/src/test_xpbyv.h b/testsuite/src/test_xpbyv.h
new file mode 100644
index 000000000..79b01c18d
--- /dev/null
+++ b/testsuite/src/test_xpbyv.h
@@ -0,0 +1,36 @@
+/*
+
+   BLIS
+   An object-based framework for developing high-performance BLAS-like
+   libraries.
+
+   Copyright (C) 2014, The University of Texas at Austin
+
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions are
+   met:
+    - Redistributions of source code must retain the above copyright
+      notice, this list of conditions and the following disclaimer.
+    - Redistributions in binary form must reproduce the above copyright
+      notice, this list of conditions and the following disclaimer in the
+      documentation and/or other materials provided with the distribution.
+    - Neither the name of The University of Texas at Austin nor the names
+      of its contributors may be used to endorse or promote products
+      derived from this software without specific prior written permission.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+// Test entry point for the xpbyv operation (defined in test_xpbyv.c).
+void libblis_test_xpbyv( test_params_t* params, test_op_t* op );
+