diff --git a/docs/BLISObjectAPI.md b/docs/BLISObjectAPI.md index 5e8ed3d8f..51f5753a0 100644 --- a/docs/BLISObjectAPI.md +++ b/docs/BLISObjectAPI.md @@ -41,11 +41,11 @@ This index provides a quick way to jump directly to the description for each operation discussed later in the [Computational function reference](BLISObjectAPI.md#computational-function-reference) section: * **[Level-1v](BLISObjectAPI.md#level-1v-operations)**: Operations on vectors: - * [addv](BLISObjectAPI.md#addv), [amaxv](BLISObjectAPI.md#amaxv), [axpyv](BLISObjectAPI.md#axpyv), [axpbyv](BLISObjectAPI.md#axpbyv), [copyv](BLISObjectAPI.md#copyv), [dotv](BLISObjectAPI.md#dotv), [dotxv](BLISObjectAPI.md#dotxv), [invertv](BLISObjectAPI.md#invertv), [scal2v](BLISObjectAPI.md#scal2v), [scalv](BLISObjectAPI.md#scalv), [setv](BLISObjectAPI.md#setv), [setrv](BLISObjectAPI.md#setrv), [setiv](BLISObjectAPI.md#setiv), [subv](BLISObjectAPI.md#subv), [swapv](BLISObjectAPI.md#swapv), [xpbyv](BLISObjectAPI.md#xpbyv) + * [addv](BLISObjectAPI.md#addv), [amaxv](BLISObjectAPI.md#amaxv), [axpyv](BLISObjectAPI.md#axpyv), [axpbyv](BLISObjectAPI.md#axpbyv), [copyv](BLISObjectAPI.md#copyv), [dotv](BLISObjectAPI.md#dotv), [dotxv](BLISObjectAPI.md#dotxv), [invertv](BLISObjectAPI.md#invertv), [invscalv](BLISObjectAPI.md#invscalv), [scalv](BLISObjectAPI.md#scalv), [scal2v](BLISObjectAPI.md#scal2v), [setv](BLISObjectAPI.md#setv), [setrv](BLISObjectAPI.md#setrv), [setiv](BLISObjectAPI.md#setiv), [subv](BLISObjectAPI.md#subv), [swapv](BLISObjectAPI.md#swapv), [xpbyv](BLISObjectAPI.md#xpbyv) * **[Level-1d](BLISObjectAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals: - * [addd](BLISObjectAPI.md#addd), [axpyd](BLISObjectAPI.md#axpyd), [copyd](BLISObjectAPI.md#copyd), [invertd](BLISObjectAPI.md#invertd), [scald](BLISObjectAPI.md#scald), [scal2d](BLISObjectAPI.md#scal2d), [setd](BLISObjectAPI.md#setd), [setid](BLISObjectAPI.md#setid), [shiftd](BLISObjectAPI.md#shiftd), [subd](BLISObjectAPI.md#subd), 
[xpbyd](BLISObjectAPI.md#xpbyd) + * [addd](BLISObjectAPI.md#addd), [axpyd](BLISObjectAPI.md#axpyd), [copyd](BLISObjectAPI.md#copyd), [invertd](BLISObjectAPI.md#invertd), [invscald](BLISObjectAPI.md#invscald), [scald](BLISObjectAPI.md#scald), [scal2d](BLISObjectAPI.md#scal2d), [setd](BLISObjectAPI.md#setd), [setid](BLISObjectAPI.md#setid), [shiftd](BLISObjectAPI.md#shiftd), [subd](BLISObjectAPI.md#subd), [xpbyd](BLISObjectAPI.md#xpbyd) * **[Level-1m](BLISObjectAPI.md#level-1m-operations)**: Element-wise operations on matrices: - * [addm](BLISObjectAPI.md#addm), [axpym](BLISObjectAPI.md#axpym), [copym](BLISObjectAPI.md#copym), [scalm](BLISObjectAPI.md#scalm), [scal2m](BLISObjectAPI.md#scal2m), [setm](BLISObjectAPI.md#setm), [setrm](BLISObjectAPI.md#setrm), [setim](BLISObjectAPI.md#setim), [subm](BLISObjectAPI.md#subm) + * [addm](BLISObjectAPI.md#addm), [axpym](BLISObjectAPI.md#axpym), [copym](BLISObjectAPI.md#copym), [invscalm](BLISObjectAPI.md#invscalm), [scalm](BLISObjectAPI.md#scalm), [scal2m](BLISObjectAPI.md#scal2m), [setm](BLISObjectAPI.md#setm), [setrm](BLISObjectAPI.md#setrm), [setim](BLISObjectAPI.md#setim), [subm](BLISObjectAPI.md#subm) * **[Level-1f](BLISObjectAPI.md#level-1f-operations)**: Fused operations on multiple vectors: * [axpy2v](BLISObjectAPI.md#axpy2v), [dotaxpyv](BLISObjectAPI.md#dotaxpyv), [axpyf](BLISObjectAPI.md#axpyf), [dotxf](BLISObjectAPI.md#dotxf), [dotxaxpyf](BLISObjectAPI.md#dotxaxpyf) * **[Level-2](BLISObjectAPI.md#level-2-operations)**: Operations with one matrix and (at least) one vector operand: @@ -845,6 +845,24 @@ Invert all elements of an _n_-length vector `x`. --- +#### invscalv +```c +void bli_invscalv + ( + obj_t* alpha, + obj_t* x + ); +``` +Perform +``` + x := ( 1.0 / conj?(alpha) ) * x +``` +where `x` is a vector of length _n_, and `alpha` is a scalar. + +Observed object properties: `conj?(alpha)`. + +--- + #### scalv ```c void bli_scalv @@ -1049,6 +1067,19 @@ Observed object properties: `diagoff(A)`. 
--- +#### invscald +```c +void bli_invscald + ( + obj_t* alpha, + obj_t* a + ); +``` + +Observed object properties: `conj?(alpha)`, `diagoff(A)`. + +--- + #### scald ```c void bli_scald @@ -1213,6 +1244,24 @@ Observed object properties: `diagoff(A)`, `diag(A)`, `uplo(A)`, `trans?(A)`. --- +#### invscalm +```c +void bli_invscalm + ( + obj_t* alpha, + obj_t* a + ); +``` +Perform +``` + A := ( 1.0 / conj?(alpha) ) * A +``` +where `A` is an _m x n_ matrix stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix with arbitrary diagonal offset. If `uplo(A)` indicates lower or upper storage, only that part of matrix `A` will be updated. + +Observed object properties: `conj?(alpha)`, `diagoff(A)`, `uplo(A)`. + +--- + #### scalm ```c void bli_scalm diff --git a/docs/BLISTypedAPI.md b/docs/BLISTypedAPI.md index 76d7ef8f6..497776a15 100644 --- a/docs/BLISTypedAPI.md +++ b/docs/BLISTypedAPI.md @@ -36,11 +36,11 @@ This index provides a quick way to jump directly to the description for each operation discussed later in the [Computational function reference](BLISTypedAPI.md#computational-function-reference) section: * **[Level-1v](BLISTypedAPI.md#level-1v-operations)**: Operations on vectors: - * [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [axpbyv](BLISTypedAPI.md#axpbyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [scal2v](BLISTypedAPI.md#scal2v), [scalv](BLISTypedAPI.md#scalv), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv), [xpbyv](BLISTypedAPI.md#xpbyv) + * [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [axpbyv](BLISTypedAPI.md#axpbyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [invscalv](BLISTypedAPI.md#invscalv), [scalv](BLISTypedAPI.md#scalv), 
[scal2v](BLISTypedAPI.md#scal2v), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv), [xpbyv](BLISTypedAPI.md#xpbyv) * **[Level-1d](BLISTypedAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals: - * [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [shiftd](BLISTypedAPI.md#shiftd), [subd](BLISTypedAPI.md#subd), [xpbyd](BLISTypedAPI.md#xpbyd) + * [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [invscald](BLISTypedAPI.md#invscald), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [shiftd](BLISTypedAPI.md#shiftd), [subd](BLISTypedAPI.md#subd), [xpbyd](BLISTypedAPI.md#xpbyd) * **[Level-1m](BLISTypedAPI.md#level-1m-operations)**: Element-wise operations on matrices: - * [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm) + * [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [invscalm](BLISTypedAPI.md#invscalm), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm) * **[Level-1f](BLISTypedAPI.md#level-1f-operations)**: Fused operations on multiple vectors: * [axpy2v](BLISTypedAPI.md#axpy2v), [dotaxpyv](BLISTypedAPI.md#dotaxpyv), [axpyf](BLISTypedAPI.md#axpyf), [dotxf](BLISTypedAPI.md#dotxf), [dotxaxpyf](BLISTypedAPI.md#dotxaxpyf) * **[Level-2](BLISTypedAPI.md#level-2-operations)**: Operations with one matrix and (at least) one vector operand: @@ -369,6 +369,24 @@ Invert 
all elements of an _n_-length vector `x`. --- +#### invscalv +```c +void bli_?invscalv + ( + conj_t conjalpha, + dim_t n, + ctype* alpha, + ctype* x, inc_t incx + ); +``` +Perform +``` + x := ( 1.0 / conjalpha(alpha) ) * x +``` +where `x` is a vector of length _n_, and `alpha` is a scalar. + +--- + #### scalv ```c void bli_?scalv @@ -548,6 +566,21 @@ void bli_?invertd --- +#### invscald +```c +void bli_?invscald + ( + conj_t conjalpha, + doff_t diagoffa, + dim_t m, + dim_t n, + ctype* alpha, + ctype* a, inc_t rsa, inc_t csa + ); +``` + +--- + #### scald ```c void bli_?scald @@ -737,6 +770,27 @@ where `B` is an _m x n_ matrix, `A` is stored as a dense matrix, or lower- or up --- +#### invscalm +```c +void bli_?invscalm + ( + conj_t conjalpha, + doff_t diagoffa, + uplo_t uploa, + dim_t m, + dim_t n, + ctype* alpha, + ctype* a, inc_t rsa, inc_t csa + ); +``` +Perform +``` + A := ( 1.0 / conjalpha(alpha) ) * A +``` +where `A` is an _m x n_ matrix stored as a dense matrix, or lower- or upper-triangular/trapezoidal matrix, as specified by `uploa`, with the diagonal offset of `A` specified by `diagoffa`. If `uploa` indicates lower or upper storage, only that part of matrix `A` will be updated. 
+ +--- + #### scalm ```c void bli_?scalm diff --git a/docs/KernelsHowTo.md b/docs/KernelsHowTo.md index 6e84db8e7..30a4dc736 100644 --- a/docs/KernelsHowTo.md +++ b/docs/KernelsHowTo.md @@ -22,11 +22,11 @@ One of the primary features of BLIS is that it provides a large set of dense lin Presently, BLIS supports several groups of operations: * **[Level-1v](BLISTypedAPI.md#level-1v-operations)**: Operations on vectors: - * [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [scal2v](BLISTypedAPI.md#scal2v), [scalv](BLISTypedAPI.md#scalv), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv) + * [addv](BLISTypedAPI.md#addv), [amaxv](BLISTypedAPI.md#amaxv), [axpyv](BLISTypedAPI.md#axpyv), [copyv](BLISTypedAPI.md#copyv), [dotv](BLISTypedAPI.md#dotv), [dotxv](BLISTypedAPI.md#dotxv), [invertv](BLISTypedAPI.md#invertv), [invscalv](BLISTypedAPI.md#invscalv), [scalv](BLISTypedAPI.md#scalv), [scal2v](BLISTypedAPI.md#scal2v), [setv](BLISTypedAPI.md#setv), [subv](BLISTypedAPI.md#subv), [swapv](BLISTypedAPI.md#swapv) * **[Level-1d](BLISTypedAPI.md#level-1d-operations)**: Element-wise operations on matrix diagonals: - * [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [subd](BLISTypedAPI.md#subd) + * [addd](BLISTypedAPI.md#addd), [axpyd](BLISTypedAPI.md#axpyd), [copyd](BLISTypedAPI.md#copyd), [invertd](BLISTypedAPI.md#invertd), [invscald](BLISTypedAPI.md#invscald), [scald](BLISTypedAPI.md#scald), [scal2d](BLISTypedAPI.md#scal2d), [setd](BLISTypedAPI.md#setd), [setid](BLISTypedAPI.md#setid), [subd](BLISTypedAPI.md#subd) * 
**[Level-1m](BLISTypedAPI.md#level-1m-operations)**: Element-wise operations on matrices: - * [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm) + * [addm](BLISTypedAPI.md#addm), [axpym](BLISTypedAPI.md#axpym), [copym](BLISTypedAPI.md#copym), [invscalm](BLISTypedAPI.md#invscalm), [scalm](BLISTypedAPI.md#scalm), [scal2m](BLISTypedAPI.md#scal2m), [setm](BLISTypedAPI.md#setm), [subm](BLISTypedAPI.md#subm) * **[Level-1f](BLISTypedAPI.md#level-1f-operations)**: Fused operations on multiple vectors: * [axpy2v](BLISTypedAPI.md#axpy2v), [dotaxpyv](BLISTypedAPI.md#dotaxpyv), [axpyf](BLISTypedAPI.md#axpyf), [dotxf](BLISTypedAPI.md#dotxf), [dotxaxpyf](BLISTypedAPI.md#dotxaxpyf) * **[Level-2](BLISTypedAPI.md#level-2-operations)**: Operations with one matrix and (at least) one vector operand: @@ -81,6 +81,7 @@ BLIS supports the following 14 level-1v kernels. These kernels are used primaril * **dotv**: Performs a [dot product](BLISTypedAPI.md#dotv) where the output scalar is overwritten. * **dotxv**: Performs an [extended dot product](BLISTypedAPI.md#dotxv) operation where the dot product is first scaled and then accumulated into a scaled output scalar. * **invertv**: Performs an [element-wise vector inversion](BLISTypedAPI.md#invertv) operation. + * **invscalv**: Performs an [in-place (destructive) vector inverse-scaling](BLISTypedAPI.md#invscalv) operation. * **scalv**: Performs an [in-place (destructive) vector scaling](BLISTypedAPI.md#scalv) operation. * **scal2v**: Performs an [out-of-place (non-destructive) vector scaling](BLISTypedAPI.md#scal2v) operation. * **setv**: Performs a [vector broadcast](BLISTypedAPI.md#setv) operation. @@ -184,6 +185,7 @@ datatype characters. 
| copyv | `BLIS_COPYV_KER` | `?copyv_ft` | | dotxv | `BLIS_DOTXV_KER` | `?dotxv_ft` | | invertv | `BLIS_INVERTV_KER` | `?invertv_ft` | +| invscalv | `BLIS_INVSCALV_KER` | `?invscalv_ft` | | scalv | `BLIS_SCALV_KER` | `?scalv_ft` | | scal2v | `BLIS_SCAL2V_KER` | `?scal2v_ft` | | setv | `BLIS_SETV_KER` | `?setv_ft` | @@ -220,6 +222,7 @@ This section seeks to provide developers with a complete reference for each of t * [dotv](KernelsHowTo.md#dotv-kernel) * [dotxv](KernelsHowTo.md#dotxv-kernel) * [invertv](KernelsHowTo.md#invertv-kernel) + * [invscalv](KernelsHowTo.md#invscalv-kernel) * [scalv](KernelsHowTo.md#scalv-kernel) * [scal2v](KernelsHowTo.md#scal2v-kernel) * [setv](KernelsHowTo.md#setv-kernel) @@ -929,6 +932,25 @@ This kernel inverts all elements of an _n_-length vector `x`. --- +#### invscalv kernel +```c +void bli_?invscalv_ + ( + conj_t conjalpha, + dim_t n, + ctype* restrict alpha, + ctype* restrict x, inc_t incx, + cntx_t* restrict cntx + ) +``` +This kernel performs the following operation: +``` + x := ( 1.0 / conjalpha(alpha) ) * x +``` +where `x` is a vector of length _n_ stored with stride `incx` and `alpha` is a scalar. 
+ +--- + #### scalv kernel ```c void bli_?scalv_ diff --git a/frame/1/bli_l1v_check.c b/frame/1/bli_l1v_check.c index 8ab470bf4..f2c4622d5 100644 --- a/frame/1/bli_l1v_check.c +++ b/frame/1/bli_l1v_check.c @@ -165,6 +165,7 @@ void PASTEMAC(opname,_check) \ bli_l1v_ax_check( alpha, x ); \ } +GENFRONT( invscalv ) GENFRONT( scalv ) GENFRONT( setv ) diff --git a/frame/1/bli_l1v_check.h b/frame/1/bli_l1v_check.h index 110b25d55..cfd6d9e6e 100644 --- a/frame/1/bli_l1v_check.h +++ b/frame/1/bli_l1v_check.h @@ -140,6 +140,7 @@ void PASTEMAC(opname,_check) \ const obj_t* x \ ); +GENTPROT( invscalv ) GENTPROT( scalv ) GENTPROT( setv ) diff --git a/frame/1/bli_l1v_fpa.c b/frame/1/bli_l1v_fpa.c index 311f0b2b9..a88aba93d 100644 --- a/frame/1/bli_l1v_fpa.c +++ b/frame/1/bli_l1v_fpa.c @@ -60,6 +60,7 @@ GENFRONT( scal2v ) GENFRONT( dotv ) GENFRONT( dotxv ) GENFRONT( invertv ) +GENFRONT( invscalv ) GENFRONT( scalv ) GENFRONT( setv ) GENFRONT( swapv ) diff --git a/frame/1/bli_l1v_fpa.h b/frame/1/bli_l1v_fpa.h index c05a4ff7b..52d477d30 100644 --- a/frame/1/bli_l1v_fpa.h +++ b/frame/1/bli_l1v_fpa.h @@ -52,6 +52,7 @@ GENPROT( scal2v ) GENPROT( dotv ) GENPROT( dotxv ) GENPROT( invertv ) +GENPROT( invscalv ) GENPROT( scalv ) GENPROT( setv ) GENPROT( swapv ) diff --git a/frame/1/bli_l1v_ft.h b/frame/1/bli_l1v_ft.h index 57f9d223a..244b926ca 100644 --- a/frame/1/bli_l1v_ft.h +++ b/frame/1/bli_l1v_ft.h @@ -158,7 +158,7 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ INSERT_GENTDEF( invertv ) -// scalv, setv +// invscalv, scalv, setv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ @@ -172,6 +172,7 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ BLIS_TAPI_EX_PARAMS \ ); +INSERT_GENTDEF( invscalv ) INSERT_GENTDEF( scalv ) INSERT_GENTDEF( setv ) diff --git a/frame/1/bli_l1v_ft_ker.h b/frame/1/bli_l1v_ft_ker.h index fd3f14c1c..ade2c98eb 100644 --- a/frame/1/bli_l1v_ft_ker.h +++ b/frame/1/bli_l1v_ft_ker.h @@ -161,7 +161,7 @@ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ 
INSERT_GENTDEF( invertv ) -// scalv, setv +// invscalv, scalv, setv #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ @@ -175,6 +175,7 @@ typedef void (*PASTECH3(ch,opname,_ker,tsuf)) \ cntx_t* cntx \ ); +INSERT_GENTDEF( invscalv ) INSERT_GENTDEF( scalv ) INSERT_GENTDEF( setv ) diff --git a/frame/1/bli_l1v_ker.h b/frame/1/bli_l1v_ker.h index e91813a07..4ebbffa82 100644 --- a/frame/1/bli_l1v_ker.h +++ b/frame/1/bli_l1v_ker.h @@ -90,6 +90,12 @@ INSERT_GENTPROT_BASIC0( dotxv_ker_name ) INSERT_GENTPROT_BASIC0( invertv_ker_name ) +#undef GENTPROT +#define GENTPROT INVSCALV_KER_PROT + +INSERT_GENTPROT_BASIC0( invscalv_ker_name ) + + #undef GENTPROT #define GENTPROT SCALV_KER_PROT diff --git a/frame/1/bli_l1v_ker_prot.h b/frame/1/bli_l1v_ker_prot.h index b912ba7e0..965626392 100644 --- a/frame/1/bli_l1v_ker_prot.h +++ b/frame/1/bli_l1v_ker_prot.h @@ -139,6 +139,18 @@ void PASTEMAC(ch,opname) \ ); \ +#define INVSCALV_KER_PROT( ctype, ch, opname ) \ +/* Prototype template for an invscalv kernel named PASTEMAC(ch,opname): takes conjalpha, the length n, a pointer to the scalar alpha, the vector x with stride incx, and a context pointer. */ \ +void PASTEMAC(ch,opname) \ + ( \ + conj_t conjalpha, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + cntx_t* cntx \ + ); \ + + #define SCALV_KER_PROT( ctype, ch, opname ) \ \ void PASTEMAC(ch,opname) \ diff --git a/frame/1/bli_l1v_oapi.c b/frame/1/bli_l1v_oapi.c index 4ea241693..ae12250e7 100644 --- a/frame/1/bli_l1v_oapi.c +++ b/frame/1/bli_l1v_oapi.c @@ -460,6 +460,7 @@ void PASTEMAC(opname,EX_SUF) \ ); \ } +GENFRONT( invscalv ) GENFRONT( scalv ) GENFRONT( setv ) diff --git a/frame/1/bli_l1v_oapi.h b/frame/1/bli_l1v_oapi.h index 957747a2a..b503cf9f4 100644 --- a/frame/1/bli_l1v_oapi.h +++ b/frame/1/bli_l1v_oapi.h @@ -147,6 +147,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ BLIS_OAPI_EX_PARAMS \ ); +GENTPROT( invscalv ) GENTPROT( scalv ) GENTPROT( setv ) diff --git a/frame/1/bli_l1v_tapi.c b/frame/1/bli_l1v_tapi.c index 01e3356d5..b22ba365f 100644 --- a/frame/1/bli_l1v_tapi.c +++ b/frame/1/bli_l1v_tapi.c @@ -341,6 +341,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ ); \ } 
+INSERT_GENTFUNC_BASIC( invscalv, BLIS_INVSCALV_KER ) INSERT_GENTFUNC_BASIC( scalv, BLIS_SCALV_KER ) INSERT_GENTFUNC_BASIC( setv, BLIS_SETV_KER ) diff --git a/frame/1/bli_l1v_tapi.h b/frame/1/bli_l1v_tapi.h index c1965cb3c..8eaf2b185 100644 --- a/frame/1/bli_l1v_tapi.h +++ b/frame/1/bli_l1v_tapi.h @@ -163,6 +163,7 @@ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ BLIS_TAPI_EX_PARAMS \ ); \ +INSERT_GENTPROT_BASIC0( invscalv ) INSERT_GENTPROT_BASIC0( scalv ) INSERT_GENTPROT_BASIC0( setv ) diff --git a/frame/1d/bli_l1d_check.c b/frame/1d/bli_l1d_check.c index fcc62a757..776ab8aee 100644 --- a/frame/1d/bli_l1d_check.c +++ b/frame/1d/bli_l1d_check.c @@ -98,6 +98,7 @@ void PASTEMAC(opname,_check) \ bli_l1d_ax_check( alpha, x ); \ } +GENFRONT( invscald ) GENFRONT( scald ) GENFRONT( setd ) GENFRONT( setid ) diff --git a/frame/1d/bli_l1d_check.h b/frame/1d/bli_l1d_check.h index 1ef57e236..56286f9ee 100644 --- a/frame/1d/bli_l1d_check.h +++ b/frame/1d/bli_l1d_check.h @@ -85,6 +85,7 @@ void PASTEMAC(opname,_check) \ const obj_t* x \ ); +GENTPROT( invscald ) GENTPROT( scald ) GENTPROT( setd ) GENTPROT( setid ) diff --git a/frame/1d/bli_l1d_fpa.c b/frame/1d/bli_l1d_fpa.c index ec4c222ab..371f9289b 100644 --- a/frame/1d/bli_l1d_fpa.c +++ b/frame/1d/bli_l1d_fpa.c @@ -56,6 +56,7 @@ GENFRONT( subd ) GENFRONT( axpyd ) GENFRONT( scal2d ) GENFRONT( invertd ) +GENFRONT( invscald ) GENFRONT( scald ) GENFRONT( setd ) GENFRONT( setid ) diff --git a/frame/1d/bli_l1d_fpa.h b/frame/1d/bli_l1d_fpa.h index 4516912de..11fb36192 100644 --- a/frame/1d/bli_l1d_fpa.h +++ b/frame/1d/bli_l1d_fpa.h @@ -48,6 +48,7 @@ GENPROT( subd ) GENPROT( axpyd ) GENPROT( scal2d ) GENPROT( invertd ) +GENPROT( invscald ) GENPROT( scald ) GENPROT( setd ) GENPROT( setid ) diff --git a/frame/1d/bli_l1d_ft.h b/frame/1d/bli_l1d_ft.h index 3de317527..b14e17b6a 100644 --- a/frame/1d/bli_l1d_ft.h +++ b/frame/1d/bli_l1d_ft.h @@ -95,7 +95,7 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ INSERT_GENTDEF( invertd ) -// 
scald, setd +// invscald, scald, setd #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ @@ -111,6 +111,7 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ BLIS_TAPI_EX_PARAMS \ ); +INSERT_GENTDEF( invscald ) INSERT_GENTDEF( scald ) INSERT_GENTDEF( setd ) diff --git a/frame/1d/bli_l1d_oapi.c b/frame/1d/bli_l1d_oapi.c index 7027e7780..8dfd9cad0 100644 --- a/frame/1d/bli_l1d_oapi.c +++ b/frame/1d/bli_l1d_oapi.c @@ -260,6 +260,7 @@ void PASTEMAC(opname,EX_SUF) \ ); \ } +GENFRONT( invscald ) GENFRONT( scald ) GENFRONT( setd ) diff --git a/frame/1d/bli_l1d_oapi.h b/frame/1d/bli_l1d_oapi.h index 66f9d698c..81171f3b8 100644 --- a/frame/1d/bli_l1d_oapi.h +++ b/frame/1d/bli_l1d_oapi.h @@ -89,6 +89,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ BLIS_OAPI_EX_PARAMS \ ); +GENTPROT( invscald ) GENTPROT( scald ) GENTPROT( setd ) GENTPROT( setid ) diff --git a/frame/1d/bli_l1d_tapi.c b/frame/1d/bli_l1d_tapi.c index 60916cd56..907afb703 100644 --- a/frame/1d/bli_l1d_tapi.c +++ b/frame/1d/bli_l1d_tapi.c @@ -312,6 +312,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ ); \ } +INSERT_GENTFUNC_BASIC2( invscald, invscalv, BLIS_INVSCALV_KER ) INSERT_GENTFUNC_BASIC2( scald, scalv, BLIS_SCALV_KER ) INSERT_GENTFUNC_BASIC2( setd, setv, BLIS_SETV_KER ) diff --git a/frame/1d/bli_l1d_tapi.h b/frame/1d/bli_l1d_tapi.h index 831b3d390..8fe882f0c 100644 --- a/frame/1d/bli_l1d_tapi.h +++ b/frame/1d/bli_l1d_tapi.h @@ -106,6 +106,7 @@ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ BLIS_TAPI_EX_PARAMS \ ); +INSERT_GENTPROT_BASIC0( invscald ) INSERT_GENTPROT_BASIC0( scald ) INSERT_GENTPROT_BASIC0( setd ) diff --git a/frame/1m/bli_l1m_check.c b/frame/1m/bli_l1m_check.c index f5d4bf1b4..92f192838 100644 --- a/frame/1m/bli_l1m_check.c +++ b/frame/1m/bli_l1m_check.c @@ -84,6 +84,7 @@ void PASTEMAC(opname,_check) \ bli_l1m_ax_check( alpha, x ); \ } +GENFRONT( invscalm ) GENFRONT( scalm ) GENFRONT( setm ) diff --git a/frame/1m/bli_l1m_check.h b/frame/1m/bli_l1m_check.h index 6089dfa17..d767f104c 
100644 --- a/frame/1m/bli_l1m_check.h +++ b/frame/1m/bli_l1m_check.h @@ -74,6 +74,7 @@ void PASTEMAC(opname,_check) \ const obj_t* x \ ); +GENPROT( invscalm ) GENPROT( scalm ) GENPROT( setm ) diff --git a/frame/1m/bli_l1m_fpa.c b/frame/1m/bli_l1m_fpa.c index c3d13fb51..7299dd7c8 100644 --- a/frame/1m/bli_l1m_fpa.c +++ b/frame/1m/bli_l1m_fpa.c @@ -55,6 +55,7 @@ GENFRONT( copym ) GENFRONT( subm ) GENFRONT( axpym ) GENFRONT( scal2m ) +GENFRONT( invscalm ) GENFRONT( scalm ) GENFRONT( setm ) GENFRONT( xpbym ) diff --git a/frame/1m/bli_l1m_fpa.h b/frame/1m/bli_l1m_fpa.h index 84ef8b77f..9de988559 100644 --- a/frame/1m/bli_l1m_fpa.h +++ b/frame/1m/bli_l1m_fpa.h @@ -47,6 +47,7 @@ GENPROT( copym ) GENPROT( subm ) GENPROT( axpym ) GENPROT( scal2m ) +GENPROT( invscalm ) GENPROT( scalm ) GENPROT( setm ) GENPROT( xpbym ) diff --git a/frame/1m/bli_l1m_ft.h b/frame/1m/bli_l1m_ft.h index 36d06b2fe..0851470dd 100644 --- a/frame/1m/bli_l1m_ft.h +++ b/frame/1m/bli_l1m_ft.h @@ -101,7 +101,7 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ INSERT_GENTDEF( scal2m ) -// scalm, setm +// invscalm, scalm, setm #undef GENTDEF #define GENTDEF( ctype, ch, opname, tsuf ) \ @@ -119,6 +119,7 @@ typedef void (*PASTECH3(ch,opname,EX_SUF,tsuf)) \ BLIS_TAPI_EX_PARAMS \ ); +INSERT_GENTDEF( invscalm ) INSERT_GENTDEF( scalm ) INSERT_GENTDEF( setm ) diff --git a/frame/1m/bli_l1m_oapi.c b/frame/1m/bli_l1m_oapi.c index 7520afce7..775d69018 100644 --- a/frame/1m/bli_l1m_oapi.c +++ b/frame/1m/bli_l1m_oapi.c @@ -237,6 +237,7 @@ void PASTEMAC(opname,EX_SUF) \ ); \ } +GENFRONT( invscalm ) GENFRONT( scalm ) diff --git a/frame/1m/bli_l1m_oapi.h b/frame/1m/bli_l1m_oapi.h index 9510f1aee..6873e9903 100644 --- a/frame/1m/bli_l1m_oapi.h +++ b/frame/1m/bli_l1m_oapi.h @@ -77,6 +77,7 @@ BLIS_EXPORT_BLIS void PASTEMAC(opname,EX_SUF) \ BLIS_OAPI_EX_PARAMS \ ); +GENPROT( invscalm ) GENPROT( scalm ) GENPROT( setm ) diff --git a/frame/1m/bli_l1m_tapi.c b/frame/1m/bli_l1m_tapi.c index 6b802b9fe..0a641cf9e 100644 --- 
a/frame/1m/bli_l1m_tapi.c +++ b/frame/1m/bli_l1m_tapi.c @@ -378,6 +378,7 @@ void PASTEMAC2(ch,opname,EX_SUF) \ ); \ } +INSERT_GENTFUNC_BASIC0( invscalm ) INSERT_GENTFUNC_BASIC0( scalm ) INSERT_GENTFUNC_BASIC0( setm ) diff --git a/frame/1m/bli_l1m_tapi.h b/frame/1m/bli_l1m_tapi.h index 68646a71f..531fae075 100644 --- a/frame/1m/bli_l1m_tapi.h +++ b/frame/1m/bli_l1m_tapi.h @@ -95,6 +95,7 @@ BLIS_EXPORT_BLIS void PASTEMAC2(ch,opname,EX_SUF) \ BLIS_TAPI_EX_PARAMS \ ); +INSERT_GENTPROT_BASIC0( invscalm ) INSERT_GENTPROT_BASIC0( scalm ) INSERT_GENTPROT_BASIC0( setm ) diff --git a/frame/1m/bli_l1m_unb_var1.c b/frame/1m/bli_l1m_unb_var1.c index c979f082a..1bcd9b9ca 100644 --- a/frame/1m/bli_l1m_unb_var1.c +++ b/frame/1m/bli_l1m_unb_var1.c @@ -376,6 +376,7 @@ void PASTEMAC(ch,opname) \ } \ } +INSERT_GENTFUNC_BASIC2( invscalm_unb_var1, invscalv, BLIS_INVSCALV_KER ) INSERT_GENTFUNC_BASIC2( scalm_unb_var1, scalv, BLIS_SCALV_KER ) INSERT_GENTFUNC_BASIC2( setm_unb_var1, setv, BLIS_SETV_KER ) diff --git a/frame/1m/bli_l1m_unb_var1.h b/frame/1m/bli_l1m_unb_var1.h index 0364d4b7c..fe01989e3 100644 --- a/frame/1m/bli_l1m_unb_var1.h +++ b/frame/1m/bli_l1m_unb_var1.h @@ -98,6 +98,7 @@ void PASTEMAC2(ch,opname,_unb_var1) \ rntm_t* rntm \ ); +INSERT_GENTPROT_BASIC0( invscalm ) INSERT_GENTPROT_BASIC0( scalm ) INSERT_GENTPROT_BASIC0( setm ) diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 08c7ddc4a..b5c3ec255 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -635,6 +635,7 @@ typedef enum BLIS_DOTV_KER, BLIS_DOTXV_KER, BLIS_INVERTV_KER, + BLIS_INVSCALV_KER, BLIS_SCALV_KER, BLIS_SCAL2V_KER, BLIS_SETV_KER, diff --git a/ref_kernels/1/bli_invscalv_ref.c b/ref_kernels/1/bli_invscalv_ref.c new file mode 100644 index 000000000..a2263ee58 --- /dev/null +++ b/ref_kernels/1/bli_invscalv_ref.c @@ -0,0 +1,81 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2022, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, arch, suf ) \ +/* Reference invscalv kernel: x := ( 1.0 / conjalpha(alpha) ) * x over n elements of x spaced incx apart. Returns without touching x when n is a zero dimension, alpha is one, or alpha is zero. */ \ +void PASTEMAC3(ch,opname,arch,suf) \ + ( \ + conj_t conjalpha, \ + dim_t n, \ + ctype* restrict alpha, \ + ctype* restrict x, inc_t incx, \ + cntx_t* cntx /* not referenced by this reference implementation */ \ + ) \ +{ \ + if ( bli_zero_dim1( n ) ) return; /* presumably true when n is zero: nothing to scale */ \ +\ + /* If alpha is one, return. */ \ + if ( PASTEMAC(ch,eq1)( *alpha ) ) return; \ +\ + /* If alpha is zero, inv(alpha) is undefined, so return early and leave x 
unmodified (no error is reported to the caller). */ \ + if ( PASTEMAC(ch,eq0)( *alpha ) ) return; \ +\ + ctype alpha_conj; /* receives alpha from copycjs below, presumably conjugated as conjalpha requests */ \ +\ + PASTEMAC(ch,copycjs)( conjalpha, *alpha, alpha_conj ); \ +\ + if ( incx == 1 ) /* unit stride: index x directly; only this loop carries PRAGMA_SIMD */ \ + { \ + PRAGMA_SIMD \ + for ( dim_t i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,invscals)( alpha_conj, x[i] ); \ + } \ + } \ + else /* general stride: step the x pointer by incx after each element */ \ + { \ + for ( dim_t i = 0; i < n; ++i ) \ + { \ + PASTEMAC(ch,invscals)( alpha_conj, *x ); \ +\ + x += incx; \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC2( invscalv, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) + diff --git a/ref_kernels/bli_cntx_ref.c b/ref_kernels/bli_cntx_ref.c index e094db54b..11c3091e9 100644 --- a/ref_kernels/bli_cntx_ref.c +++ b/ref_kernels/bli_cntx_ref.c @@ -173,6 +173,8 @@ #define dotxv_ker_name GENARNAME(dotxv) #undef invertv_ker_name #define invertv_ker_name GENARNAME(invertv) +#undef invscalv_ker_name +#define invscalv_ker_name GENARNAME(invscalv) #undef scalv_ker_name #define scalv_ker_name GENARNAME(scalv) #undef scal2v_ker_name @@ -380,20 +382,21 @@ void GENBARNAME(cntx_init) // -- Set level-1v kernels ------------------------------------------------- - gen_func_init( &funcs[ BLIS_ADDV_KER ], addv_ker_name ); - gen_func_init( &funcs[ BLIS_AMAXV_KER ], amaxv_ker_name ); - gen_func_init( &funcs[ BLIS_AXPBYV_KER ], axpbyv_ker_name ); - gen_func_init( &funcs[ BLIS_AXPYV_KER ], axpyv_ker_name ); - gen_func_init( &funcs[ BLIS_COPYV_KER ], copyv_ker_name ); - gen_func_init( &funcs[ BLIS_DOTV_KER ], dotv_ker_name ); - gen_func_init( &funcs[ BLIS_DOTXV_KER ], dotxv_ker_name ); - gen_func_init( &funcs[ BLIS_INVERTV_KER ], invertv_ker_name ); - gen_func_init( &funcs[ BLIS_SCALV_KER ], scalv_ker_name ); - gen_func_init( &funcs[ BLIS_SCAL2V_KER ], scal2v_ker_name ); - gen_func_init( &funcs[ BLIS_SETV_KER ], setv_ker_name ); - gen_func_init( &funcs[ BLIS_SUBV_KER ], subv_ker_name ); - gen_func_init( &funcs[ BLIS_SWAPV_KER ], swapv_ker_name ); - gen_func_init( &funcs[ BLIS_XPBYV_KER ], xpbyv_ker_name ); + gen_func_init( &funcs[ BLIS_ADDV_KER ], addv_ker_name ); + 
gen_func_init( &funcs[ BLIS_AMAXV_KER ], amaxv_ker_name ); + gen_func_init( &funcs[ BLIS_AXPBYV_KER ], axpbyv_ker_name ); + gen_func_init( &funcs[ BLIS_AXPYV_KER ], axpyv_ker_name ); + gen_func_init( &funcs[ BLIS_COPYV_KER ], copyv_ker_name ); + gen_func_init( &funcs[ BLIS_DOTV_KER ], dotv_ker_name ); + gen_func_init( &funcs[ BLIS_DOTXV_KER ], dotxv_ker_name ); + gen_func_init( &funcs[ BLIS_INVERTV_KER ], invertv_ker_name ); + gen_func_init( &funcs[ BLIS_INVSCALV_KER ], invscalv_ker_name ); + gen_func_init( &funcs[ BLIS_SCALV_KER ], scalv_ker_name ); + gen_func_init( &funcs[ BLIS_SCAL2V_KER ], scal2v_ker_name ); + gen_func_init( &funcs[ BLIS_SETV_KER ], setv_ker_name ); + gen_func_init( &funcs[ BLIS_SUBV_KER ], subv_ker_name ); + gen_func_init( &funcs[ BLIS_SWAPV_KER ], swapv_ker_name ); + gen_func_init( &funcs[ BLIS_XPBYV_KER ], xpbyv_ker_name ); // -- Set level-1m (packm/unpackm) kernels --------------------------------- diff --git a/testsuite/input.operations b/testsuite/input.operations index eebe8b605..e6c39e631 100644 --- a/testsuite/input.operations +++ b/testsuite/input.operations @@ -138,9 +138,13 @@ 1 # normfv -1 # dimensions: m +1 # invscalv +-1 # dimensions: m +? # parameters: conjalpha + 1 # scalv -1 # dimensions: m -? # parameters: conjbeta +? # parameters: conjalpha 1 # scal2v -1 # dimensions: m @@ -175,9 +179,13 @@ 1 # normfm -1 -2 # dimensions: m n +1 # invscalm +-1 -2 # dimensions: m n +? # parameters: conjalpha + 1 # scalm -1 -2 # dimensions: m n -? # parameters: conjbeta +? # parameters: conjalpha 1 # scal2m -1 -2 # dimensions: m n diff --git a/testsuite/input.operations.fast b/testsuite/input.operations.fast index b733c672d..ecd526aaa 100644 --- a/testsuite/input.operations.fast +++ b/testsuite/input.operations.fast @@ -138,9 +138,13 @@ 1 # normfv -1 # dimensions: m +1 # invscalv +-1 # dimensions: m +? # parameters: conjalpha + 1 # scalv -1 # dimensions: m -? # parameters: conjbeta +? 
# parameters: conjalpha 1 # scal2v -1 # dimensions: m @@ -175,9 +179,13 @@ 1 # normfm -1 -2 # dimensions: m n +1 # invscalm +-1 -2 # dimensions: m n +? # parameters: conjalpha + 1 # scalm -1 -2 # dimensions: m n -? # parameters: conjbeta +? # parameters: conjalpha 1 # scal2m -1 -2 # dimensions: m n diff --git a/testsuite/input.operations.mixed b/testsuite/input.operations.mixed index 6292ea8ab..eb851b786 100644 --- a/testsuite/input.operations.mixed +++ b/testsuite/input.operations.mixed @@ -138,9 +138,13 @@ 1 # normfv -1 # dimensions: m +1 # invscalv +-1 # dimensions: m +? # parameters: conjalpha + 1 # scalv -1 # dimensions: m -? # parameters: conjbeta +? # parameters: conjalpha 1 # scal2v -1 # dimensions: m @@ -175,9 +179,13 @@ 1 # normfm -1 -2 # dimensions: m n +1 # invscalm +-1 -2 # dimensions: m n +? # parameters: conjalpha + 1 # scalm -1 -2 # dimensions: m n -? # parameters: conjbeta +? # parameters: conjalpha 1 # scal2m -1 -2 # dimensions: m n diff --git a/testsuite/input.operations.salt b/testsuite/input.operations.salt index b733c672d..ecd526aaa 100644 --- a/testsuite/input.operations.salt +++ b/testsuite/input.operations.salt @@ -138,9 +138,13 @@ 1 # normfv -1 # dimensions: m +1 # invscalv +-1 # dimensions: m +? # parameters: conjalpha + 1 # scalv -1 # dimensions: m -? # parameters: conjbeta +? # parameters: conjalpha 1 # scal2v -1 # dimensions: m @@ -175,9 +179,13 @@ 1 # normfm -1 -2 # dimensions: m n +1 # invscalm +-1 -2 # dimensions: m n +? # parameters: conjalpha + 1 # scalm -1 -2 # dimensions: m n -? # parameters: conjbeta +? # parameters: conjalpha 1 # scal2m -1 -2 # dimensions: m n diff --git a/testsuite/src/test_invscalm.c b/testsuite/src/test_invscalm.c new file mode 100644 index 000000000..9ad730631 --- /dev/null +++ b/testsuite/src/test_invscalm.c @@ -0,0 +1,301 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "test_libblis.h" + + +// Static variables. 
+static char* op_str = "invscalm"; +static char* o_types = "m"; // x +static char* p_types = "c"; // conjalpha +static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s + { 1e-04, 1e-05 }, // warn, pass for c + { 1e-13, 1e-14 }, // warn, pass for d + { 1e-13, 1e-14 } }; // warn, pass for z + +// Local prototypes. +void libblis_test_invscalm_deps + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ); + +void libblis_test_invscalm_experiment + ( + test_params_t* params, + test_op_t* op, + iface_t iface, + char* dc_str, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid + ); + +void libblis_test_invscalm_impl + ( + iface_t iface, + obj_t* alpha, + obj_t* y + ); + +void libblis_test_invscalm_check + ( + test_params_t* params, + obj_t* alpha, + obj_t* y, + obj_t* y_save, + double* resid + ); + + + +void libblis_test_invscalm_deps + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ) +{ + libblis_test_randm( tdata, params, &(op->ops->randm) ); + libblis_test_normfm( tdata, params, &(op->ops->normfm) ); + libblis_test_copym( tdata, params, &(op->ops->copym) ); +} + + + +void libblis_test_invscalm + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ) +{ + + // Return early if this test has already been done. + if ( libblis_test_op_is_done( op ) ) return; + + // Return early if operation is disabled. + if ( libblis_test_op_is_disabled( op ) || + libblis_test_l1m_is_disabled( op ) ) return; + + // Call dependencies first. + if ( TRUE ) libblis_test_invscalm_deps( tdata, params, op ); + + // Execute the test driver for each implementation requested. 
+ //if ( op->front_seq == ENABLE ) + { + libblis_test_op_driver( tdata, + params, + op, + BLIS_TEST_SEQ_FRONT_END, + op_str, + p_types, + o_types, + thresh, + libblis_test_invscalm_experiment ); + } +} + + + +void libblis_test_invscalm_experiment + ( + test_params_t* params, + test_op_t* op, + iface_t iface, + char* dc_str, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid + ) +{ + unsigned int n_repeats = params->n_repeats; + unsigned int i; + + double time_min = DBL_MAX; + double time; + + num_t datatype; + + dim_t m, n; + + conj_t conjalpha; + + obj_t alpha, y; + obj_t y_save; + + + // Use the datatype of the first char in the datatype combination string. + bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); + + // Map the dimension specifier to actual dimensions. + m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); + n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); + + // Map parameter characters to BLIS constants. + bli_param_map_char_to_blis_conj( pc_str[0], &conjalpha ); + + // Create test scalars. + bli_obj_scalar_init_detached( datatype, &alpha ); + + // Create test operands (vectors and/or matrices). + libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, + sc_str[0], m, n, &y ); + libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, + sc_str[0], m, n, &y_save ); + + // Set alpha to 0 + i. + //bli_setsc( 0.0, 1.0, &alpha ); + if ( bli_obj_is_real( &y ) ) + bli_setsc( -2.0, 0.0, &alpha ); + else + bli_setsc( 0.0, -2.0, &alpha ); + + // Randomize and save y. + libblis_test_mobj_randomize( params, FALSE, &y ); + bli_copym( &y, &y_save ); + + // Apply the parameters. + bli_obj_set_conj( conjalpha, &alpha ); + + // Repeat the experiment n_repeats times and record results. 
+ for ( i = 0; i < n_repeats; ++i ) + { + bli_copym( &y_save, &y ); + + time = bli_clock(); + + libblis_test_invscalm_impl( iface, &alpha, &y ); + + time_min = bli_clock_min_diff( time_min, time ); + } + + // Estimate the performance of the best experiment repeat. + *perf = ( 1.0 * m * n ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( &y ) ) *perf *= 6.0; + + // Perform checks. + libblis_test_invscalm_check( params, &alpha, &y, &y_save, resid ); + + // Zero out performance and residual if output matrix is empty. + libblis_test_check_empty_problem( &y, perf, resid ); + + // Free the test objects. + bli_obj_free( &y ); + bli_obj_free( &y_save ); +} + + + +void libblis_test_invscalm_impl + ( + iface_t iface, + obj_t* alpha, + obj_t* y + ) +{ + switch ( iface ) + { + case BLIS_TEST_SEQ_FRONT_END: + bli_invscalm( alpha, y ); + break; + + default: + libblis_test_printf_error( "Invalid interface type.\n" ); + } +} + + + +void libblis_test_invscalm_check + ( + test_params_t* params, + obj_t* alpha, + obj_t* y, + obj_t* y_orig, + double* resid + ) +{ + num_t dt = bli_obj_dt( y ); + num_t dt_real = bli_obj_dt_proj_to_real( y ); + + dim_t m = bli_obj_length( y ); + dim_t n = bli_obj_width( y ); + + obj_t norm_y_r; + + obj_t y2; + + double junk; + + // + // Pre-conditions: + // - y_orig is randomized. + // Note: + // - alpha should have a non-zero imaginary component in the complex + // cases in order to more fully exercise the implementation. + // + // Under these conditions, we assume that the implementation for + // + // y := ( 1.0 / conjalpha(alpha) ) * y_orig + // + // is functioning correctly if + // + // normfv( y_orig - conjalpha(alpha) * y ) + // + // is negligible. 
+ // + + bli_obj_create( dt, m, n, 0, 0, &y2 ); + bli_copym( y, &y2 ); + + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); + + bli_scalm( alpha, &y2 ); + bli_subm( y_orig, &y2 ); + + bli_normfm( &y2, &norm_y_r ); + + bli_getsc( &norm_y_r, resid, &junk ); + + bli_obj_free( &y2 ); +} + diff --git a/testsuite/src/test_invscalm.h b/testsuite/src/test_invscalm.h new file mode 100644 index 000000000..698f9b377 --- /dev/null +++ b/testsuite/src/test_invscalm.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void libblis_test_invscalm + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ); + diff --git a/testsuite/src/test_invscalv.c b/testsuite/src/test_invscalv.c new file mode 100644 index 000000000..47d46b4c2 --- /dev/null +++ b/testsuite/src/test_invscalv.c @@ -0,0 +1,297 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" +#include "test_libblis.h" + + +// Static variables. +static char* op_str = "invscalv"; +static char* o_types = "v"; // y +static char* p_types = "c"; // conjalpha +static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s + { 1e-04, 1e-05 }, // warn, pass for c + { 1e-13, 1e-14 }, // warn, pass for d + { 1e-13, 1e-14 } }; // warn, pass for z + +// Local prototypes. +void libblis_test_invscalv_deps + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ); + +void libblis_test_invscalv_experiment + ( + test_params_t* params, + test_op_t* op, + iface_t iface, + char* dc_str, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid + ); + +void libblis_test_invscalv_impl + ( + iface_t iface, + obj_t* alpha, + obj_t* y + ); + +void libblis_test_invscalv_check + ( + test_params_t* params, + obj_t* alpha, + obj_t* y, + obj_t* y_orig, + double* resid + ); + + + +void libblis_test_invscalv_deps + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ) +{ + libblis_test_randv( tdata, params, &(op->ops->randv) ); + libblis_test_normfv( tdata, params, &(op->ops->normfv) ); + libblis_test_addv( tdata, params, &(op->ops->addv) ); + libblis_test_copyv( tdata, params, &(op->ops->copyv) ); +} + + + +void libblis_test_invscalv + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ) +{ + + // Return early if this test has already been 
done. + if ( libblis_test_op_is_done( op ) ) return; + + // Return early if operation is disabled. + if ( libblis_test_op_is_disabled( op ) || + libblis_test_l1v_is_disabled( op ) ) return; + + // Call dependencies first. + if ( TRUE ) libblis_test_invscalv_deps( tdata, params, op ); + + // Execute the test driver for each implementation requested. + //if ( op->front_seq == ENABLE ) + { + libblis_test_op_driver( tdata, + params, + op, + BLIS_TEST_SEQ_FRONT_END, + op_str, + p_types, + o_types, + thresh, + libblis_test_invscalv_experiment ); + } +} + + + +void libblis_test_invscalv_experiment + ( + test_params_t* params, + test_op_t* op, + iface_t iface, + char* dc_str, + char* pc_str, + char* sc_str, + unsigned int p_cur, + double* perf, + double* resid + ) +{ + unsigned int n_repeats = params->n_repeats; + unsigned int i; + + double time_min = DBL_MAX; + double time; + + num_t datatype; + + dim_t m; + + conj_t conjalpha; + + obj_t alpha, y; + obj_t y_save; + + + // Use the datatype of the first char in the datatype combination string. + bli_param_map_char_to_blis_dt( dc_str[0], &datatype ); + + // Map the dimension specifier to an actual dimension. + m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); + + // Map parameter characters to BLIS constants. + bli_param_map_char_to_blis_conj( pc_str[0], &conjalpha ); + + // Create test scalars. + bli_obj_scalar_init_detached( datatype, &alpha ); + + // Create test operands (vectors and/or matrices). + libblis_test_vobj_create( params, datatype, sc_str[0], m, &y ); + libblis_test_vobj_create( params, datatype, sc_str[0], m, &y_save ); + + // Set alpha. + if ( bli_obj_is_real( &y ) ) + bli_setsc( -2.0, 0.0, &alpha ); + else + bli_setsc( 0.0, -2.0, &alpha ); + + // Randomize and save y. + libblis_test_vobj_randomize( params, FALSE, &y ); + bli_copyv( &y, &y_save ); + + // Apply the parameters. + bli_obj_set_conj( conjalpha, &alpha ); + + // Repeat the experiment n_repeats times and record results. 
+ for ( i = 0; i < n_repeats; ++i ) + { + bli_copyv( &y_save, &y ); + + time = bli_clock(); + + libblis_test_invscalv_impl( iface, &alpha, &y ); + + time_min = bli_clock_min_diff( time_min, time ); + } + + // Estimate the performance of the best experiment repeat. + *perf = ( 1.0 * m ) / time_min / FLOPS_PER_UNIT_PERF; + if ( bli_obj_is_complex( &y ) ) *perf *= 6.0; + + // Perform checks. + libblis_test_invscalv_check( params, &alpha, &y, &y_save, resid ); + + // Zero out performance and residual if output vector is empty. + libblis_test_check_empty_problem( &y, perf, resid ); + + // Free the test objects. + bli_obj_free( &y ); + bli_obj_free( &y_save ); +} + + + +void libblis_test_invscalv_impl + ( + iface_t iface, + obj_t* alpha, + obj_t* y + ) +{ + switch ( iface ) + { + case BLIS_TEST_SEQ_FRONT_END: + bli_invscalv( alpha, y ); + break; + + default: + libblis_test_printf_error( "Invalid interface type.\n" ); + } +} + + + +void libblis_test_invscalv_check + ( + test_params_t* params, + obj_t* alpha, + obj_t* y, + obj_t* y_orig, + double* resid + ) +{ + num_t dt = bli_obj_dt( y ); + num_t dt_real = bli_obj_dt_proj_to_real( y ); + + dim_t m = bli_obj_vector_dim( y ); + + obj_t norm_y_r; + + obj_t y2; + + double junk; + + // + // Pre-conditions: + // - y_orig is randomized. + // Note: + // - alpha should have a non-zero imaginary component in the complex + // cases in order to more fully exercise the implementation. + // + // Under these conditions, we assume that the implementation for + // + // y := ( 1.0 / conjalpha(alpha) ) * y_orig + // + // is functioning correctly if + // + // normfv( y_orig - conjalpha(alpha) * y ) + // + // is negligible. 
+ // + + bli_obj_create( dt, m, 1, 0, 0, &y2 ); + bli_copyv( y, &y2 ); + + bli_obj_scalar_init_detached( dt_real, &norm_y_r ); + + bli_scalv( alpha, &y2 ); + bli_subv( y_orig, &y2 ); + + bli_normfv( &y2, &norm_y_r ); + + bli_getsc( &norm_y_r, resid, &junk ); + + bli_obj_free( &y2 ); +} + diff --git a/testsuite/src/test_invscalv.h b/testsuite/src/test_invscalv.h new file mode 100644 index 000000000..297be4836 --- /dev/null +++ b/testsuite/src/test_invscalv.h @@ -0,0 +1,42 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + Copyright (C) 2018 - 2019, Advanced Micro Devices, Inc. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name(s) of the copyright holder(s) nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void libblis_test_invscalv + ( + thread_data_t* tdata, + test_params_t* params, + test_op_t* op + ); + diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index 442fae0e0..3ce92e377 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -255,6 +255,7 @@ void libblis_test_level1v_ops( thread_data_t* tdata, test_params_t* params, test libblis_test_dotv( tdata, params, &(ops->dotv) ); libblis_test_dotxv( tdata, params, &(ops->dotxv) ); libblis_test_normfv( tdata, params, &(ops->normfv) ); + libblis_test_invscalv( tdata, params, &(ops->invscalv) ); libblis_test_scalv( tdata, params, &(ops->scalv) ); libblis_test_scal2v( tdata, params, &(ops->scal2v) ); libblis_test_setv( tdata, params, &(ops->setv) ); @@ -270,6 +271,7 @@ void libblis_test_level1m_ops( thread_data_t* tdata, test_params_t* params, test libblis_test_axpym( tdata, params, &(ops->axpym) ); libblis_test_copym( tdata, params, &(ops->copym) ); libblis_test_normfm( tdata, params, &(ops->normfm) ); + libblis_test_invscalm( tdata, params, &(ops->invscalm) ); libblis_test_scalm( tdata, params, &(ops->scalm) ); libblis_test_scal2m( tdata, params, &(ops->scal2m) ); libblis_test_setm( tdata, params, &(ops->setm) ); @@ -370,6 +372,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->dotv) ); libblis_test_read_op_info( ops, input_stream, 
BLIS_NOID, BLIS_TEST_DIMS_M, 2, &(ops->dotxv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->normfv) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->invscalv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scalv) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 1, &(ops->scal2v) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_M, 0, &(ops->setv) ); @@ -381,6 +384,7 @@ void libblis_test_read_ops_file( char* input_filename, test_ops_t* ops ) libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->axpym) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->copym) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 0, &(ops->normfm) ); + libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->invscalm) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->scalm) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 1, &(ops->scal2m) ); libblis_test_read_op_info( ops, input_stream, BLIS_NOID, BLIS_TEST_DIMS_MN, 0, &(ops->setm) ); @@ -2705,8 +2709,9 @@ void libblis_test_vobj_randomize( test_params_t* params, bool normalize, obj_t* bli_normfv( x, &kappa_r ); libblis_test_ceil_pow2( &kappa_r ); bli_copysc( &kappa_r, &kappa ); - bli_invertsc( &kappa ); - bli_scalv( &kappa, x ); + //bli_invertsc( &kappa ); + //bli_scalv( &kappa, x ); + bli_invscalv( &kappa, x ); } } @@ -2744,8 +2749,9 @@ void libblis_test_mobj_randomize( test_params_t* params, bool normalize, obj_t* bli_norm1m( a, &kappa_r ); libblis_test_ceil_pow2( &kappa_r ); bli_copysc( &kappa_r, &kappa ); - bli_invertsc( &kappa ); - bli_scalm( &kappa, a ); + //bli_invertsc( &kappa ); + //bli_scalm( &kappa, a ); + bli_invscalm( &kappa, a ); } } diff --git 
a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index cdb3c6dac..9e38964ee 100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -230,6 +230,7 @@ typedef struct test_ops_s test_op_t dotv; test_op_t dotxv; test_op_t normfv; + test_op_t invscalv; test_op_t scalv; test_op_t scal2v; test_op_t setv; @@ -241,6 +242,7 @@ typedef struct test_ops_s test_op_t axpym; test_op_t copym; test_op_t normfm; + test_op_t invscalm; test_op_t scalm; test_op_t scal2m; test_op_t setm; @@ -504,6 +506,7 @@ char libblis_test_proj_dtchar_to_precchar( char dt_char ); #include "test_dotv.h" #include "test_dotxv.h" #include "test_normfv.h" +#include "test_invscalv.h" #include "test_scalv.h" #include "test_scal2v.h" #include "test_setv.h" @@ -515,6 +518,7 @@ char libblis_test_proj_dtchar_to_precchar( char dt_char ); #include "test_axpym.h" #include "test_copym.h" #include "test_normfm.h" +#include "test_invscalm.h" #include "test_scalm.h" #include "test_scal2m.h" #include "test_setm.h" diff --git a/testsuite/src/test_scalm.c b/testsuite/src/test_scalm.c index 6219c71df..bd4565ccd 100644 --- a/testsuite/src/test_scalm.c +++ b/testsuite/src/test_scalm.c @@ -40,7 +40,7 @@ // Static variables. 
static char* op_str = "scalm"; static char* o_types = "m"; // x -static char* p_types = "c"; // conjbeta +static char* p_types = "c"; // conjalpha static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d @@ -70,14 +70,14 @@ void libblis_test_scalm_experiment void libblis_test_scalm_impl ( iface_t iface, - obj_t* beta, + obj_t* alpha, obj_t* y ); void libblis_test_scalm_check ( test_params_t* params, - obj_t* beta, + obj_t* alpha, obj_t* y, obj_t* y_save, double* resid @@ -157,9 +157,9 @@ void libblis_test_scalm_experiment dim_t m, n; - conj_t conjbeta; + conj_t conjalpha; - obj_t beta, y; + obj_t alpha, y; obj_t y_save; @@ -171,10 +171,10 @@ void libblis_test_scalm_experiment n = libblis_test_get_dim_from_prob_size( op->dim_spec[1], p_cur ); // Map parameter characters to BLIS constants. - bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); + bli_param_map_char_to_blis_conj( pc_str[0], &conjalpha ); // Create test scalars. - bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, @@ -182,19 +182,19 @@ void libblis_test_scalm_experiment libblis_test_mobj_create( params, datatype, BLIS_NO_TRANSPOSE, sc_str[0], m, n, &y_save ); - // Set beta to 0 + i. - //bli_setsc( 0.0, 1.0, &beta ); + // Set alpha to 0 + i. + //bli_setsc( 0.0, 1.0, &alpha ); if ( bli_obj_is_real( &y ) ) - bli_setsc( -2.0, 0.0, &beta ); + bli_setsc( -2.0, 0.0, &alpha ); else - bli_setsc( 0.0, -2.0, &beta ); + bli_setsc( 0.0, -2.0, &alpha ); // Randomize and save y. libblis_test_mobj_randomize( params, FALSE, &y ); bli_copym( &y, &y_save ); // Apply the parameters. - bli_obj_set_conj( conjbeta, &beta ); + bli_obj_set_conj( conjalpha, &alpha ); // Repeat the experiment n_repeats times and record results. 
for ( i = 0; i < n_repeats; ++i ) @@ -203,7 +203,7 @@ void libblis_test_scalm_experiment time = bli_clock(); - libblis_test_scalm_impl( iface, &beta, &y ); + libblis_test_scalm_impl( iface, &alpha, &y ); time_min = bli_clock_min_diff( time_min, time ); } @@ -213,7 +213,7 @@ void libblis_test_scalm_experiment if ( bli_obj_is_complex( &y ) ) *perf *= 6.0; // Perform checks. - libblis_test_scalm_check( params, &beta, &y, &y_save, resid ); + libblis_test_scalm_check( params, &alpha, &y, &y_save, resid ); // Zero out performance and residual if output matrix is empty. libblis_test_check_empty_problem( &y, perf, resid ); @@ -228,14 +228,14 @@ void libblis_test_scalm_experiment void libblis_test_scalm_impl ( iface_t iface, - obj_t* beta, + obj_t* alpha, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: - bli_scalm( beta, y ); + bli_scalm( alpha, y ); break; default: @@ -248,7 +248,7 @@ void libblis_test_scalm_impl void libblis_test_scalm_check ( test_params_t* params, - obj_t* beta, + obj_t* alpha, obj_t* y, obj_t* y_orig, double* resid @@ -261,7 +261,7 @@ void libblis_test_scalm_check dim_t n = bli_obj_width( y ); obj_t norm_y_r; - obj_t nbeta; + obj_t nalpha; obj_t y2; @@ -271,16 +271,16 @@ void libblis_test_scalm_check // Pre-conditions: // - y_orig is randomized. // Note: - // - beta should have a non-zero imaginary component in the complex + // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // - // y := conjbeta(beta) * y_orig + // y := conjalpha(alpha) * y_orig // // is functioning correctly if // - // normfm( y + -conjbeta(beta) * y_orig ) + // normfm( y + -conjalpha(alpha) * y_orig ) // // is negligible. 
// @@ -288,13 +288,13 @@ void libblis_test_scalm_check bli_obj_create( dt, m, n, 0, 0, &y2 ); bli_copym( y_orig, &y2 ); - bli_obj_scalar_init_detached( dt, &nbeta ); + bli_obj_scalar_init_detached( dt, &nalpha ); bli_obj_scalar_init_detached( dt_real, &norm_y_r ); - bli_copysc( beta, &nbeta ); - bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); + bli_copysc( alpha, &nalpha ); + bli_mulsc( &BLIS_MINUS_ONE, &nalpha ); - bli_scalm( &nbeta, &y2 ); + bli_scalm( &nalpha, &y2 ); bli_addm( &y2, y ); bli_normfm( y, &norm_y_r ); diff --git a/testsuite/src/test_scalv.c b/testsuite/src/test_scalv.c index 142b5e410..7b409103b 100644 --- a/testsuite/src/test_scalv.c +++ b/testsuite/src/test_scalv.c @@ -40,7 +40,7 @@ // Static variables. static char* op_str = "scalv"; static char* o_types = "v"; // y -static char* p_types = "c"; // conjbeta +static char* p_types = "c"; // conjalpha static thresh_t thresh[BLIS_NUM_FP_TYPES] = { { 1e-04, 1e-05 }, // warn, pass for s { 1e-04, 1e-05 }, // warn, pass for c { 1e-13, 1e-14 }, // warn, pass for d @@ -70,14 +70,14 @@ void libblis_test_scalv_experiment void libblis_test_scalv_impl ( iface_t iface, - obj_t* beta, + obj_t* alpha, obj_t* y ); void libblis_test_scalv_check ( test_params_t* params, - obj_t* beta, + obj_t* alpha, obj_t* y, obj_t* y_orig, double* resid @@ -158,9 +158,9 @@ void libblis_test_scalv_experiment dim_t m; - conj_t conjbeta; + conj_t conjalpha; - obj_t beta, y; + obj_t alpha, y; obj_t y_save; @@ -171,27 +171,27 @@ void libblis_test_scalv_experiment m = libblis_test_get_dim_from_prob_size( op->dim_spec[0], p_cur ); // Map parameter characters to BLIS constants. - bli_param_map_char_to_blis_conj( pc_str[0], &conjbeta ); + bli_param_map_char_to_blis_conj( pc_str[0], &conjalpha ); // Create test scalars. - bli_obj_scalar_init_detached( datatype, &beta ); + bli_obj_scalar_init_detached( datatype, &alpha ); // Create test operands (vectors and/or matrices). 
libblis_test_vobj_create( params, datatype, sc_str[0], m, &y ); libblis_test_vobj_create( params, datatype, sc_str[0], m, &y_save ); - // Set beta. + // Set alpha. if ( bli_obj_is_real( &y ) ) - bli_setsc( -2.0, 0.0, &beta ); + bli_setsc( -2.0, 0.0, &alpha ); else - bli_setsc( 0.0, -2.0, &beta ); + bli_setsc( 0.0, -2.0, &alpha ); // Randomize and save y. libblis_test_vobj_randomize( params, FALSE, &y ); bli_copyv( &y, &y_save ); // Apply the parameters. - bli_obj_set_conj( conjbeta, &beta ); + bli_obj_set_conj( conjalpha, &alpha ); // Repeat the experiment n_repeats times and record results. for ( i = 0; i < n_repeats; ++i ) @@ -200,7 +200,7 @@ void libblis_test_scalv_experiment time = bli_clock(); - libblis_test_scalv_impl( iface, &beta, &y ); + libblis_test_scalv_impl( iface, &alpha, &y ); time_min = bli_clock_min_diff( time_min, time ); } @@ -210,7 +210,7 @@ void libblis_test_scalv_experiment if ( bli_obj_is_complex( &y ) ) *perf *= 6.0; // Perform checks. - libblis_test_scalv_check( params, &beta, &y, &y_save, resid ); + libblis_test_scalv_check( params, &alpha, &y, &y_save, resid ); // Zero out performance and residual if output vector is empty. libblis_test_check_empty_problem( &y, perf, resid ); @@ -225,14 +225,14 @@ void libblis_test_scalv_experiment void libblis_test_scalv_impl ( iface_t iface, - obj_t* beta, + obj_t* alpha, obj_t* y ) { switch ( iface ) { case BLIS_TEST_SEQ_FRONT_END: - bli_scalv( beta, y ); + bli_scalv( alpha, y ); break; default: @@ -245,7 +245,7 @@ void libblis_test_scalv_impl void libblis_test_scalv_check ( test_params_t* params, - obj_t* beta, + obj_t* alpha, obj_t* y, obj_t* y_orig, double* resid @@ -257,7 +257,7 @@ void libblis_test_scalv_check dim_t m = bli_obj_vector_dim( y ); obj_t norm_y_r; - obj_t nbeta; + obj_t nalpha; obj_t y2; @@ -267,16 +267,16 @@ void libblis_test_scalv_check // Pre-conditions: // - y_orig is randomized. 
// Note: - // - beta should have a non-zero imaginary component in the complex + // - alpha should have a non-zero imaginary component in the complex // cases in order to more fully exercise the implementation. // // Under these conditions, we assume that the implementation for // - // y := conjbeta(beta) * y_orig + // y := conjalpha(alpha) * y_orig // // is functioning correctly if // - // normfv( y + -conjbeta(beta) * y_orig ) + // normfv( y + -conjalpha(alpha) * y_orig ) // // is negligible. // @@ -284,13 +284,13 @@ void libblis_test_scalv_check bli_obj_create( dt, m, 1, 0, 0, &y2 ); bli_copyv( y_orig, &y2 ); - bli_obj_scalar_init_detached( dt, &nbeta ); + bli_obj_scalar_init_detached( dt, &nalpha ); bli_obj_scalar_init_detached( dt_real, &norm_y_r ); - bli_copysc( beta, &nbeta ); - bli_mulsc( &BLIS_MINUS_ONE, &nbeta ); + bli_copysc( alpha, &nalpha ); + bli_mulsc( &BLIS_MINUS_ONE, &nalpha ); - bli_scalv( &nbeta, &y2 ); + bli_scalv( &nalpha, &y2 ); bli_addv( &y2, y ); bli_normfv( y, &norm_y_r );