From 6c25c379fadb50834146e1614f7b80c093c2aad0 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sun, 24 Aug 2014 13:44:10 -0500 Subject: [PATCH] Consolidated unpackm ukernels into single file. Details: - Reorganized unpackm ukernels into a single file, bli_unpackm_ref_cxk.c, in a manner similar to what was done for packm ukernels in commit 4cc2b46. --- frame/1m/unpackm/bli_unpackm_cxk.h | 10 +- .../unpackm/ukernels/bli_unpackm_ref_10xk.c | 139 --- .../unpackm/ukernels/bli_unpackm_ref_10xk.h | 46 - .../unpackm/ukernels/bli_unpackm_ref_12xk.c | 147 --- .../unpackm/ukernels/bli_unpackm_ref_12xk.h | 46 - .../unpackm/ukernels/bli_unpackm_ref_14xk.c | 155 ---- .../unpackm/ukernels/bli_unpackm_ref_16xk.c | 163 ---- .../unpackm/ukernels/bli_unpackm_ref_16xk.h | 46 - .../1m/unpackm/ukernels/bli_unpackm_ref_2xk.c | 107 --- .../1m/unpackm/ukernels/bli_unpackm_ref_2xk.h | 46 - .../1m/unpackm/ukernels/bli_unpackm_ref_4xk.c | 115 --- .../1m/unpackm/ukernels/bli_unpackm_ref_4xk.h | 46 - .../1m/unpackm/ukernels/bli_unpackm_ref_6xk.c | 123 --- .../1m/unpackm/ukernels/bli_unpackm_ref_6xk.h | 46 - .../1m/unpackm/ukernels/bli_unpackm_ref_8xk.c | 131 --- .../1m/unpackm/ukernels/bli_unpackm_ref_8xk.h | 46 - .../1m/unpackm/ukernels/bli_unpackm_ref_cxk.c | 849 ++++++++++++++++++ ...packm_ref_14xk.h => bli_unpackm_ref_cxk.h} | 8 + 18 files changed, 858 insertions(+), 1411 deletions(-) delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.h delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.h delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_14xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.h delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.h 
delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.h delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.h delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.c delete mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.h create mode 100644 frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.c rename frame/1m/unpackm/ukernels/{bli_unpackm_ref_14xk.h => bli_unpackm_ref_cxk.h} (87%) diff --git a/frame/1m/unpackm/bli_unpackm_cxk.h b/frame/1m/unpackm/bli_unpackm_cxk.h index e72025b63..beebe1940 100644 --- a/frame/1m/unpackm/bli_unpackm_cxk.h +++ b/frame/1m/unpackm/bli_unpackm_cxk.h @@ -32,15 +32,7 @@ */ -// Include headers for various unpackm kernels. -#include "bli_unpackm_ref_2xk.h" -#include "bli_unpackm_ref_4xk.h" -#include "bli_unpackm_ref_6xk.h" -#include "bli_unpackm_ref_8xk.h" -#include "bli_unpackm_ref_10xk.h" -#include "bli_unpackm_ref_12xk.h" -#include "bli_unpackm_ref_14xk.h" -#include "bli_unpackm_ref_16xk.h" +#include "bli_unpackm_ref_cxk.h" #undef GENTPROT diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.c deleted file mode 100644 index 6dcee61ee..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.c +++ /dev/null @@ -1,139 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. 
- - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 10; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - 
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( unpackm_ref_10xk, unpackm_ref_10xk ) - diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.h deleted file mode 100644 index 2b5f9730d..000000000 --- 
a/frame/1m/unpackm/ukernels/bli_unpackm_ref_10xk.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ); - -INSERT_GENTPROT_BASIC( unpackm_ref_10xk ) diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.c deleted file mode 100644 index 372e55bbc..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 12; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - 
PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( 
*beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( unpackm_ref_12xk, unpackm_ref_12xk ) - diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.h deleted file mode 100644 index d47476908..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_12xk.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ); - -INSERT_GENTPROT_BASIC( unpackm_ref_12xk ) diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_14xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_14xk.c deleted file mode 100644 index 99f88b5cd..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_14xk.c +++ /dev/null @@ -1,155 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. 
- - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 14; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - 
PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - 
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( unpackm_ref_14xk, unpackm_ref_14xk ) - diff --git 
a/frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.c deleted file mode 100644 index 930d2b9d1..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.c +++ /dev/null @@ -1,163 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 16; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 14), *(alpha1 + 14*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 15), *(alpha1 + 15*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copys)( 
*(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 14), *(alpha1 + 14*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 15), *(alpha1 + 15*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 15), 
*(alpha1 + 15*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( unpackm_ref_16xk, unpackm_ref_16xk ) - diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.h deleted file mode 100644 index fc6f8a3dd..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_16xk.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ); - -INSERT_GENTPROT_BASIC( unpackm_ref_16xk ) diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.c deleted file mode 100644 index 6986ac493..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.c +++ /dev/null @@ -1,107 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 2; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( 
unpackm_ref_2xk, unpackm_ref_2xk ) - diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.h deleted file mode 100644 index bb1734716..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_2xk.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ); - -INSERT_GENTPROT_BASIC( unpackm_ref_2xk ) diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.c deleted file mode 100644 index 2a5bdb7dc..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.c +++ /dev/null @@ -1,115 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 4; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - 
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk ) - diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.h deleted file mode 100644 index 0e5b150b8..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_4xk.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ); - -INSERT_GENTPROT_BASIC( unpackm_ref_4xk ) diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.c deleted file mode 100644 index 19af46023..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.c +++ /dev/null @@ -1,123 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. 
- - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 6; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - 
PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk ) - diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.h deleted file mode 100644 index 9acfcc487..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_6xk.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. 
- - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ); - -INSERT_GENTPROT_BASIC( unpackm_ref_6xk ) diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.c deleted file mode 100644 index dadabe78d..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.c +++ /dev/null @@ -1,131 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ) \ -{ \ - const inc_t ldp = 8; \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict pi1 = p; \ - ctype* restrict alpha1 = a; \ -\ - if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ - 
PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ - else \ - { \ - if ( bli_is_conj( conjp ) ) \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - else \ - { \ - for ( ; n != 0; --n ) \ - { \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ - PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ -\ - pi1 += ldp; \ - alpha1 += lda; \ - } \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( unpackm_ref_8xk, unpackm_ref_8xk ) - diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.h deleted file mode 100644 index 52ad023f7..000000000 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_8xk.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - - BLIS 
- An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - conj_t conjp, \ - dim_t n, \ - void* beta, \ - void* p, \ - void* a, inc_t inca, inc_t lda \ - ); - -INSERT_GENTPROT_BASIC( unpackm_ref_8xk ) diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.c b/frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.c new file mode 100644 index 000000000..f04f2e38c --- /dev/null +++ b/frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.c @@ -0,0 +1,849 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 2; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( 
unpackm_ref_2xk, unpackm_ref_2xk ) + + +/* GENTFUNC template for the 4xk reference unpackm micro-kernel: each of the n loop iterations pairs 4 consecutive elements of p (pi1 + 0 .. pi1 + 3) with 4 elements of a spaced inca apart, then advances pi1 by ldp = 4 and alpha1 by lda. */ +/* When eq1 holds for *beta the copys macro is used (copyjs if conjp is conj); otherwise scal2s (scal2js) is used with *beta. NOTE(review): data presumably flows from p into a, and the n != 0 loop test presumably relies on n never being negative - confirm against the macro definitions and the callers. */ +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 4; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + 
} \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( unpackm_ref_4xk, unpackm_ref_4xk ) + + +/* GENTFUNC template for the 6xk reference unpackm micro-kernel: each of the n loop iterations pairs 6 consecutive elements of p (pi1 + 0 .. pi1 + 5) with 6 elements of a spaced inca apart, then advances pi1 by ldp = 6 and alpha1 by lda. */ +/* When eq1 holds for *beta the copys macro is used (copyjs if conjp is conj); otherwise scal2s (scal2js) is used with *beta. NOTE(review): data presumably flows from p into a, and the n != 0 loop test presumably relies on n never being negative - confirm against the macro definitions and the callers. */ +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 6; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) 
); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( unpackm_ref_6xk, unpackm_ref_6xk ) + + +/* GENTFUNC template for the 8xk reference unpackm micro-kernel: each of the n loop iterations pairs 8 consecutive elements of p (pi1 + 0 .. pi1 + 7) with 8 elements of a spaced inca apart, then advances pi1 by ldp = 8 and alpha1 by lda. */ +/* When eq1 holds for *beta the copys macro is used (copyjs if conjp is conj); otherwise scal2s (scal2js) is used with *beta. NOTE(review): data presumably flows from p into a, and the n != 0 loop test presumably relies on n never being negative - confirm against the macro definitions and the callers. */ +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 8; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); 
\ + PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( unpackm_ref_8xk, unpackm_ref_8xk ) + + +/* GENTFUNC template for the 10xk reference unpackm micro-kernel: the signature below takes conjp, n, beta, p and a with strides inca and lda, the same parameter list as the 8xk template that ends above. */ +/* NOTE(review): the body of this template lies on the following lines; presumably it mirrors the 8xk body with a 10-element panel width - confirm there. */ +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t 
n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 10; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + 
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( unpackm_ref_10xk, unpackm_ref_10xk ) + + +/* GENTFUNC template for the 12xk reference unpackm micro-kernel: ldp is fixed at 12, beta, p and a are cast to ctype pointers (beta_cast, pi1, alpha1), and the first test is eq1 on *beta. */ +/* NOTE(review): the branch bodies lie on the following lines; presumably eq1 selects copys or copyjs and the other branch uses scal2s or scal2js, as in the 10xk template that ends above - confirm there. */ +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 12; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( 
*beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + 
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( unpackm_ref_12xk, unpackm_ref_12xk ) + + +/* GENTFUNC template for the next reference unpackm micro-kernel in the 2xk .. 16xk series starts here. */ +/* NOTE(review): its parameter list and body lie on the following lines; presumably this is the 14xk variant and mirrors the 12xk template that ends above - confirm there. */ +#undef GENTFUNC +#define GENTFUNC( 
ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 14; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 
9), *(alpha1 + 9*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 
4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( unpackm_ref_14xk, unpackm_ref_14xk ) + + +/* GENTFUNC template for the 16xk reference unpackm micro-kernel: ldp is fixed at 16 and beta, p and a are cast to ctype pointers (beta_cast, pi1, alpha1). When eq1 holds for *beta and conjp is conj, each of the n loop iterations applies copyjs to the element pairs (pi1 + i, alpha1 + i*inca). */ +/* NOTE(review): the remaining branches lie on the following lines; presumably they use copys, scal2js and scal2s over 16 element pairs and advance pi1 by ldp and alpha1 by lda, and data presumably flows from p into a - confirm there and against the macro definitions. */ +#undef GENTFUNC +#define GENTFUNC( ctype, ch, opname, varname ) \ +\ +void PASTEMAC(ch,varname)( \ + conj_t conjp, \ + dim_t n, \ + void* beta, \ + void* p, \ + void* a, inc_t inca, inc_t lda \ + ) \ +{ \ + const inc_t ldp = 16; \ +\ + ctype* restrict beta_cast = beta; \ + ctype* restrict pi1 = p; \ + ctype* restrict alpha1 = a; \ +\ + if ( PASTEMAC(ch,eq1)( *beta_cast ) ) \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 10), 
*(alpha1 + 10*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 14), *(alpha1 + 14*inca) ); \ + PASTEMAC2(ch,ch,copyjs)( *(pi1 + 15), *(alpha1 + 15*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 12), *(alpha1 + 12*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 13), *(alpha1 + 13*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 14), *(alpha1 + 14*inca) ); \ + PASTEMAC2(ch,ch,copys)( *(pi1 + 15), *(alpha1 + 15*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ + else \ + { \ + if ( bli_is_conj( conjp ) ) \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + 
PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2js)( *beta_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + else \ + { \ + for ( ; n != 0; --n ) \ + { \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 0), *(alpha1 + 0*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 1), *(alpha1 + 1*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 2), *(alpha1 + 2*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 3), *(alpha1 + 3*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 4), *(alpha1 + 4*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 5), *(alpha1 + 5*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 6), *(alpha1 + 6*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 7), *(alpha1 + 7*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 8), *(alpha1 + 8*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 9), *(alpha1 + 9*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 10), *(alpha1 + 10*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 11), *(alpha1 + 11*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 12), *(alpha1 + 12*inca) ); \ + 
PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 13), *(alpha1 + 13*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 14), *(alpha1 + 14*inca) ); \ + PASTEMAC3(ch,ch,ch,scal2s)( *beta_cast, *(pi1 + 15), *(alpha1 + 15*inca) ); \ +\ + pi1 += ldp; \ + alpha1 += lda; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC_BASIC( unpackm_ref_16xk, unpackm_ref_16xk ) + diff --git a/frame/1m/unpackm/ukernels/bli_unpackm_ref_14xk.h b/frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.h similarity index 87% rename from frame/1m/unpackm/ukernels/bli_unpackm_ref_14xk.h rename to frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.h index 9f4a61eae..17fa425c8 100644 --- a/frame/1m/unpackm/ukernels/bli_unpackm_ref_14xk.h +++ b/frame/1m/unpackm/ukernels/bli_unpackm_ref_cxk.h @@ -43,4 +43,12 @@ void PASTEMAC(ch,varname)( \ void* a, inc_t inca, inc_t lda \ ); +INSERT_GENTPROT_BASIC( unpackm_ref_2xk ) +INSERT_GENTPROT_BASIC( unpackm_ref_4xk ) +INSERT_GENTPROT_BASIC( unpackm_ref_6xk ) +INSERT_GENTPROT_BASIC( unpackm_ref_8xk ) +INSERT_GENTPROT_BASIC( unpackm_ref_10xk ) +INSERT_GENTPROT_BASIC( unpackm_ref_12xk ) INSERT_GENTPROT_BASIC( unpackm_ref_14xk ) +INSERT_GENTPROT_BASIC( unpackm_ref_16xk ) +