From c2732272f0ac680a0ad19fa9db5d587398a1479a Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 29 Jul 2014 16:37:18 -0500 Subject: [PATCH] Removed old/unused packm variants. --- frame/1m/packm/old/bli_packm_blk_var1.c | 216 ---------- frame/1m/packm/old/bli_packm_blk_var1.h | 61 --- frame/1m/packm/old/bli_packm_blk_var2.c | 265 ------------ frame/1m/packm/old/bli_packm_blk_var2.h | 59 --- frame/1m/packm/old/bli_packm_densify.c | 128 ------ frame/1m/packm/old/bli_packm_densify.h | 51 --- frame/1m/packm/other/bli_packm_blk_var2.c | 467 ---------------------- 7 files changed, 1247 deletions(-) delete mode 100644 frame/1m/packm/old/bli_packm_blk_var1.c delete mode 100644 frame/1m/packm/old/bli_packm_blk_var1.h delete mode 100644 frame/1m/packm/old/bli_packm_blk_var2.c delete mode 100644 frame/1m/packm/old/bli_packm_blk_var2.h delete mode 100644 frame/1m/packm/old/bli_packm_densify.c delete mode 100644 frame/1m/packm/old/bli_packm_densify.h delete mode 100644 frame/1m/packm/other/bli_packm_blk_var2.c diff --git a/frame/1m/packm/old/bli_packm_blk_var1.c b/frame/1m/packm/old/bli_packm_blk_var1.c deleted file mode 100644 index a1923d606..000000000 --- a/frame/1m/packm/old/bli_packm_blk_var1.c +++ /dev/null @@ -1,216 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#define FUNCPTR_T packm_fp - -typedef void (*FUNCPTR_T)( - struc_t strucc, - doff_t diagoffc, - diag_t diagc, - uplo_t uploc, - trans_t transc, - bool_t densify, - dim_t m, - dim_t n, - dim_t m_max, - dim_t n_max, - void* beta, - void* c, inc_t rs_c, inc_t cs_c, - void* p, inc_t rs_p, inc_t cs_p, inc_t ps_p - ); - -static FUNCPTR_T GENARRAY(ftypes,packm_blk_var1); - - -void bli_packm_blk_var1( obj_t* beta, - obj_t* c, - obj_t* p, - packm_t* cntl ) -{ - num_t dt_cp = bli_obj_datatype( *c ); - - struc_t strucc = bli_obj_struc( *c ); - doff_t diagoffc = bli_obj_diag_offset( *c ); - diag_t diagc = bli_obj_diag( *c ); - uplo_t uploc = bli_obj_uplo( *c ); - trans_t transc = bli_obj_conjtrans_status( *c ); - bool_t densify = cntl_does_densify( cntl ); - - dim_t m_p = bli_obj_length( *p ); - dim_t n_p = bli_obj_width( *p ); - dim_t m_max_p = bli_obj_padded_length( *p ); - dim_t n_max_p = bli_obj_padded_width( *p ); - - void* buf_c = bli_obj_buffer_at_off( *c ); - inc_t rs_c = bli_obj_row_stride( *c ); - inc_t cs_c = bli_obj_col_stride( *c ); - - void* buf_p = bli_obj_buffer_at_off( *p ); - inc_t rs_p = bli_obj_row_stride( *p ); - inc_t cs_p = bli_obj_col_stride( *p ); - inc_t ps_p = bli_obj_panel_stride( *p ); - - void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta ); - - FUNCPTR_T f; - - // Index into the type combination array to extract the correct - // function pointer. - f = ftypes[dt_cp]; - - // Invoke the function. - f( strucc, - diagoffc, - diagc, - uploc, - transc, - densify, - m_p, - n_p, - m_max_p, - n_max_p, - buf_beta, - buf_c, rs_c, cs_c, - buf_p, rs_p, cs_p, ps_p ); -} - - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname )( \ - struc_t strucc, \ - doff_t diagoffc, \ - diag_t diagc, \ - uplo_t uploc, \ - trans_t transc, \ - bool_t densify, \ - dim_t m, \ - dim_t n, \ - dim_t m_max, \ - dim_t n_max, \ - void* beta, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p, inc_t ps_p \ - ) \ -{ \ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict c_begin; \ - ctype* restrict p_begin; \ - dim_t panel_dim; \ - dim_t iter_dim; \ - doff_t diagoffc_i; \ - dim_t panel_dim_i; \ - dim_t ic, ip; \ - inc_t diagoffc_inc, ps_c; \ - dim_t* m_panel; \ - dim_t* n_panel; \ - dim_t m_panel_max; \ - dim_t n_panel_max; \ -\ - /* If c needs a transposition, induce it so that we can more simply - express the remaining parameters and code. */ \ - if ( bli_does_trans( transc ) ) \ - { \ - bli_swap_incs( rs_c, cs_c ); \ - bli_negate_diag_offset( diagoffc ); \ - bli_toggle_uplo( uploc ); \ - bli_toggle_trans( transc ); \ - } \ -\ - /* If the strides of p indicate row storage, then we are packing to - column panels; otherwise, if the strides indicate column storage, - we are packing to row panels. */ \ - if ( bli_is_row_stored( rs_p, cs_p ) ) \ - { \ - /* Prepare to pack to column panels. */ \ - iter_dim = n; \ - panel_dim = rs_p; \ - ps_c = cs_c; \ - diagoffc_inc = -( doff_t)panel_dim; \ - m_panel = &m; \ - n_panel = &panel_dim_i; \ - m_panel_max = m_max; \ - n_panel_max = panel_dim; \ - } \ - else /* if ( bli_is_col_stored( rs_p, cs_p ) ) */ \ - { \ - /* Prepare to pack to row panels. */ \ - iter_dim = m; \ - panel_dim = cs_p; \ - ps_c = rs_c; \ - diagoffc_inc = ( doff_t )panel_dim; \ - m_panel = &panel_dim_i; \ - n_panel = &n; \ - m_panel_max = panel_dim; \ - n_panel_max = n_max; \ - } \ -\ -\ - for ( ic = 0, ip = 0, diagoffc_i = diagoffc; ic < iter_dim; \ - ic += panel_dim, ip += 1, diagoffc_i += diagoffc_inc ) \ - { \ - panel_dim_i = bli_min( panel_dim, iter_dim - ic ); \ -\ - c_begin = c_cast + ic * ps_c; \ - p_begin = p_cast + ip * ps_p; \ -\ - PASTEMAC(ch,packm_unb_var1)( strucc, \ - diagoffc_i, \ - diagc, \ - uploc, \ - transc, \ - densify, \ - *m_panel, \ - *n_panel, \ - m_panel_max, \ - n_panel_max, \ - beta_cast, \ - c_begin, rs_c, cs_c, \ - p_begin, rs_p, cs_p ); \ -\ -/* - if ( *m_panel <= 4 ) \ - PASTEMAC(ch,fprintm)( stdout, "p copied", m_panel_max, n_panel_max, \ - p_begin, rs_p, cs_p, "%4.1f", "" ); \ -*/ \ - } \ -} - -INSERT_GENTFUNC_BASIC( packm, packm_blk_var1 ) - diff --git a/frame/1m/packm/old/bli_packm_blk_var1.h b/frame/1m/packm/old/bli_packm_blk_var1.h deleted file mode 100644 index 965616639..000000000 --- a/frame/1m/packm/old/bli_packm_blk_var1.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_packm_blk_var1( obj_t* beta, - obj_t* c, - obj_t* p, - packm_t* cntl ); - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - diag_t diagc, \ - uplo_t uploc, \ - trans_t transc, \ - bool_t densify, \ - dim_t m, \ - dim_t n, \ - dim_t m_max, \ - dim_t n_max, \ - void* beta, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p, inc_t ps_p \ - ); - -INSERT_GENTPROT_BASIC( packm_blk_var1 ) - diff --git a/frame/1m/packm/old/bli_packm_blk_var2.c b/frame/1m/packm/old/bli_packm_blk_var2.c deleted file mode 100644 index 5a5950688..000000000 --- a/frame/1m/packm/old/bli_packm_blk_var2.c +++ /dev/null @@ -1,265 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#define FUNCPTR_T packm_fp - -typedef void (*FUNCPTR_T)( - struc_t strucc, - doff_t diagoffc, - diag_t diagc, - uplo_t uploc, - trans_t transc, - dim_t m, - dim_t n, - dim_t m_max, - dim_t n_max, - void* kappa, - void* c, inc_t rs_c, inc_t cs_c, - void* p, inc_t rs_p, inc_t cs_p, - dim_t pd_p, inc_t ps_p - ); - -static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2); - - -void bli_packm_blk_var2( obj_t* c, - obj_t* p ) -{ - num_t dt_cp = bli_obj_datatype( *c ); - - struc_t strucc = bli_obj_struc( *c ); - doff_t diagoffc = bli_obj_diag_offset( *c ); - diag_t diagc = bli_obj_diag( *c ); - uplo_t uploc = bli_obj_uplo( *c ); - trans_t transc = bli_obj_conjtrans_status( *c ); - - dim_t m_p = bli_obj_length( *p ); - dim_t n_p = bli_obj_width( *p ); - dim_t m_max_p = bli_obj_padded_length( *p ); - dim_t n_max_p = bli_obj_padded_width( *p ); - - void* buf_c = bli_obj_buffer_at_off( *c ); - inc_t rs_c = bli_obj_row_stride( *c ); - inc_t cs_c = bli_obj_col_stride( *c ); - - void* buf_p = bli_obj_buffer_at_off( *p ); - inc_t rs_p = bli_obj_row_stride( *p ); - inc_t cs_p = bli_obj_col_stride( *p ); - dim_t pd_p = bli_obj_panel_dim( *p ); - inc_t ps_p = bli_obj_panel_stride( *p ); - - void* buf_kappa; - - FUNCPTR_T f; - - // This variant assumes that the micro-kernel will always apply the - // alpha scalar of the higher-level operation. Thus, we use BLIS_ONE - // for kappa so that the underlying packm implementation does not - // scale during packing. - buf_kappa = bli_obj_buffer_for_const( dt_cp, BLIS_ONE ); - - // Index into the type combination array to extract the correct - // function pointer. - f = ftypes[dt_cp]; - - // Invoke the function. - f( strucc, - diagoffc, - diagc, - uploc, - transc, - m_p, - n_p, - m_max_p, - n_max_p, - buf_kappa, - buf_c, rs_c, cs_c, - buf_p, rs_p, cs_p, - pd_p, ps_p ); -} - - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname )( \ - struc_t strucc, \ - doff_t diagoffc, \ - diag_t diagc, \ - uplo_t uploc, \ - trans_t transc, \ - dim_t m, \ - dim_t n, \ - dim_t m_max, \ - dim_t n_max, \ - void* kappa, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p, \ - dim_t pd_p, inc_t ps_p \ - ) \ -{ \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ -\ - ctype* restrict c_begin; \ - ctype* restrict p_begin; \ -\ - dim_t iter_dim; \ - dim_t num_iter; \ - dim_t it, ic, ip; \ -\ - dim_t ic0, ip0; \ - dim_t ic_inc, ip_inc; \ - dim_t panel_dim; \ - doff_t diagoffc_i; \ - doff_t diagoffc_inc; \ -\ - dim_t panel_dim_i; \ - inc_t vs_c; \ - dim_t* m_panel; \ - dim_t* n_panel; \ - dim_t m_panel_max; \ - dim_t n_panel_max; \ - conj_t conjc; \ -\ -\ - /* Extract the conjugation bit from the transposition argument. */ \ - conjc = bli_extract_conj( transc ); \ -\ - /* If C needs a transposition, induce it so that we can more simply - express the remaining parameters and code. */ \ - if ( bli_does_trans( transc ) ) \ - { \ - bli_swap_incs( rs_c, cs_c ); \ - bli_negate_diag_offset( diagoffc ); \ - bli_toggle_uplo( uploc ); \ - bli_toggle_trans( transc ); \ - } \ -\ - /* If the strides of p indicate row storage, then we are packing to - column panels; otherwise, if the strides indicate column storage, - we are packing to row panels. */ \ - if ( bli_is_row_stored_f( rs_p, cs_p ) ) \ - { \ - /* Prepare to pack to row-stored column panels. */ \ - iter_dim = n; \ - panel_dim = pd_p; \ - vs_c = cs_c; \ - diagoffc_inc = -( doff_t)panel_dim; \ - m_panel = &m; \ - n_panel = &panel_dim_i; \ - m_panel_max = m_max; \ - n_panel_max = panel_dim; \ - } \ - else /* if ( bli_is_col_stored_f( rs_p, cs_p ) ) */ \ - { \ - /* Prepare to pack to column-stored row panels. */ \ - iter_dim = m; \ - panel_dim = pd_p; \ - vs_c = rs_c; \ - diagoffc_inc = ( doff_t )panel_dim; \ - m_panel = &panel_dim_i; \ - n_panel = &n; \ - m_panel_max = panel_dim; \ - n_panel_max = n_max; \ - } \ -\ - /* Compute the total number of iterations we'll need. */ \ - num_iter = iter_dim / panel_dim + ( iter_dim % panel_dim ? 1 : 0 ); \ -\ - /* Set the initial values and increments for indices related to C and P. - Currently we only support forwards iteration. */ \ - { \ - ic0 = 0; \ - ic_inc = panel_dim; \ - ip0 = 0; \ - ip_inc = 1; \ - } \ -\ - for ( ic = ic0, ip = ip0, it = 0; it < num_iter; \ - ic += ic_inc, ip += ip_inc, it += 1 ) \ - { \ - panel_dim_i = bli_min( panel_dim, iter_dim - ic ); \ -\ - diagoffc_i = diagoffc + (ip )*diagoffc_inc; \ - c_begin = c_cast + (ic )*vs_c; \ - p_begin = p_cast + (ip )*ps_p; \ -\ - /* Call a specialized packm kernel wrapper for Hermitian and - symmetric matrices. Otherwise, call the kernel wrapper for - general matrices. */ \ - if ( bli_is_herm_or_symm( strucc ) ) \ - { \ - PASTEMAC(ch,packm_herm_cxk)( strucc, \ - diagoffc_i, \ - uploc, \ - conjc, \ - *m_panel, \ - *n_panel, \ - m_panel_max, \ - n_panel_max, \ - kappa, \ - c_begin, rs_c, cs_c, \ - p_begin, rs_p, cs_p ); \ - } \ - else /* if ( bli_is_general( strucc ) ) */ \ - { \ - PASTEMAC(ch,packm_gen_cxk)( strucc, \ - diagoffc_i, \ - uploc, \ - conjc, \ - *m_panel, \ - *n_panel, \ - m_panel_max, \ - n_panel_max, \ - kappa, \ - c_begin, rs_c, cs_c, \ - p_begin, rs_p, cs_p ); \ - } \ -\ -/* - if ( rs_p == 1 ) \ - PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: a copied", m_panel_max, n_panel_max, \ - p_begin, 1, cs_p, "%4.1f", "" ); \ - if ( cs_p == 1 ) \ - PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: b copied", m_panel_max, n_panel_max, \ - p_begin, rs_p, 1, "%4.1f", "" ); \ -*/ \ - } \ -} - -INSERT_GENTFUNC_BASIC( packm, packm_blk_var2 ) - diff --git a/frame/1m/packm/old/bli_packm_blk_var2.h b/frame/1m/packm/old/bli_packm_blk_var2.h deleted file mode 100644 index b228bde92..000000000 --- a/frame/1m/packm/old/bli_packm_blk_var2.h +++ /dev/null @@ -1,59 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_packm_blk_var2( obj_t* c, - obj_t* p ); - - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - diag_t diagc, \ - uplo_t uploc, \ - trans_t transc, \ - dim_t m, \ - dim_t n, \ - dim_t m_max, \ - dim_t n_max, \ - void* kappa, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p, \ - dim_t pd_p, inc_t ps_p \ - ); - -INSERT_GENTPROT_BASIC( packm_blk_var2 ) - diff --git a/frame/1m/packm/old/bli_packm_densify.c b/frame/1m/packm/old/bli_packm_densify.c deleted file mode 100644 index aab91fc8e..000000000 --- a/frame/1m/packm/old/bli_packm_densify.c +++ /dev/null @@ -1,128 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - uplo_t uploc, \ - trans_t transc, \ - dim_t m, \ - dim_t n, \ - void* beta, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p \ - ) \ -{ \ - ctype* beta_cast = beta; \ - ctype* c_cast = c; \ - ctype* p_cast = p; \ - ctype* zero = PASTEMAC(ch,0); \ -\ - /* If uploc is upper or lower, then the structure of c is necessarily - non-dense (ie: Hermitian, symmetric, or triangular, where part of the - matrix is unstored). In these cases, when indicated by the densify - parameter, we want to fill in the unstored part of the matrix. How - this is done depends on the structure of c. */ \ - { \ - /* The Hermitian and symmetric cases are almost identical, so we - handle them in one conditional block. */ \ - if ( bli_is_hermitian( strucc ) || bli_is_symmetric( strucc ) ) \ - { \ - /* First we must reflect the region referenced to the opposite - side of the diagonal. */ \ - c_cast = c_cast + diagoffc * ( doff_t )cs_c + \ - -diagoffc * ( doff_t )rs_c; \ - bli_negate_diag_offset( diagoffc ); \ - bli_toggle_trans( transc ); \ - if ( bli_is_upper( uploc ) ) diagoffc += 1; \ - else if ( bli_is_lower( uploc ) ) diagoffc -= 1; \ -\ - /* If c is Hermitian, we need to apply a conjugation when - copying the region opposite the diagonal. */ \ - if ( bli_is_hermitian( strucc ) ) \ - bli_toggle_conj( transc ); \ -\ - /* Copy the data from the region opposite the diagonal of c - (as specified by the original value of diagoffc). Notice - that we use a diag parameter of non-unit since we can - assume nothing about the neighboring off-diagonal. */ \ - PASTEMAC3(ch,ch,ch,scal2m)( diagoffc, \ - BLIS_NONUNIT_DIAG, \ - uploc, \ - transc, \ - m, \ - n, \ - beta_cast, \ - c_cast, rs_c, cs_c, \ - p_cast, rs_p, cs_p ); \ - } \ - else /* if ( bli_is_triangular( strucc ) ) */ \ - { \ - doff_t diagoffp = diagoffc; \ - uplo_t uplop = uploc; \ -\ - /* For this step we need the uplo and diagonal offset of p, which - we can derive from the parameters given. */ \ - if ( bli_does_trans( transc ) ) \ - { \ - bli_negate_diag_offset( diagoffp ); \ - bli_toggle_uplo( uplop ); \ - } \ -\ - /* For triangular matrices, we wish to reference the region - strictly opposite the diagonal of c. This amounts to - shifting the diagonal offset and then toggling uploc. */ \ - if ( bli_is_upper( uplop ) ) diagoffp -= 1; \ - else if ( bli_is_lower( uplop ) ) diagoffp += 1; \ - bli_toggle_uplo( uplop ); \ -\ - /* Set the region opposite the diagonal of p to zero. */ \ - PASTEMAC2(ch,ch,setm)( diagoffp, \ - BLIS_NONUNIT_DIAG, \ - uplop, \ - m, \ - n, \ - zero, \ - p_cast, rs_p, cs_p ); \ - } \ - } \ -} - -INSERT_GENTFUNC_BASIC( packm_densify, packm_densify ) - diff --git a/frame/1m/packm/old/bli_packm_densify.h b/frame/1m/packm/old/bli_packm_densify.h deleted file mode 100644 index 13f140562..000000000 --- a/frame/1m/packm/old/bli_packm_densify.h +++ /dev/null @@ -1,51 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#undef GENTPROT -#define GENTPROT( ctype, ch, varname ) \ -\ -void PASTEMAC(ch,varname)( \ - struc_t strucc, \ - doff_t diagoffc, \ - uplo_t uploc, \ - trans_t transc, \ - dim_t m, \ - dim_t n, \ - void* beta, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p \ - ); - -INSERT_GENTPROT_BASIC( packm_densify ) - diff --git a/frame/1m/packm/other/bli_packm_blk_var2.c b/frame/1m/packm/other/bli_packm_blk_var2.c deleted file mode 100644 index 5ab1e9a35..000000000 --- a/frame/1m/packm/other/bli_packm_blk_var2.c +++ /dev/null @@ -1,467 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" -#include - -#define FUNCPTR_T packm_fp - -typedef void (*FUNCPTR_T)( - struc_t strucc, - doff_t diagoffc, - diag_t diagc, - uplo_t uploc, - trans_t transc, - dim_t m, - dim_t n, - dim_t m_max, - dim_t n_max, - void* beta, - void* c, inc_t rs_c, inc_t cs_c, - void* p, inc_t rs_p, inc_t cs_p, - dim_t pd_p, inc_t ps_p - ); - -static FUNCPTR_T GENARRAY(ftypes,packm_blk_var2); - - -void bli_packm_blk_var2( obj_t* beta, - obj_t* c, - obj_t* p ) -{ - num_t dt_cp = bli_obj_datatype( *c ); - - struc_t strucc = bli_obj_struc( *c ); - doff_t diagoffc = bli_obj_diag_offset( *c ); - diag_t diagc = bli_obj_diag( *c ); - uplo_t uploc = bli_obj_uplo( *c ); - trans_t transc = bli_obj_conjtrans_status( *c ); - - dim_t m_p = bli_obj_length( *p ); - dim_t n_p = bli_obj_width( *p ); - dim_t m_max_p = bli_obj_padded_length( *p ); - dim_t n_max_p = bli_obj_padded_width( *p ); - - void* buf_c = bli_obj_buffer_at_off( *c ); - inc_t rs_c = bli_obj_row_stride( *c ); - inc_t cs_c = bli_obj_col_stride( *c ); - - void* buf_p = bli_obj_buffer_at_off( *p ); - inc_t rs_p = bli_obj_row_stride( *p ); - inc_t cs_p = bli_obj_col_stride( *p ); - dim_t pd_p = bli_obj_panel_dim( *p ); - inc_t ps_p = bli_obj_panel_stride( *p ); - - void* buf_beta = bli_obj_buffer_for_1x1( dt_cp, *beta ); - - FUNCPTR_T f; - - // Index into the type combination array to extract the correct - // function pointer. - f = ftypes[dt_cp]; - - // Invoke the function. - f( strucc, - diagoffc, - diagc, - uploc, - transc, - m_p, - n_p, - m_max_p, - n_max_p, - buf_beta, - buf_c, rs_c, cs_c, - buf_p, rs_p, cs_p, - pd_p, ps_p ); -} - - -#undef GENTFUNC -#define GENTFUNC( ctype, ch, opname, varname ) \ -\ -void PASTEMAC(ch,varname )( \ - struc_t strucc, \ - doff_t diagoffc, \ - diag_t diagc, \ - uplo_t uploc, \ - trans_t transc, \ - dim_t m, \ - dim_t n, \ - dim_t m_max, \ - dim_t n_max, \ - void* beta, \ - void* c, inc_t rs_c, inc_t cs_c, \ - void* p, inc_t rs_p, inc_t cs_p, \ - dim_t pd_p, inc_t ps_p \ - ) \ -{ \ - /* If C needs a transposition, induce it so that we can more simply - express the remaining parameters and code. */ \ - if ( bli_does_trans( transc ) ) \ - { \ - bli_swap_incs( rs_c, cs_c ); \ - bli_negate_diag_offset( diagoffc ); \ - bli_toggle_uplo( uploc ); \ - bli_toggle_trans( transc ); \ - } \ -\ - _Pragma( "omp parallel" ) \ - { \ - guint_t n_threads = omp_get_num_threads(); \ - guint_t t_id = omp_get_thread_num(); \ -\ - ctype* restrict beta_cast = beta; \ - ctype* restrict c_cast = c; \ - ctype* restrict p_cast = p; \ - ctype* restrict zero = PASTEMAC(ch,0); \ - ctype* restrict c_begin; \ - ctype* restrict p_begin; \ -\ - dim_t iter_dim; \ - dim_t num_iter; \ - dim_t it, ic, ip; \ - dim_t j; \ - dim_t ic0, ip0; \ - dim_t ic_inc, ip_inc; \ - dim_t panel_dim; \ - dim_t panel_len; \ - doff_t diagoffc_i; \ - doff_t diagoffc_inc; \ - doff_t diagoffc_i_abs; \ - dim_t panel_dim_i; \ - inc_t vs_c; \ - inc_t incc, ldc; \ - inc_t ldp; \ - dim_t* m_panel; \ - dim_t* n_panel; \ - dim_t m_panel_max; \ - dim_t n_panel_max; \ - conj_t conjc; \ -\ - ctype* restrict c10; \ - ctype* restrict p10; \ - dim_t p10_dim, p10_len; \ - inc_t incc10, ldc10; \ - doff_t diagoffc10; \ - conj_t conjc10; \ -\ - ctype* restrict c12; \ - ctype* restrict p12; \ - dim_t p12_dim, p12_len; \ - inc_t incc12, ldc12; \ - doff_t diagoffc12; \ - conj_t conjc12; \ -\ - ctype* restrict c11; \ - ctype* restrict p11; \ - dim_t p11_m; \ - dim_t p11_n; \ - inc_t rs_p11, cs_p11; \ -\ -\ - /* Extract the conjugation bit from the transposition argument. */ \ - conjc = bli_extract_conj( transc ); \ -\ - /* If the strides of p indicate row storage, then we are packing to - column panels; otherwise, if the strides indicate column storage, - we are packing to row panels. */ \ - if ( bli_is_row_stored( rs_p, cs_p ) ) \ - { \ - /* Prepare to pack to row-stored column panels. */ \ - iter_dim = n; \ - panel_len = m; \ - panel_dim = pd_p; \ - incc = cs_c; \ - ldc = rs_c; \ - vs_c = cs_c; \ - diagoffc_inc = -( doff_t)panel_dim; \ - ldp = rs_p; \ - m_panel = &m; \ - n_panel = &panel_dim_i; \ - m_panel_max = m_max; \ - n_panel_max = panel_dim; \ - rs_p11 = rs_p; \ - cs_p11 = 1; \ - } \ - else /* if ( bli_is_col_stored( rs_p, cs_p ) ) */ \ - { \ - /* Prepare to pack to column-stored row panels. */ \ - iter_dim = m; \ - panel_len = n; \ - panel_dim = pd_p; \ - incc = rs_c; \ - ldc = cs_c; \ - vs_c = rs_c; \ - diagoffc_inc = ( doff_t )panel_dim; \ - ldp = cs_p; \ - m_panel = &panel_dim_i; \ - n_panel = &n; \ - m_panel_max = panel_dim; \ - n_panel_max = n_max; \ - rs_p11 = 1; \ - cs_p11 = cs_p; \ - } \ -\ - /* Compute the total number of iterations we'll need. */ \ - num_iter = iter_dim / panel_dim + ( iter_dim % panel_dim ? 1 : 0 ); \ -\ - /* Set the initial values and increments for indices related to C and P. - Currently we only support forwards iteration. */ \ - { \ - ic0 = 0; \ - ic_inc = panel_dim; \ - ip0 = 0; \ - ip_inc = 1; \ - } \ -\ - for ( ic = ic0 + t_id*ic_inc, ip = ip0 + t_id*ip_inc, it = t_id; it < num_iter; \ - ic += ic_inc*n_threads, ip += ip_inc*n_threads, it += n_threads ) \ - { \ - panel_dim_i = bli_min( panel_dim, iter_dim - ic ); \ -\ - diagoffc_i = diagoffc + (ip )*diagoffc_inc; \ - c_begin = c_cast + (ic )*vs_c; \ - p_begin = p_cast + (ip )*ps_p; \ -\ - /* If the current panel intersects the diagonal and C is either - upper- or lower-stored, then we assume C is symmetric or - Hermitian and that it must be densified (note we don't even - bother passing in a densify parameter), in which case we pack - the panel in three stages. - Otherwise, we pack the panel all at once. */ \ - if ( bli_intersects_diag_n( diagoffc_i, *m_panel, *n_panel ) && \ - bli_is_upper_or_lower( uploc ) ) \ - { \ - diagoffc_i_abs = bli_abs( diagoffc_i ); \ -\ - if ( ( bli_is_col_stored( rs_p, cs_p ) && diagoffc_i < 0 ) || \ - ( bli_is_row_stored( rs_p, cs_p ) && diagoffc_i > 0 ) ) \ - bli_check_error_code( BLIS_NOT_YET_IMPLEMENTED ); \ -\ - if ( ( bli_is_row_stored( rs_p, cs_p ) && bli_is_upper( uploc ) ) || \ - ( bli_is_col_stored( rs_p, cs_p ) && bli_is_lower( uploc ) ) ) \ - { \ - p10_dim = panel_dim_i; \ - p10_len = diagoffc_i_abs; \ - p10 = p_begin; \ - c10 = c_begin; \ - incc10 = incc; \ - ldc10 = ldc; \ - conjc10 = conjc; \ -\ - p12_dim = panel_dim_i; \ - p12_len = panel_len - p10_len; \ - j = p10_len; \ - diagoffc12 = diagoffc_i_abs - j; \ - p12 = p_begin + (j )*ldp; \ - c12 = c_begin + (j )*ldc; \ - c12 = c12 + diagoffc12 * ( doff_t )cs_c + \ - -diagoffc12 * ( doff_t )rs_c; \ - incc12 = ldc; \ - ldc12 = incc; \ - conjc12 = conjc; \ -\ - p11_m = panel_dim_i; \ - p11_n = panel_dim_i; \ - j = diagoffc_i_abs; \ - p11 = p_begin + (j )*ldp; \ - c11 = c_begin + (j )*ldc; \ -\ - if ( bli_is_hermitian( strucc ) ) \ - bli_toggle_conj( conjc12 ); \ - } \ - else /* if ( ( bli_is_row_stored( rs_p, cs_p ) && bli_is_lower( uploc ) ) || \ - ( bli_is_col_stored( rs_p, cs_p ) && bli_is_upper( uploc ) ) ) */ \ - { \ - p10_dim = panel_dim_i; \ - p10_len = diagoffc_i_abs + panel_dim_i; \ - diagoffc10 = diagoffc_i; \ - p10 = p_begin; \ - c10 = c_begin; \ - c10 = c10 + diagoffc10 * ( doff_t )cs_c + \ - -diagoffc10 * ( doff_t )rs_c; \ - incc10 = ldc; \ - ldc10 = incc; \ - conjc10 = conjc; \ -\ - p12_dim = panel_dim_i; \ - p12_len = panel_len - p10_len; \ - j = p10_len; \ - p12 = p_begin + (j )*ldp; \ - c12 = c_begin + (j )*ldc; \ - incc12 = incc; \ - ldc12 = ldc; \ - conjc12 = conjc; \ -\ - p11_m = panel_dim_i; \ - p11_n = panel_dim_i; \ - j = diagoffc_i_abs; \ - p11 = p_begin + (j )*ldp; \ - c11 = c_begin + (j )*ldc; \ -\ - if ( bli_is_hermitian( strucc ) ) \ - bli_toggle_conj( conjc10 ); \ - } \ -\ - /* Pack to P10. For upper storage, this includes the unstored - triangle of C11. */ \ - PASTEMAC(ch,packm_cxk)( conjc10, \ - p10_dim, \ - p10_len, \ - beta_cast, \ - c10, incc10, ldc10, \ - p10, ldp ); \ -\ - /* Pack to P12. For lower storage, this includes the unstored - triangle of C11. */ \ - PASTEMAC(ch,packm_cxk)( conjc12, \ - p12_dim, \ - p12_len, \ - beta_cast, \ - c12, incc12, ldc12, \ - p12, ldp ); \ -\ - /* Pack the stored triangule of C11 to P11. */ \ - PASTEMAC3(ch,ch,ch,scal2m)( 0, \ - BLIS_NONUNIT_DIAG, \ - uploc, \ - conjc, \ - p11_m, \ - p11_n, \ - beta_cast, \ - c11, rs_c, cs_c, \ - p11, rs_p11, cs_p11 ); \ - } \ - else \ - { \ - /* Note that the following code executes if the current panel either: - - does not intersect the diagonal, or - - does intersect the diagonal, BUT the matrix is general - which means the entire current panel can be copied at once. */ \ -\ - /* We use some c10-specific variables here because we might need - to change them if the current panel is unstored. (The values - below are used if the current panel is stored.) */ \ - c10 = c_begin; \ - incc10 = incc; \ - ldc10 = ldc; \ - conjc10 = conjc; \ -\ - /* If the current panel is unstored, we need to make a few - adjustments so we refer to the data where it is actually - stored, and so we take conjugation into account. (Note - this implicitly assumes we are operating on a symmetric or - Hermitian matrix, since a general matrix would not contain - any unstored region.) */ \ - if ( bli_is_unstored_subpart_n( diagoffc_i, uploc, *m_panel, *n_panel ) ) \ - { \ - c10 = c10 + diagoffc_i * ( doff_t )cs_c + \ - -diagoffc_i * ( doff_t )rs_c; \ - bli_swap_incs( incc10, ldc10 ); \ -\ - if ( bli_is_hermitian( strucc ) ) \ - bli_toggle_conj( conjc10 ); \ - } \ -\ - /* Pack the current panel. */ \ - PASTEMAC(ch,packm_cxk)( conjc10, \ - panel_dim_i, \ - panel_len, \ - beta_cast, \ - c10, incc10, ldc10, \ - p_begin, ldp ); \ -\ -/* - PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: c", panel_len, panel_dim_i, \ - c_begin, ldc, incc, "%5.2f", "" ); \ - PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: p copied", panel_len, panel_dim_i, \ - p_begin, ldp, 1, "%5.2f", "" ); \ -*/ \ - } \ -\ - /* The packed memory region was acquired/allocated with "aligned" - dimensions (ie: dimensions that were possibly inflated up to a - multiple). When these dimension are inflated, it creates empty - regions along the bottom and/or right edges of the matrix. If - either region exists, we set them to zero. This simplifies the - register level micro-kernel in that it does not need to support - different register blockings for the edge cases. */ \ - if ( *m_panel != m_panel_max ) \ - { \ - dim_t i = *m_panel; \ - dim_t m_edge = m_panel_max - i; \ - dim_t n_edge = n_panel_max; \ - ctype* p_edge = p_begin + (i )*rs_p; \ -\ - PASTEMAC2(ch,ch,setm)( 0, \ - BLIS_NONUNIT_DIAG, \ - BLIS_DENSE, \ - m_edge, \ - n_edge, \ - zero, \ - p_edge, rs_p, cs_p ); \ - } \ -\ - if ( *n_panel != n_panel_max ) \ - { \ - dim_t j = *n_panel; \ - dim_t m_edge = m_panel_max; \ - dim_t n_edge = n_panel_max - j; \ - ctype* p_edge = p_begin + (j )*cs_p; \ -\ - PASTEMAC2(ch,ch,setm)( 0, \ - BLIS_NONUNIT_DIAG, \ - BLIS_DENSE, \ - m_edge, \ - n_edge, \ - zero, \ - p_edge, rs_p, cs_p ); \ - } \ - } \ -\ - } /* end omp parallel */ \ -\ -/* - if ( rs_p == 1 ) \ - PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: a copied", m_panel_max, n_panel_max, \ - p_begin, 1, cs_p, "%4.1f", "" ); \ - if ( cs_p == 1 ) \ - PASTEMAC(ch,fprintm)( stdout, "packm_blk_var2: b copied", m_panel_max, n_panel_max, \ - p_begin, panel_dim, 1, "%8.5f", "" ); \ -*/ \ -} - -INSERT_GENTFUNC_BASIC( packm, packm_blk_var2 ) -