diff --git a/frame/1m/packm/bli_packm_blk_var2.c b/frame/1m/packm/bli_packm_blk_var2.c index 838f70aef..199a2ca4c 100644 --- a/frame/1m/packm/bli_packm_blk_var2.c +++ b/frame/1m/packm/bli_packm_blk_var2.c @@ -152,8 +152,8 @@ void bli_packm_blk_var2( obj_t* c, // Choose the correct func_t object based on the pack_t schema. - if ( bli_is_4m_packed( schema ) ) packm_kers = packm_struc_cxk_4m_kers; - else if ( bli_is_3m_packed( schema ) ) packm_kers = packm_struc_cxk_3m_kers; + if ( bli_is_4mi_packed( schema ) ) packm_kers = packm_struc_cxk_4m_kers; + else if ( bli_is_3mi_packed( schema ) ) packm_kers = packm_struc_cxk_3m_kers; else if ( bli_is_ro_packed( schema ) || bli_is_io_packed( schema ) || bli_is_rpi_packed( schema ) ) packm_kers = packm_struc_cxk_rih_kers; @@ -330,7 +330,7 @@ void PASTEMAC(ch,varname)( \ 1/2. In both cases, we are compensating for the fact that pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ - if ( bli_is_3m_packed( schema ) ) { ss_num = 3; ss_den = 2; } \ + if ( bli_is_3mi_packed( schema ) ) { ss_num = 3; ss_den = 2; } \ else if ( bli_is_rih_packed( schema ) ) { ss_num = 1; ss_den = 2; } \ else { ss_num = 1; ss_den = 1; } \ \ diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 9dfce4420..10f6e1bdf 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -362,7 +362,7 @@ void bli_packm_init_pack( invdiag_t invert_diag, // Why? Because the macro-kernel indexes in units of the complex // datatype. So these changes "trick" it into indexing the correct // amount. - if ( bli_is_3m_packed( pack_schema ) ) + if ( bli_is_3mi_packed( pack_schema ) ) { ps_p = ( ps_p * 3 ) / 2; @@ -460,7 +460,7 @@ void bli_packm_init_pack( invdiag_t invert_diag, // Why? Because the macro-kernel indexes in units of the complex // datatype. So these changes "trick" it into indexing the correct // amount. - if ( bli_is_3m_packed( pack_schema ) ) + if ( bli_is_3mi_packed( pack_schema ) ) { ps_p = ( ps_p * 3 ) / 2; diff --git a/frame/3/gemm/3m/bli_gemm3m_cntl.c b/frame/3/gemm/3m/bli_gemm3m_cntl.c index f1b33c7ef..75326dc7f 100644 --- a/frame/3/gemm/3m/bli_gemm3m_cntl.c +++ b/frame/3/gemm/3m/bli_gemm3m_cntl.c @@ -134,7 +134,7 @@ void bli_gemm3m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_3M, + BLIS_PACKED_ROW_PANELS_3MI, BLIS_BUFFER_FOR_A_BLOCK ); gemm3m_packb_cntl @@ -146,7 +146,7 @@ void bli_gemm3m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_3M, + BLIS_PACKED_COL_PANELS_3MI, BLIS_BUFFER_FOR_B_PANEL ); diff --git a/frame/3/gemm/4m/bli_gemm4m_cntl.c b/frame/3/gemm/4m/bli_gemm4m_cntl.c index 8f08add60..2b86ff693 100644 --- a/frame/3/gemm/4m/bli_gemm4m_cntl.c +++ b/frame/3/gemm/4m/bli_gemm4m_cntl.c @@ -131,7 +131,7 @@ void bli_gemm4m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_4M, + BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); gemm4m_packb_cntl @@ -143,7 +143,7 @@ void bli_gemm4m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_4M, + BLIS_PACKED_COL_PANELS_4MI, BLIS_BUFFER_FOR_B_PANEL ); diff --git a/frame/3/gemm/4mb/bli_gemm4mb_cntl.c b/frame/3/gemm/4mb/bli_gemm4mb_cntl.c index e660103d2..62e122a8d 100644 --- a/frame/3/gemm/4mb/bli_gemm4mb_cntl.c +++ b/frame/3/gemm/4mb/bli_gemm4mb_cntl.c @@ -125,7 +125,7 @@ void bli_gemm4mb_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_4M, + BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); gemm4mb_packb_cntl @@ -137,7 +137,7 @@ void bli_gemm4mb_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_4M, + BLIS_PACKED_COL_PANELS_4MI, BLIS_BUFFER_FOR_B_PANEL ); diff --git a/frame/3/herk/3m/old/bli_herk3m_cntl.c b/frame/3/herk/3m/old/bli_herk3m_cntl.c deleted file mode 100644 index f2900eb78..000000000 --- a/frame/3/herk/3m/old/bli_herk3m_cntl.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern scalm_t* scalm_cntl; - -extern blksz_t* gemm3m_mc; -extern blksz_t* gemm3m_nc; -extern blksz_t* gemm3m_kc; -extern blksz_t* gemm3m_mr; -extern blksz_t* gemm3m_nr; -extern blksz_t* gemm3m_kr; - -extern func_t* gemm3m_ukrs; - -packm_t* herk3m_packa_cntl; -packm_t* herk3m_packb_cntl; - -herk_t* herk3m_cntl_bp_ke; -herk_t* herk3m_cntl_op_bp; -herk_t* herk3m_cntl_mm_op; -herk_t* herk3m_cntl_vl_mm; - -herk_t* herk3m_cntl; - - -void bli_herk3m_cntl_init() -{ - // Create control tree objects for packm operations. - herk3m_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm3m_mr, - gemm3m_kr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_3M, - BLIS_BUFFER_FOR_A_BLOCK ); - - herk3m_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm3m_kr, - gemm3m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_3M, - BLIS_BUFFER_FOR_B_PANEL ); - - - // Create control tree object for lowest-level block-panel kernel. - herk3m_cntl_bp_ke - = - bli_herk_cntl_obj_create( BLIS_UNB_OPT, - BLIS_VARIANT2, - NULL, - gemm3m_ukrs, - NULL, NULL, NULL, - NULL, NULL, NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem. - herk3m_cntl_op_bp - = - bli_herk_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, - gemm3m_mc, - gemm3m_ukrs, - NULL, - herk3m_packa_cntl, - herk3m_packb_cntl, - NULL, - herk3m_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates. - herk3m_cntl_mm_op - = - bli_herk_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - gemm3m_kc, - gemm3m_ukrs, - NULL, - NULL, - NULL, - NULL, - herk3m_cntl_op_bp, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems. - herk3m_cntl_vl_mm - = - bli_herk_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm3m_nc, - gemm3m_ukrs, - NULL, - NULL, - NULL, - NULL, - herk3m_cntl_mm_op, - NULL ); - - // Alias the "master" herk control tree to a shorter name. - herk3m_cntl = herk3m_cntl_vl_mm; -} - -void bli_herk3m_cntl_finalize() -{ - bli_cntl_obj_free( herk3m_packa_cntl ); - bli_cntl_obj_free( herk3m_packb_cntl ); - - bli_cntl_obj_free( herk3m_cntl_bp_ke ); - bli_cntl_obj_free( herk3m_cntl_op_bp ); - bli_cntl_obj_free( herk3m_cntl_mm_op ); - bli_cntl_obj_free( herk3m_cntl_vl_mm ); -} - diff --git a/frame/3/herk/4m/old/bli_herk4m_cntl.c b/frame/3/herk/4m/old/bli_herk4m_cntl.c deleted file mode 100644 index 1fe6d7652..000000000 --- a/frame/3/herk/4m/old/bli_herk4m_cntl.c +++ /dev/null @@ -1,156 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern scalm_t* scalm_cntl; - -extern blksz_t* gemm4m_mc; -extern blksz_t* gemm4m_nc; -extern blksz_t* gemm4m_kc; -extern blksz_t* gemm4m_mr; -extern blksz_t* gemm4m_nr; -extern blksz_t* gemm4m_kr; - -extern func_t* gemm4m_ukrs; - -packm_t* herk4m_packa_cntl; -packm_t* herk4m_packb_cntl; - -herk_t* herk4m_cntl_bp_ke; -herk_t* herk4m_cntl_op_bp; -herk_t* herk4m_cntl_mm_op; -herk_t* herk4m_cntl_vl_mm; - -herk_t* herk4m_cntl; - - -void bli_herk4m_cntl_init() -{ - // Create control tree objects for packm operations. - herk4m_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm4m_mr, - gemm4m_kr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_4M, - BLIS_BUFFER_FOR_A_BLOCK ); - - herk4m_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm4m_kr, - gemm4m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_4M, - BLIS_BUFFER_FOR_B_PANEL ); - - - // Create control tree object for lowest-level block-panel kernel. - herk4m_cntl_bp_ke - = - bli_herk_cntl_obj_create( BLIS_UNB_OPT, - BLIS_VARIANT2, - NULL, - gemm4m_ukrs, - NULL, NULL, NULL, - NULL, NULL, NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem. - herk4m_cntl_op_bp - = - bli_herk_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, - gemm4m_mc, - gemm4m_ukrs, - NULL, - herk4m_packa_cntl, - herk4m_packb_cntl, - NULL, - herk4m_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates. - herk4m_cntl_mm_op - = - bli_herk_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - gemm4m_kc, - gemm4m_ukrs, - NULL, - NULL, - NULL, - NULL, - herk4m_cntl_op_bp, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems. - herk4m_cntl_vl_mm - = - bli_herk_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm4m_nc, - gemm4m_ukrs, - NULL, - NULL, - NULL, - NULL, - herk4m_cntl_mm_op, - NULL ); - - // Alias the "master" herk control tree to a shorter name. - herk4m_cntl = herk4m_cntl_vl_mm; -} - -void bli_herk4m_cntl_finalize() -{ - bli_cntl_obj_free( herk4m_packa_cntl ); - bli_cntl_obj_free( herk4m_packb_cntl ); - - bli_cntl_obj_free( herk4m_cntl_bp_ke ); - bli_cntl_obj_free( herk4m_cntl_op_bp ); - bli_cntl_obj_free( herk4m_cntl_mm_op ); - bli_cntl_obj_free( herk4m_cntl_vl_mm ); -} - diff --git a/frame/3/trmm/3m/old/bli_trmm3m_cntl.c b/frame/3/trmm/3m/old/bli_trmm3m_cntl.c deleted file mode 100644 index ac7c5a16e..000000000 --- a/frame/3/trmm/3m/old/bli_trmm3m_cntl.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern scalm_t* scalm_cntl; - -extern blksz_t* gemm3m_mc; -extern blksz_t* gemm3m_nc; -extern blksz_t* gemm3m_kc; -extern blksz_t* gemm3m_mr; -extern blksz_t* gemm3m_nr; -extern blksz_t* gemm3m_kr; - -extern func_t* gemm3m_ukrs; - -extern gemm_t* gemm3m_cntl_bp_ke; - -packm_t* trmm3m_l_packa_cntl; -packm_t* trmm3m_l_packb_cntl; - -packm_t* trmm3m_r_packa_cntl; -packm_t* trmm3m_r_packb_cntl; - -gemm_t* trmm3m_cntl_bp_ke; - -gemm_t* trmm3m_l_cntl_op_bp; -gemm_t* trmm3m_l_cntl_mm_op; -gemm_t* trmm3m_l_cntl_vl_mm; - -gemm_t* trmm3m_r_cntl_op_bp; -gemm_t* trmm3m_r_cntl_mm_op; -gemm_t* trmm3m_r_cntl_vl_mm; - -gemm_t* trmm3m_l_cntl; -gemm_t* trmm3m_r_cntl; - - -void bli_trmm3m_cntl_init() -{ - // Create control tree objects for packm operations (left side). - trmm3m_l_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: for consistency with trsm, "k" dim - // multiple is set to mr. - gemm3m_mr, - gemm3m_kr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_3M, - BLIS_BUFFER_FOR_A_BLOCK ); - - trmm3m_l_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: m dim multiple here must be mr - // since "k" dim multiple is set to mr above. - gemm3m_kr, - gemm3m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_3M, - BLIS_BUFFER_FOR_B_PANEL ); - - // Create control tree objects for packm operations (right side). - trmm3m_r_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: for consistency with trsm, "k" dim - // multiple is set to nr. - gemm3m_mr, - gemm3m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_3M, - BLIS_BUFFER_FOR_A_BLOCK ); - - trmm3m_r_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: m dim multiple here must be nr - // since "k" dim multiple is set to nr above. - gemm3m_nr, - gemm3m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_3M, - BLIS_BUFFER_FOR_B_PANEL ); - - - // Create control tree object for lowest-level block-panel kernel. - trmm3m_cntl_bp_ke - = - bli_trmm_cntl_obj_create( BLIS_UNB_OPT, - BLIS_VARIANT2, - NULL, - gemm3m_ukrs, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem (left side). - trmm3m_l_cntl_op_bp - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, - gemm3m_mc, - gemm3m_ukrs, - NULL, - trmm3m_l_packa_cntl, - trmm3m_l_packb_cntl, - NULL, - trmm3m_cntl_bp_ke, - gemm3m_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates (left side). - trmm3m_l_cntl_mm_op - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - gemm3m_kc, - gemm3m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm3m_l_cntl_op_bp, - NULL, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems (left side). - trmm3m_l_cntl_vl_mm - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm3m_nc, - gemm3m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm3m_l_cntl_mm_op, - NULL, - NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem (right side). - trmm3m_r_cntl_op_bp - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, - gemm3m_mc, - gemm3m_ukrs, - NULL, - trmm3m_r_packa_cntl, - trmm3m_r_packb_cntl, - NULL, - trmm3m_cntl_bp_ke, - gemm3m_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates (right side). - trmm3m_r_cntl_mm_op - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - gemm3m_kc, - gemm3m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm3m_r_cntl_op_bp, - NULL, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems (right side). - trmm3m_r_cntl_vl_mm - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm3m_nc, - gemm3m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm3m_r_cntl_mm_op, - NULL, - NULL ); - - // Alias the "master" trmm control trees to shorter names. - trmm3m_l_cntl = trmm3m_l_cntl_vl_mm; - trmm3m_r_cntl = trmm3m_r_cntl_vl_mm; -} - -void bli_trmm3m_cntl_finalize() -{ - bli_cntl_obj_free( trmm3m_l_packa_cntl ); - bli_cntl_obj_free( trmm3m_l_packb_cntl ); - bli_cntl_obj_free( trmm3m_r_packa_cntl ); - bli_cntl_obj_free( trmm3m_r_packb_cntl ); - - bli_cntl_obj_free( trmm3m_cntl_bp_ke ); - - bli_cntl_obj_free( trmm3m_l_cntl_op_bp ); - bli_cntl_obj_free( trmm3m_l_cntl_mm_op ); - bli_cntl_obj_free( trmm3m_l_cntl_vl_mm ); - bli_cntl_obj_free( trmm3m_r_cntl_op_bp ); - bli_cntl_obj_free( trmm3m_r_cntl_mm_op ); - bli_cntl_obj_free( trmm3m_r_cntl_vl_mm ); -} - diff --git a/frame/3/trmm/4m/old/bli_trmm4m_cntl.c b/frame/3/trmm/4m/old/bli_trmm4m_cntl.c deleted file mode 100644 index a1a7d004d..000000000 --- a/frame/3/trmm/4m/old/bli_trmm4m_cntl.c +++ /dev/null @@ -1,258 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas at Austin - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas at Austin nor the names - of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern scalm_t* scalm_cntl; - -extern blksz_t* gemm4m_mc; -extern blksz_t* gemm4m_nc; -extern blksz_t* gemm4m_kc; -extern blksz_t* gemm4m_mr; -extern blksz_t* gemm4m_nr; -extern blksz_t* gemm4m_kr; - -extern func_t* gemm4m_ukrs; - -extern gemm_t* gemm4m_cntl_bp_ke; - -packm_t* trmm4m_l_packa_cntl; -packm_t* trmm4m_l_packb_cntl; - -packm_t* trmm4m_r_packa_cntl; -packm_t* trmm4m_r_packb_cntl; - -gemm_t* trmm4m_cntl_bp_ke; - -gemm_t* trmm4m_l_cntl_op_bp; -gemm_t* trmm4m_l_cntl_mm_op; -gemm_t* trmm4m_l_cntl_vl_mm; - -gemm_t* trmm4m_r_cntl_op_bp; -gemm_t* trmm4m_r_cntl_mm_op; -gemm_t* trmm4m_r_cntl_vl_mm; - -gemm_t* trmm4m_l_cntl; -gemm_t* trmm4m_r_cntl; - - -void bli_trmm4m_cntl_init() -{ - // Create control tree objects for packm operations (left side). - trmm4m_l_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: for consistency with trsm, "k" dim - // multiple is set to mr. - gemm4m_mr, - gemm4m_kr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_4M, - BLIS_BUFFER_FOR_A_BLOCK ); - - trmm4m_l_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: m dim multiple here must be mr - // since "k" dim multiple is set to mr above. - gemm4m_kr, - gemm4m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_4M, - BLIS_BUFFER_FOR_B_PANEL ); - - // Create control tree objects for packm operations (right side). - trmm4m_r_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: for consistency with trsm, "k" dim - // multiple is set to nr. - gemm4m_mr, - gemm4m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_4M, - BLIS_BUFFER_FOR_A_BLOCK ); - - trmm4m_r_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - // IMPORTANT: m dim multiple here must be nr - // since "k" dim multiple is set to nr above. - gemm4m_nr, - gemm4m_nr, - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_4M, - BLIS_BUFFER_FOR_B_PANEL ); - - - // Create control tree object for lowest-level block-panel kernel. - trmm4m_cntl_bp_ke - = - bli_trmm_cntl_obj_create( BLIS_UNB_OPT, - BLIS_VARIANT2, - NULL, - gemm4m_ukrs, - NULL, NULL, NULL, NULL, - NULL, NULL, NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem (left side). - trmm4m_l_cntl_op_bp - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, - gemm4m_mc, - gemm4m_ukrs, - NULL, - trmm4m_l_packa_cntl, - trmm4m_l_packb_cntl, - NULL, - trmm4m_cntl_bp_ke, - gemm4m_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates (left side). - trmm4m_l_cntl_mm_op - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - gemm4m_kc, - gemm4m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm4m_l_cntl_op_bp, - NULL, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems (left side). - trmm4m_l_cntl_vl_mm - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm4m_nc, - gemm4m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm4m_l_cntl_mm_op, - NULL, - NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem (right side). - trmm4m_r_cntl_op_bp - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, - gemm4m_mc, - gemm4m_ukrs, - NULL, - trmm4m_r_packa_cntl, - trmm4m_r_packb_cntl, - NULL, - trmm4m_cntl_bp_ke, - gemm4m_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates (right side). - trmm4m_r_cntl_mm_op - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - gemm4m_kc, - gemm4m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm4m_r_cntl_op_bp, - NULL, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems (right side). - trmm4m_r_cntl_vl_mm - = - bli_trmm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - gemm4m_nc, - gemm4m_ukrs, - NULL, - NULL, - NULL, - NULL, - trmm4m_r_cntl_mm_op, - NULL, - NULL ); - - // Alias the "master" trmm control trees to shorter names. - trmm4m_l_cntl = trmm4m_l_cntl_vl_mm; - trmm4m_r_cntl = trmm4m_r_cntl_vl_mm; -} - -void bli_trmm4m_cntl_finalize() -{ - bli_cntl_obj_free( trmm4m_l_packa_cntl ); - bli_cntl_obj_free( trmm4m_l_packb_cntl ); - bli_cntl_obj_free( trmm4m_r_packa_cntl ); - bli_cntl_obj_free( trmm4m_r_packb_cntl ); - - bli_cntl_obj_free( trmm4m_cntl_bp_ke ); - - bli_cntl_obj_free( trmm4m_l_cntl_op_bp ); - bli_cntl_obj_free( trmm4m_l_cntl_mm_op ); - bli_cntl_obj_free( trmm4m_l_cntl_vl_mm ); - bli_cntl_obj_free( trmm4m_r_cntl_op_bp ); - bli_cntl_obj_free( trmm4m_r_cntl_mm_op ); - bli_cntl_obj_free( trmm4m_r_cntl_vl_mm ); -} - diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index 43b81e9f9..035699ec0 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_a ) || \ - bli_is_3m_packed( schema_a ) || \ + if ( bli_is_4mi_packed( schema_a ) || \ + bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \ summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ - if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ + if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.c b/frame/3/trmm/bli_trmm_lu_ker_var2.c index 653e20d43..12b9faca3 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.c +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c @@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_a ) || \ - bli_is_3m_packed( schema_a ) || \ + if ( bli_is_4mi_packed( schema_a ) || \ + bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \ summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ - if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ + if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ else if ( bli_is_rih_packed( schema_a ) ) { ss_a_num = 1; ss_a_den = 2; } \ else { ss_a_num = 1; ss_a_den = 1; } \ \ diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index 2cadebba0..9107f3130 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_b ) || \ - bli_is_3m_packed( schema_b ) || \ + if ( bli_is_4mi_packed( schema_b ) || \ + bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \ summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ - if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ + if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 64e5a453c..55c92f6cc 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -243,8 +243,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_b ) || \ - bli_is_3m_packed( schema_b ) || \ + if ( bli_is_4mi_packed( schema_b ) || \ + bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -254,7 +254,7 @@ void PASTEMAC(ch,varname)( \ summed-only, we need to scale the computed panel sizes by 1/2 to compensate for the fact that the pointer arithmetic occurs in terms of complex elements rather than real elements. */ \ - if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ + if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ else if ( bli_is_rih_packed( schema_b ) ) { ss_b_num = 1; ss_b_den = 2; } \ else { ss_b_num = 1; ss_b_den = 1; } \ \ diff --git a/frame/3/trsm/3m/bli_trsm3m_cntl.c b/frame/3/trsm/3m/bli_trsm3m_cntl.c index 1e8580427..9633d8bd7 100644 --- a/frame/3/trsm/3m/bli_trsm3m_cntl.c +++ b/frame/3/trsm/3m/bli_trsm3m_cntl.c @@ -120,7 +120,7 @@ void bli_trsm3m_cntl_init() TRUE, // invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_3M, + BLIS_PACKED_ROW_PANELS_3MI, BLIS_BUFFER_FOR_A_BLOCK ); trsm3m_l_packb_cntl @@ -134,7 +134,7 @@ void bli_trsm3m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_3M, + BLIS_PACKED_COL_PANELS_3MI, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (right side). @@ -147,7 +147,7 @@ void bli_trsm3m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_3M, + BLIS_PACKED_ROW_PANELS_3MI, BLIS_BUFFER_FOR_A_BLOCK ); trsm3m_r_packb_cntl @@ -159,7 +159,7 @@ void bli_trsm3m_cntl_init() TRUE, // invert diagonal FALSE, // reverse iteration if upper? TRUE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_3M, + BLIS_PACKED_COL_PANELS_3MI, BLIS_BUFFER_FOR_B_PANEL ); diff --git a/frame/3/trsm/4m/bli_trsm4m_cntl.c b/frame/3/trsm/4m/bli_trsm4m_cntl.c index 3fd5470c3..ff1053872 100644 --- a/frame/3/trsm/4m/bli_trsm4m_cntl.c +++ b/frame/3/trsm/4m/bli_trsm4m_cntl.c @@ -120,7 +120,7 @@ void bli_trsm4m_cntl_init() TRUE, // invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_4M, + BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); trsm4m_l_packb_cntl @@ -134,7 +134,7 @@ void bli_trsm4m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_4M, + BLIS_PACKED_COL_PANELS_4MI, BLIS_BUFFER_FOR_B_PANEL ); // Create control tree objects for packm operations (right side). @@ -147,7 +147,7 @@ void bli_trsm4m_cntl_init() FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS_4M, + BLIS_PACKED_ROW_PANELS_4MI, BLIS_BUFFER_FOR_A_BLOCK ); trsm4m_r_packb_cntl @@ -159,7 +159,7 @@ void bli_trsm4m_cntl_init() TRUE, // invert diagonal FALSE, // reverse iteration if upper? TRUE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS_4M, + BLIS_PACKED_COL_PANELS_4MI, BLIS_BUFFER_FOR_B_PANEL ); diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index bd066d627..1c0327690 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -255,8 +255,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_a ) || \ - bli_is_3m_packed( schema_a ) || \ + if ( bli_is_4mi_packed( schema_a ) || \ + bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -266,8 +266,8 @@ void PASTEMAC(ch,varname)( \ packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). */ \ - if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ - else { ss_a_num = 1; ss_a_den = 1; } \ + if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ + else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region above where the diagonal of A intersects the left edge of the block, adjust the pointer to C and treat this case as diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.c b/frame/3/trsm/bli_trsm_lu_ker_var2.c index 67ae3d55f..3054a7f90 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.c +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c @@ -256,8 +256,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_a ) || \ - bli_is_3m_packed( schema_a ) || \ + if ( bli_is_4mi_packed( schema_a ) || \ + bli_is_3mi_packed( schema_a ) || \ bli_is_rih_packed( schema_a ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -267,8 +267,8 @@ void PASTEMAC(ch,varname)( \ packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). */ \ - if ( bli_is_3m_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ - else { ss_a_num = 1; ss_a_den = 1; } \ + if ( bli_is_3mi_packed( schema_a ) ) { ss_a_num = 3; ss_a_den = 2; } \ + else { ss_a_num = 1; ss_a_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of A intersects the top edge of the block, adjust the pointer to B and diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index bdee5b4d1..cbcd17c76 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -264,8 +264,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_b ) || \ - bli_is_3m_packed( schema_b ) || \ + if ( bli_is_4mi_packed( schema_b ) || \ + bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -275,8 +275,8 @@ void PASTEMAC(ch,varname)( \ packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). */ \ - if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ - else { ss_b_num = 1; ss_b_den = 1; } \ + if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ + else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region above where the diagonal of B intersects the left edge of the panel, adjust the pointer to A and treat this diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index e65f946e7..f4ab7b44c 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -263,8 +263,8 @@ void PASTEMAC(ch,varname)( \ needs to occur in units of real values. The value computed here is divided into the complex pointer offset to cause the pointer to be advanced by the correct value. */ \ - if ( bli_is_4m_packed( schema_b ) || \ - bli_is_3m_packed( schema_b ) || \ + if ( bli_is_4mi_packed( schema_b ) || \ + bli_is_3mi_packed( schema_b ) || \ bli_is_rih_packed( schema_b ) ) off_scl = 2; \ else off_scl = 1; \ \ @@ -274,8 +274,8 @@ void PASTEMAC(ch,varname)( \ packing formats are not applicable here since trsm is a two- operand operation only (unlike trmm, which is capable of three- operand). */ \ - if ( bli_is_3m_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ - else { ss_b_num = 1; ss_b_den = 1; } \ + if ( bli_is_3mi_packed( schema_b ) ) { ss_b_num = 3; ss_b_den = 2; } \ + else { ss_b_num = 1; ss_b_den = 1; } \ \ /* If there is a zero region to the left of where the diagonal of B intersects the top edge of the panel, adjust the pointer to C and diff --git a/frame/include/bli_obj_macro_defs.h b/frame/include/bli_obj_macro_defs.h index df859266f..cee5314e2 100644 --- a/frame/include/bli_obj_macro_defs.h +++ b/frame/include/bli_obj_macro_defs.h @@ -213,13 +213,13 @@ \ ( ( (obj).info & BLIS_PACK_PANEL_BIT ) ) -#define bli_obj_is_4m_packed( obj ) \ +#define bli_obj_is_4mi_packed( obj ) \ \ - ( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M ) + ( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4MI ) -#define bli_obj_is_3m_packed( obj ) \ +#define bli_obj_is_3mi_packed( obj ) \ \ - ( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M ) + ( ( (obj).info & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MI ) #define bli_obj_is_ro_packed( obj ) \ \ diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 7b8f56543..3d205d5f4 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -521,13 +521,13 @@ \ ( ( schema & BLIS_PACK_PANEL_BIT ) ) -#define bli_is_4m_packed( schema ) \ +#define bli_is_4mi_packed( schema ) \ \ - ( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4M ) + ( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_4MI ) -#define bli_is_3m_packed( schema ) \ +#define bli_is_3mi_packed( schema ) \ \ - ( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3M ) + ( ( schema & BLIS_PACK_FORMAT_BITS ) == BLIS_BITVAL_3MI ) #define bli_is_ro_packed( schema ) \ \ diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index a1a83bd0d..fa09e3e9a 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -198,35 +198,35 @@ typedef dcomplex f77_dcomplex; - 13: domain (0 == real, 1 == complex) - 14: precision (0 == single, 1 == double) - 15: unused - 21 ~ 16 Packed type/status - - 000000: not packed - - 100000: packed (unspecified; by rows, columns, or vector) - - 100000: packed by rows - - 100001: packed by columns - - 100010: packed by row panels - - 100011: packed by column panels - - 100110: packed by 4m row panels - - 100111: packed by 4m column panels - - 101010: packed by 3m row panels - - 101011: packed by 3m column panels - - 110010: packed real-only row panels - - 110011: packed real-only column panels - - 110110: packed imag-only row panels - - 110111: packed imag-only column panels - - 111010: packed real+imag row panels - - 111011: packed real+imag column panels - 22 Packed panel order if upper-stored + 22 ~ 16 Packed type/status + - 0 0000 00: not packed + - 1 0000 00: packed (unspecified; by rows, columns, or vector) + - 1 0000 00: packed by rows + - 1 0000 01: packed by columns + - 1 0000 10: packed by row panels + - 1 0000 11: packed by column panels + - 1 0001 10: packed by 4m interleaved row panels + - 1 0001 11: packed by 4m interleaved column panels + - 1 0010 10: packed by 3m interleaved row panels + - 1 0010 11: packed by 3m interleaved column panels + - 1 0101 10: packed real-only row panels + - 1 0101 11: packed real-only column panels + - 1 0110 10: packed imag-only row panels + - 1 0110 11: packed imag-only column panels + - 1 0111 10: packed real+imag row panels + - 1 0111 11: packed real+imag column panels + 23 Packed panel order if upper-stored - 0 == forward order if upper - 1 == reverse order if upper - 23 Packed panel order if lower-stored + 24 Packed panel order if lower-stored - 0 == forward order if lower - 1 == reverse order if lower - 25 ~ 24 Packed buffer type + 26 ~ 25 Packed buffer type - 0 == block of A - 1 == panel of B - 2 == panel of C - 3 == general use - 27 ~ 26 Structure type + 28 ~ 27 Structure type - 0 == general - 1 == Hermitian - 2 == symmetric @@ -251,11 +251,11 @@ typedef dcomplex f77_dcomplex; #define BLIS_PACK_RC_SHIFT 16 #define BLIS_PACK_PANEL_SHIFT 17 #define BLIS_PACK_FORMAT_SHIFT 18 -#define BLIS_PACK_SHIFT 21 -#define BLIS_PACK_REV_IF_UPPER_SHIFT 22 -#define BLIS_PACK_REV_IF_LOWER_SHIFT 23 -#define BLIS_PACK_BUFFER_SHIFT 24 -#define BLIS_STRUC_SHIFT 26 +#define BLIS_PACK_SHIFT 22 +#define BLIS_PACK_REV_IF_UPPER_SHIFT 23 +#define BLIS_PACK_REV_IF_LOWER_SHIFT 24 +#define BLIS_PACK_BUFFER_SHIFT 25 +#define BLIS_STRUC_SHIFT 27 // // -- BLIS info bit field masks ------------------------------------------------ @@ -275,10 +275,10 @@ typedef dcomplex f77_dcomplex; #define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT ) #define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT ) #define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT ) -#define BLIS_PACK_SCHEMA_BITS ( 0x3F << BLIS_PACK_SCHEMA_SHIFT ) +#define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT ) #define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT ) #define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT ) -#define BLIS_PACK_FORMAT_BITS ( 0x7 << BLIS_PACK_FORMAT_SHIFT ) +#define BLIS_PACK_FORMAT_BITS ( 0xF << BLIS_PACK_FORMAT_SHIFT ) #define BLIS_PACK_BIT ( 0x1 << BLIS_PACK_SHIFT ) #define BLIS_PACK_REV_IF_UPPER_BIT ( 0x1 << BLIS_PACK_REV_IF_UPPER_SHIFT ) #define BLIS_PACK_REV_IF_LOWER_BIT ( 0x1 << BLIS_PACK_REV_IF_LOWER_SHIFT ) @@ -290,61 +290,61 @@ typedef dcomplex f77_dcomplex; // -- BLIS enumerated type value definitions ----------------------------------- // -#define BLIS_BITVAL_REAL 0x0 -#define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT -#define BLIS_BITVAL_SINGLE_PREC 0x0 -#define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT -#define BLIS_BITVAL_FLOAT_TYPE 0x0 -#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT -#define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT -#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT ) -#define BLIS_BITVAL_INT_TYPE 0x04 -#define BLIS_BITVAL_CONST_TYPE 0x05 -#define BLIS_BITVAL_NO_TRANS 0x0 -#define BLIS_BITVAL_TRANS BLIS_TRANS_BIT -#define BLIS_BITVAL_NO_CONJ 0x0 -#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT -#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT ) -#define BLIS_BITVAL_ZEROS 0x0 -#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT ) -#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT ) -#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS -#define BLIS_BITVAL_NONUNIT_DIAG 0x0 -#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT -#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT -#define BLIS_BITVAL_NOT_PACKED 0x0 -#define BLIS_BITVAL_4M ( 0x1 << BLIS_PACK_FORMAT_SHIFT ) -#define BLIS_BITVAL_3M ( 0x2 << BLIS_PACK_FORMAT_SHIFT ) -#define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT ) -#define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT ) -#define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT ) -#define BLIS_BITVAL_PACKED_UNSPEC BLIS_PACK_BIT -#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT ) -#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT ) -#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT ) -#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) -#define BLIS_BITVAL_PACKED_ROW_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT ) -#define BLIS_BITVAL_PACKED_COL_PANELS_4M ( BLIS_PACK_BIT | BLIS_BITVAL_4M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) -#define BLIS_BITVAL_PACKED_ROW_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT ) -#define BLIS_BITVAL_PACKED_COL_PANELS_3M ( BLIS_PACK_BIT | BLIS_BITVAL_3M | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) -#define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT ) -#define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) -#define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT ) -#define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) +#define BLIS_BITVAL_REAL 0x0 +#define BLIS_BITVAL_COMPLEX BLIS_DOMAIN_BIT +#define BLIS_BITVAL_SINGLE_PREC 0x0 +#define BLIS_BITVAL_DOUBLE_PREC BLIS_PRECISION_BIT +#define BLIS_BITVAL_FLOAT_TYPE 0x0 +#define BLIS_BITVAL_SCOMPLEX_TYPE BLIS_DOMAIN_BIT +#define BLIS_BITVAL_DOUBLE_TYPE BLIS_PRECISION_BIT +#define BLIS_BITVAL_DCOMPLEX_TYPE ( BLIS_DOMAIN_BIT | BLIS_PRECISION_BIT ) +#define BLIS_BITVAL_INT_TYPE 0x04 +#define BLIS_BITVAL_CONST_TYPE 0x05 +#define BLIS_BITVAL_NO_TRANS 0x0 +#define BLIS_BITVAL_TRANS BLIS_TRANS_BIT +#define BLIS_BITVAL_NO_CONJ 0x0 +#define BLIS_BITVAL_CONJ BLIS_CONJ_BIT +#define BLIS_BITVAL_CONJ_TRANS ( BLIS_CONJ_BIT | BLIS_TRANS_BIT ) +#define BLIS_BITVAL_ZEROS 0x0 +#define BLIS_BITVAL_UPPER ( BLIS_UPPER_BIT | BLIS_DIAG_BIT ) +#define BLIS_BITVAL_LOWER ( BLIS_LOWER_BIT | BLIS_DIAG_BIT ) +#define BLIS_BITVAL_DENSE BLIS_UPLO_BITS +#define BLIS_BITVAL_NONUNIT_DIAG 0x0 +#define BLIS_BITVAL_UNIT_DIAG BLIS_UNIT_DIAG_BIT +#define BLIS_BITVAL_INVERT_DIAG BLIS_INVERT_DIAG_BIT +#define BLIS_BITVAL_NOT_PACKED 0x0 +#define BLIS_BITVAL_4MI ( 0x1 << BLIS_PACK_FORMAT_SHIFT ) +#define BLIS_BITVAL_3MI ( 0x2 << BLIS_PACK_FORMAT_SHIFT ) +#define BLIS_BITVAL_RO ( 0x5 << BLIS_PACK_FORMAT_SHIFT ) +#define BLIS_BITVAL_IO ( 0x6 << BLIS_PACK_FORMAT_SHIFT ) +#define BLIS_BITVAL_RPI ( 0x7 << BLIS_PACK_FORMAT_SHIFT ) +#define BLIS_BITVAL_PACKED_UNSPEC BLIS_PACK_BIT +#define BLIS_BITVAL_PACKED_ROWS ( BLIS_PACK_BIT ) +#define BLIS_BITVAL_PACKED_COLUMNS ( BLIS_PACK_BIT | BLIS_PACK_RC_BIT ) +#define BLIS_BITVAL_PACKED_ROW_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT ) +#define BLIS_BITVAL_PACKED_COL_PANELS ( BLIS_PACK_BIT | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) +#define BLIS_BITVAL_PACKED_ROW_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT ) +#define BLIS_BITVAL_PACKED_COL_PANELS_4MI ( BLIS_PACK_BIT | BLIS_BITVAL_4MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) +#define BLIS_BITVAL_PACKED_ROW_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT ) +#define BLIS_BITVAL_PACKED_COL_PANELS_3MI ( BLIS_PACK_BIT | BLIS_BITVAL_3MI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) +#define BLIS_BITVAL_PACKED_ROW_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT ) +#define BLIS_BITVAL_PACKED_COL_PANELS_RO ( BLIS_PACK_BIT | BLIS_BITVAL_RO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) +#define BLIS_BITVAL_PACKED_ROW_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT ) +#define BLIS_BITVAL_PACKED_COL_PANELS_IO ( BLIS_PACK_BIT | BLIS_BITVAL_IO | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) #define BLIS_BITVAL_PACKED_ROW_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT ) #define BLIS_BITVAL_PACKED_COL_PANELS_RPI ( BLIS_PACK_BIT | BLIS_BITVAL_RPI | BLIS_PACK_PANEL_BIT | BLIS_PACK_RC_BIT ) -#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0 -#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT -#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0 -#define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT -#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0 -#define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT ) -#define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT ) -#define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT ) -#define BLIS_BITVAL_GENERAL 0x0 -#define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT ) -#define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT ) -#define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT ) +#define BLIS_BITVAL_PACK_FWD_IF_UPPER 0x0 +#define BLIS_BITVAL_PACK_REV_IF_UPPER BLIS_PACK_REV_IF_UPPER_BIT +#define BLIS_BITVAL_PACK_FWD_IF_LOWER 0x0 +#define BLIS_BITVAL_PACK_REV_IF_LOWER BLIS_PACK_REV_IF_LOWER_BIT +#define BLIS_BITVAL_BUFFER_FOR_A_BLOCK 0x0 +#define BLIS_BITVAL_BUFFER_FOR_B_PANEL ( 0x1 << BLIS_PACK_BUFFER_SHIFT ) +#define BLIS_BITVAL_BUFFER_FOR_C_PANEL ( 0x2 << BLIS_PACK_BUFFER_SHIFT ) +#define BLIS_BITVAL_BUFFER_FOR_GEN_USE ( 0x3 << BLIS_PACK_BUFFER_SHIFT ) +#define BLIS_BITVAL_GENERAL 0x0 +#define BLIS_BITVAL_HERMITIAN ( 0x1 << BLIS_STRUC_SHIFT ) +#define BLIS_BITVAL_SYMMETRIC ( 0x2 << BLIS_STRUC_SHIFT ) +#define BLIS_BITVAL_TRIANGULAR ( 0x3 << BLIS_STRUC_SHIFT ) // @@ -431,21 +431,21 @@ typedef enum typedef enum { - BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED, - BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC, - BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC, - BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS, - BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS, - BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS, - BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS, - BLIS_PACKED_ROW_PANELS_4M = BLIS_BITVAL_PACKED_ROW_PANELS_4M, - BLIS_PACKED_COL_PANELS_4M = BLIS_BITVAL_PACKED_COL_PANELS_4M, - BLIS_PACKED_ROW_PANELS_3M = BLIS_BITVAL_PACKED_ROW_PANELS_3M, - BLIS_PACKED_COL_PANELS_3M = BLIS_BITVAL_PACKED_COL_PANELS_3M, - BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO, - BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO, - BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO, - BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO, + BLIS_NOT_PACKED = BLIS_BITVAL_NOT_PACKED, + BLIS_PACKED_UNSPEC = BLIS_BITVAL_PACKED_UNSPEC, + BLIS_PACKED_VECTOR = BLIS_BITVAL_PACKED_UNSPEC, + BLIS_PACKED_ROWS = BLIS_BITVAL_PACKED_ROWS, + BLIS_PACKED_COLUMNS = BLIS_BITVAL_PACKED_COLUMNS, + BLIS_PACKED_ROW_PANELS = BLIS_BITVAL_PACKED_ROW_PANELS, + BLIS_PACKED_COL_PANELS = BLIS_BITVAL_PACKED_COL_PANELS, + BLIS_PACKED_ROW_PANELS_4MI = BLIS_BITVAL_PACKED_ROW_PANELS_4MI, + BLIS_PACKED_COL_PANELS_4MI = BLIS_BITVAL_PACKED_COL_PANELS_4MI, + BLIS_PACKED_ROW_PANELS_3MI = BLIS_BITVAL_PACKED_ROW_PANELS_3MI, + BLIS_PACKED_COL_PANELS_3MI = BLIS_BITVAL_PACKED_COL_PANELS_3MI, + BLIS_PACKED_ROW_PANELS_RO = BLIS_BITVAL_PACKED_ROW_PANELS_RO, + BLIS_PACKED_COL_PANELS_RO = BLIS_BITVAL_PACKED_COL_PANELS_RO, + BLIS_PACKED_ROW_PANELS_IO = BLIS_BITVAL_PACKED_ROW_PANELS_IO, + BLIS_PACKED_COL_PANELS_IO = BLIS_BITVAL_PACKED_COL_PANELS_IO, BLIS_PACKED_ROW_PANELS_RPI = BLIS_BITVAL_PACKED_ROW_PANELS_RPI, BLIS_PACKED_COL_PANELS_RPI = BLIS_BITVAL_PACKED_COL_PANELS_RPI, } pack_t;