diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index aa7156739..a86d7e5d6 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -47,8 +47,6 @@ func_t* gemm_ukrs; packm_t* gemm_packa_cntl; packm_t* gemm_packb_cntl; -packm_t* gemm_packc_cntl; -unpackm_t* gemm_unpackc_cntl; gemm_t* gemm_cntl_bp_ke; gemm_t* gemm_cntl_op_bp; @@ -104,7 +102,7 @@ void bli_gemm_cntl_init() BLIS_VARIANT2, gemm_mr, gemm_kr, - FALSE, // already dense; densify not necessary + TRUE, // densify; used by hemm/symm FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? @@ -117,33 +115,13 @@ void bli_gemm_cntl_init() BLIS_VARIANT2, gemm_kr, gemm_nr, - FALSE, // already dense; densify not necessary + TRUE, // densify; used by hemm/symm FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); - // Create control tree objects for packm/unpackm operations on C. - gemm_packc_cntl - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - gemm_mr, - gemm_nr, - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_C_PANEL ); - - gemm_unpackc_cntl - = - bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - NULL ); // no blocksize needed - // // Create a control tree for packing A and B, and streaming C. @@ -223,8 +201,6 @@ void bli_gemm_cntl_finalize() bli_cntl_obj_free( gemm_packa_cntl ); bli_cntl_obj_free( gemm_packb_cntl ); - bli_cntl_obj_free( gemm_packc_cntl ); - bli_cntl_obj_free( gemm_unpackc_cntl ); bli_cntl_obj_free( gemm_cntl_bp_ke ); bli_cntl_obj_free( gemm_cntl_op_bp ); diff --git a/frame/3/hemm/bli_hemm.c b/frame/3/hemm/bli_hemm.c index 845d826ed..1142f38aa 100644 --- a/frame/3/hemm/bli_hemm.c +++ b/frame/3/hemm/bli_hemm.c @@ -34,7 +34,7 @@ #include "blis.h" -extern gemm_t* hemm_cntl; +extern gemm_t* gemm_cntl; // // Define object-based interface. @@ -86,7 +86,7 @@ void bli_hemm( side_t side, } // Choose the control tree. - cntl = hemm_cntl; + cntl = gemm_cntl; // Invoke the internal back-end. bli_gemm_int( alpha, diff --git a/frame/3/hemm/bli_hemm.h b/frame/3/hemm/bli_hemm.h index d95fe8617..27c1bdaef 100644 --- a/frame/3/hemm/bli_hemm.h +++ b/frame/3/hemm/bli_hemm.h @@ -32,7 +32,6 @@ */ -#include "bli_hemm_cntl.h" #include "bli_hemm_check.h" diff --git a/frame/3/hemm/bli_hemm_cntl.c b/frame/3/hemm/bli_hemm_cntl.c deleted file mode 100644 index f1f7ff65f..000000000 --- a/frame/3/hemm/bli_hemm_cntl.c +++ /dev/null @@ -1,220 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -extern scalm_t* scalm_cntl; -extern func_t* gemm_ukrs; - -blksz_t* hemm_mc; -blksz_t* hemm_nc; -blksz_t* hemm_kc; -blksz_t* hemm_mr; -blksz_t* hemm_nr; -blksz_t* hemm_kr; - -packm_t* hemm_packa_cntl; -packm_t* hemm_packb_cntl; -packm_t* hemm_packc_cntl; -unpackm_t* hemm_unpackc_cntl; - -gemm_t* hemm_cntl_bp_ke; -gemm_t* hemm_cntl_op_bp; -gemm_t* hemm_cntl_mm_op; -gemm_t* hemm_cntl_vl_mm; - -gemm_t* hemm_cntl; - - -void bli_hemm_cntl_init() -{ - // Create blocksize objects for each dimension. - hemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, - BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, - BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, - BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); - - hemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, - BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, - BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, - BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); - - hemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, - BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, - BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, - BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); - - hemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, - BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, - BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, - BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); - - hemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, - BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, - BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, - BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); - - hemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, - BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, - BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, - BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); - - - // Create control tree objects for packm operations. - hemm_packa_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - hemm_mr, - hemm_kr, - TRUE, // densify - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS, - BLIS_BUFFER_FOR_A_BLOCK ); - - hemm_packb_cntl - = - bli_packm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - hemm_kr, - hemm_nr, - TRUE, // densify - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS, - BLIS_BUFFER_FOR_B_PANEL ); - - // Create control tree objects for packm/unpackm operations on C. - hemm_packc_cntl - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - hemm_mr, - hemm_nr, - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_GEN_USE ); - - hemm_unpackc_cntl - = - bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - NULL ); // no blocksize needed - - - // Create control tree object for lowest-level block-panel kernel. - hemm_cntl_bp_ke - = - bli_gemm_cntl_obj_create( BLIS_UNB_OPT, - BLIS_VARIANT2, - NULL, - gemm_ukrs, - NULL, NULL, NULL, - NULL, NULL, NULL ); - - // Create control tree object for outer panel (to block-panel) - // problem. - hemm_cntl_op_bp - = - bli_gemm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT1, - hemm_mc, - NULL, - NULL, - hemm_packa_cntl, - hemm_packb_cntl, - NULL, - hemm_cntl_bp_ke, - NULL ); - - // Create control tree object for general problem via multiple - // rank-k (outer panel) updates. - hemm_cntl_mm_op - = - bli_gemm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT3, - hemm_kc, - NULL, - NULL, - NULL, - NULL, - NULL, - hemm_cntl_op_bp, - NULL ); - - // Create control tree object for very large problem via multiple - // general problems. - hemm_cntl_vl_mm - = - bli_gemm_cntl_obj_create( BLIS_BLOCKED, - BLIS_VARIANT2, - hemm_nc, - NULL, - NULL, - NULL, - NULL, - NULL, - hemm_cntl_mm_op, - NULL ); - - // Alias the "master" hemm control tree to a shorter name. - hemm_cntl = hemm_cntl_vl_mm; -} - -void bli_hemm_cntl_finalize() -{ - bli_blksz_obj_free( hemm_mc ); - bli_blksz_obj_free( hemm_nc ); - bli_blksz_obj_free( hemm_kc ); - bli_blksz_obj_free( hemm_mr ); - bli_blksz_obj_free( hemm_nr ); - bli_blksz_obj_free( hemm_kr ); - - bli_cntl_obj_free( hemm_packa_cntl ); - bli_cntl_obj_free( hemm_packb_cntl ); - bli_cntl_obj_free( hemm_packc_cntl ); - bli_cntl_obj_free( hemm_unpackc_cntl ); - - bli_cntl_obj_free( hemm_cntl_bp_ke ); - bli_cntl_obj_free( hemm_cntl_op_bp ); - bli_cntl_obj_free( hemm_cntl_mm_op ); - bli_cntl_obj_free( hemm_cntl_vl_mm ); -} - diff --git a/frame/3/hemm/bli_hemm_cntl.h b/frame/3/hemm/bli_hemm_cntl.h deleted file mode 100644 index c30561091..000000000 --- a/frame/3/hemm/bli_hemm_cntl.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_hemm_cntl_init( void ); -void bli_hemm_cntl_finalize( void ); - diff --git a/frame/3/her2k/bli_her2k_blk_var1f.c b/frame/3/her2k/attic/bli_her2k_blk_var1f.c similarity index 100% rename from frame/3/her2k/bli_her2k_blk_var1f.c rename to frame/3/her2k/attic/bli_her2k_blk_var1f.c diff --git a/frame/3/her2k/bli_her2k_blk_var1f.h b/frame/3/her2k/attic/bli_her2k_blk_var1f.h similarity index 100% rename from frame/3/her2k/bli_her2k_blk_var1f.h rename to frame/3/her2k/attic/bli_her2k_blk_var1f.h diff --git a/frame/3/her2k/bli_her2k_blk_var2f.c b/frame/3/her2k/attic/bli_her2k_blk_var2f.c similarity index 100% rename from frame/3/her2k/bli_her2k_blk_var2f.c rename to frame/3/her2k/attic/bli_her2k_blk_var2f.c diff --git a/frame/3/her2k/bli_her2k_blk_var2f.h b/frame/3/her2k/attic/bli_her2k_blk_var2f.h similarity index 100% rename from frame/3/her2k/bli_her2k_blk_var2f.h rename to frame/3/her2k/attic/bli_her2k_blk_var2f.h diff --git a/frame/3/her2k/bli_her2k_blk_var3f.c b/frame/3/her2k/attic/bli_her2k_blk_var3f.c similarity index 100% rename from frame/3/her2k/bli_her2k_blk_var3f.c rename to frame/3/her2k/attic/bli_her2k_blk_var3f.c diff --git a/frame/3/her2k/bli_her2k_blk_var3f.h b/frame/3/her2k/attic/bli_her2k_blk_var3f.h similarity index 100% rename from frame/3/her2k/bli_her2k_blk_var3f.h rename to frame/3/her2k/attic/bli_her2k_blk_var3f.h diff --git a/frame/3/her2k/bli_her2k_cntl.c b/frame/3/her2k/attic/bli_her2k_cntl.c similarity index 100% rename from frame/3/her2k/bli_her2k_cntl.c rename to frame/3/her2k/attic/bli_her2k_cntl.c diff --git a/frame/3/her2k/bli_her2k_cntl.h b/frame/3/her2k/attic/bli_her2k_cntl.h similarity index 100% rename from frame/3/her2k/bli_her2k_cntl.h rename to frame/3/her2k/attic/bli_her2k_cntl.h diff --git a/frame/3/her2k/bli_her2k_int.c b/frame/3/her2k/attic/bli_her2k_int.c similarity index 100% rename from frame/3/her2k/bli_her2k_int.c rename to frame/3/her2k/attic/bli_her2k_int.c diff --git a/frame/3/her2k/bli_her2k_int.h b/frame/3/her2k/attic/bli_her2k_int.h similarity index 100% rename from frame/3/her2k/bli_her2k_int.h rename to frame/3/her2k/attic/bli_her2k_int.h diff --git a/frame/3/her2k/bli_her2k_l_ker_var2.c b/frame/3/her2k/attic/bli_her2k_l_ker_var2.c similarity index 100% rename from frame/3/her2k/bli_her2k_l_ker_var2.c rename to frame/3/her2k/attic/bli_her2k_l_ker_var2.c diff --git a/frame/3/her2k/bli_her2k_l_ker_var2.h b/frame/3/her2k/attic/bli_her2k_l_ker_var2.h similarity index 100% rename from frame/3/her2k/bli_her2k_l_ker_var2.h rename to frame/3/her2k/attic/bli_her2k_l_ker_var2.h diff --git a/frame/3/her2k/bli_her2k_target.c b/frame/3/her2k/attic/bli_her2k_target.c similarity index 100% rename from frame/3/her2k/bli_her2k_target.c rename to frame/3/her2k/attic/bli_her2k_target.c diff --git a/frame/3/her2k/bli_her2k_target.h b/frame/3/her2k/attic/bli_her2k_target.h similarity index 100% rename from frame/3/her2k/bli_her2k_target.h rename to frame/3/her2k/attic/bli_her2k_target.h diff --git a/frame/3/her2k/bli_her2k_u_ker_var2.c b/frame/3/her2k/attic/bli_her2k_u_ker_var2.c similarity index 100% rename from frame/3/her2k/bli_her2k_u_ker_var2.c rename to frame/3/her2k/attic/bli_her2k_u_ker_var2.c diff --git a/frame/3/her2k/bli_her2k_u_ker_var2.h b/frame/3/her2k/attic/bli_her2k_u_ker_var2.h similarity index 100% rename from frame/3/her2k/bli_her2k_u_ker_var2.h rename to frame/3/her2k/attic/bli_her2k_u_ker_var2.h diff --git a/frame/3/her2k/bli_her2k.c b/frame/3/her2k/bli_her2k.c index 5fca55a5d..ada6cb2a7 100644 --- a/frame/3/her2k/bli_her2k.c +++ b/frame/3/her2k/bli_her2k.c @@ -34,7 +34,7 @@ #include "blis.h" -extern her2k_t* her2k_cntl; +//extern her2k_t* her2k_cntl; extern herk_t* herk_cntl; // diff --git a/frame/3/her2k/bli_her2k.h b/frame/3/her2k/bli_her2k.h index 2497ed2e7..05b8809e8 100644 --- a/frame/3/her2k/bli_her2k.h +++ b/frame/3/her2k/bli_her2k.h @@ -32,8 +32,9 @@ */ -#include "bli_her2k_cntl.h" +//#include "bli_her2k_cntl.h" #include "bli_her2k_check.h" +/* #include "bli_her2k_int.h" #include "bli_her2k_target.h" @@ -45,6 +46,7 @@ #include "bli_her2k_l_ker_var2.h" #include "bli_her2k_u_ker_var2.h" +*/ // diff --git a/frame/3/her2k/bli_her2k_check.c b/frame/3/her2k/bli_her2k_check.c index f76d40f57..26a4eea76 100644 --- a/frame/3/her2k/bli_her2k_check.c +++ b/frame/3/her2k/bli_her2k_check.c @@ -151,6 +151,7 @@ void bli_her2k_check( obj_t* alpha, bli_check_error_code( e_val ); } +#if 0 void bli_her2k_int_check( obj_t* alpha, obj_t* a, obj_t* bh, @@ -172,4 +173,4 @@ void bli_her2k_int_check( obj_t* alpha, e_val = bli_check_valid_cntl( ( void* )cntl ); bli_check_error_code( e_val ); } - +#endif diff --git a/frame/3/her2k/bli_her2k_check.h b/frame/3/her2k/bli_her2k_check.h index 73bd9a470..0730522ae 100644 --- a/frame/3/her2k/bli_her2k_check.h +++ b/frame/3/her2k/bli_her2k_check.h @@ -47,6 +47,7 @@ void bli_her2k_check( obj_t* alpha, obj_t* beta, obj_t* c ); +#if 0 void bli_her2k_int_check( obj_t* alpha, obj_t* a, obj_t* bh, @@ -56,4 +57,4 @@ void bli_her2k_int_check( obj_t* alpha, obj_t* beta, obj_t* c, her2k_t* cntl ); - +#endif diff --git a/frame/3/her2k/other/bli_her2k_l_blk_var4.c b/frame/3/her2k/other/bli_her2k_l_blk_var4.c deleted file mode 100644 index 0d05f9582..000000000 --- a/frame/3/her2k/other/bli_her2k_l_blk_var4.c +++ /dev/null @@ -1,256 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -void bli_her2k_l_blk_var4( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ) -{ - obj_t a1, a1_pack; - obj_t bh_pack, bhL_pack; - obj_t b1, b1_pack; - obj_t ah_pack, ahL_pack; - obj_t c1, c1_pack; - obj_t c1L, c1L_pack; - - dim_t i; - dim_t bm_alg; - dim_t m_trans; - dim_t offL, nL; - - // Initialize all pack objects that are passed into packm_init(). - bli_obj_init_pack( &a1_pack ); - bli_obj_init_pack( &bh_pack ); - bli_obj_init_pack( &b1_pack ); - bli_obj_init_pack( &ah_pack ); - bli_obj_init_pack( &c1_pack ); - bli_obj_init_pack( &c1L_pack ); - - // Query dimension in partitioning direction. - m_trans = bli_obj_length_after_trans( *c ); - - // Scale C by beta (if instructed). - bli_scalm_int( beta, - c, - cntl_sub_scalm( cntl ) ); - - // Initialize object for packing B'. - bli_packm_init( bh, &bh_pack, - cntl_sub_packm_b( cntl ) ); - - // Initialize object for packing A'. - bli_packm_init( ah, &ah_pack, - cntl_sub_packm_b( cntl ) ); - - // Fuse the first iteration with incremental packing and computation. - { - obj_t bh_inc, bh_pack_inc; - obj_t ah_inc, ah_pack_inc; - obj_t c1_pack_inc; - - dim_t j; - dim_t bn_inc; - dim_t n_trans; - - // Query dimension in partitioning direction. - n_trans = bli_obj_width( bh_pack ); - - // Determine the current algorithmic blocksize. - bm_alg = bli_determine_blocksize_b( 0, m_trans, a, - cntl_blocksize( cntl ) ); - - // Acquire partitions for A1, B1, and C1. - bli_acquire_mpart_b2t( BLIS_SUBPART1, - 0, bm_alg, a, &a1 ); - bli_acquire_mpart_b2t( BLIS_SUBPART1, - 0, bm_alg, b, &b1 ); - bli_acquire_mpart_b2t( BLIS_SUBPART1, - 0, bm_alg, c, &c1 ); - - // Initialize objects for packing A1, B1, and C1. - bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - bli_packm_init( &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - - // Partition along the n dimension. - for ( j = 0; j < n_trans; j += bn_inc ) - { - // Determine the current incremental packing blocksize. - bn_inc = bli_determine_blocksize_f( j, n_trans, a, - cntl_blocksize_aux( cntl ) ); - - // Acquire incremental partitions. - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, bh, &bh_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &bh_pack, &bh_pack_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &c1_pack, &c1_pack_inc ); - - // Pack Bh_inc and scale by alpha (if instructed). - bli_packm_int( alpha, &bh_inc, &bh_pack_inc, cntl_sub_packm_b( cntl ) ); - - // Perform herk subproblem. - bli_herk_int( &BLIS_ONE, - &a1_pack, - &bh_pack_inc, - beta, - &c1_pack_inc, - cntl_sub_herk( cntl ) ); - } - - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - - // Partition along the n dimension. - for ( j = 0; j < n_trans; j += bn_inc ) - { - // Determine the current incremental packing blocksize. - bn_inc = bli_determine_blocksize_f( j, n_trans, b, - cntl_blocksize_aux( cntl ) ); - - // Acquire incremental partitions. - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, ah, &ah_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &ah_pack, &ah_pack_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &c1_pack, &c1_pack_inc ); - - // Pack Ah_inc and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, &ah_inc, &ah_pack_inc, cntl_sub_packm_b( cntl ) ); - - // Perform herk subproblem. - bli_herk_int( &BLIS_ONE, - &b1_pack, - &ah_pack_inc, - beta, - &c1_pack_inc, - cntl_sub_herk( cntl ) ); - } - - // Unpack C1 (if C1 was packed). - bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) ); - } - - // Partition along the m dimension. - for ( i = bm_alg; i < m_trans; i += bm_alg ) - { - // Determine the current algorithmic blocksize. - bm_alg = bli_determine_blocksize_b( i, m_trans, a, - cntl_blocksize( cntl ) ); - - // Acquire partitions for A1, B1, and C1. - bli_acquire_mpart_b2t( BLIS_SUBPART1, - i, bm_alg, a, &a1 ); - bli_acquire_mpart_b2t( BLIS_SUBPART1, - i, bm_alg, b, &b1 ); - bli_acquire_mpart_b2t( BLIS_SUBPART1, - i, bm_alg, c, &c1 ); - - // Partition off the stored region of C1 and the corresponding regions - // of Bh_pack and Ah_pack. We compute the width of the subpartition - // taking the location of the diagonal into account. - offL = 0; - nL = bli_min( bli_obj_width_after_trans( c1 ), - bli_obj_diag_offset_after_trans( c1 ) + bm_alg ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offL, nL, &c1, &c1L ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offL, nL, &bh_pack, &bhL_pack ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offL, nL, &ah_pack, &ahL_pack ); - - // Initialize objects for packing A1, B1, and C1. - bli_packm_init( &a1, &a1_pack, - cntl_sub_packm_a( cntl ) ); - bli_packm_init( &b1, &b1_pack, - cntl_sub_packm_a( cntl ) ); - bli_packm_init( &c1L, &c1L_pack, - cntl_sub_packm_c( cntl ) ); - - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &b1, &b1_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1L, &c1L_pack, - cntl_sub_packm_c( cntl ) ); - - // Perform herk subproblem. - bli_her2k_int( alpha, - &a1_pack, - &bhL_pack, - alpha_conj, - &b1_pack, - &ahL_pack, - beta, - &c1L_pack, - cntl_sub_her2k( cntl ) ); - - // Unpack C1 (if C1 was packed). - bli_unpackm_int( &c1L_pack, &c1L, - cntl_sub_unpackm_c( cntl ) ); - } - - // If any packing buffers were acquired within packm, release them back - // to the memory manager. - bli_obj_release_pack( &a1_pack ); - bli_obj_release_pack( &bh_pack ); - bli_obj_release_pack( &b1_pack ); - bli_obj_release_pack( &ah_pack ); - bli_obj_release_pack( &c1_pack ); - bli_obj_release_pack( &c1L_pack ); -} - diff --git a/frame/3/her2k/other/bli_her2k_l_blk_var4.h b/frame/3/her2k/other/bli_her2k_l_blk_var4.h deleted file mode 100644 index 78bee4f8f..000000000 --- a/frame/3/her2k/other/bli_her2k_l_blk_var4.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_her2k_l_blk_var4( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ); - diff --git a/frame/3/her2k/other/bli_her2k_u_blk_var1.c b/frame/3/her2k/other/bli_her2k_u_blk_var1.c deleted file mode 100644 index 3c78be4c2..000000000 --- a/frame/3/her2k/other/bli_her2k_u_blk_var1.c +++ /dev/null @@ -1,215 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -void bli_her2k_u_blk_var1( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ) -{ - obj_t a1, a1_pack; - obj_t bh_pack, bhR_pack; - obj_t b1, b1_pack; - obj_t ah_pack, ahR_pack; - obj_t c1; - obj_t c1R, c1R_pack; - - dim_t i; - dim_t b_alg; - dim_t m_trans; - dim_t offR, nR; - - // Initialize all pack objects that are passed into packm_init(). - bli_obj_init_pack( &a1_pack ); - bli_obj_init_pack( &bh_pack ); - bli_obj_init_pack( &b1_pack ); - bli_obj_init_pack( &ah_pack ); - bli_obj_init_pack( &c1R_pack ); - - // Query dimension in partitioning direction. - m_trans = bli_obj_length_after_trans( *c ); - - // Scale C by beta (if instructed). - bli_scalm_int( beta, - c, - cntl_sub_scalm( cntl ) ); - - // - // Perform first rank-k update: C = C + alpha * A * B'. - // - - // Initialize object for packing B'. - bli_packm_init( bh, &bh_pack, - cntl_sub_packm_b( cntl ) ); - - // Pack B' and scale by alpha (if instructed). - bli_packm_int( alpha, - bh, &bh_pack, - cntl_sub_packm_b( cntl ) ); - - // Partition along the m dimension. - for ( i = 0; i < m_trans; i += b_alg ) - { - // Determine the current algorithmic blocksize. - b_alg = bli_determine_blocksize_f( i, m_trans, a, - cntl_blocksize( cntl ) ); - - // Acquire partitions for A1 and C1. - bli_acquire_mpart_t2b( BLIS_SUBPART1, - i, b_alg, a, &a1 ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - i, b_alg, c, &c1 ); - - // Partition off the stored region of C1 and the corresponding region - // of Bh_pack. We compute the width of the subpartition taking the - // location of the diagonal into account. - offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) ); - nR = bli_obj_width_after_trans( c1 ) - offR; - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offR, nR, &c1, &c1R ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offR, nR, &bh_pack, &bhR_pack ); - - // Initialize objects for packing A1 and C1. - bli_packm_init( &a1, &a1_pack, - cntl_sub_packm_a( cntl ) ); - bli_packm_init( &c1R, &c1R_pack, - cntl_sub_packm_c( cntl ) ); - - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1R, &c1R_pack, - cntl_sub_packm_c( cntl ) ); - - // Perform herk subproblem. - bli_herk_int( alpha, - &a1_pack, - &bhR_pack, - beta, - &c1R_pack, - cntl_sub_herk( cntl ) ); - - // Unpack C1 (if C1 was packed). - bli_unpackm_int( &c1R_pack, &c1R, - cntl_sub_unpackm_c( cntl ) ); - } - - // If any packing buffers were acquired within packm, release them back - // to the memory manager. - bli_obj_release_pack( &a1_pack ); - bli_obj_release_pack( &bh_pack ); - - // - // Perform second rank-k update: C = C + conj(alpha) * B * A'. - // - - // Initialize object for packing A'. - bli_packm_init( ah, &ah_pack, - cntl_sub_packm_b( cntl ) ); - - // Pack A' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - ah, &ah_pack, - cntl_sub_packm_b( cntl ) ); - - // Partition along the m dimension. - for ( i = 0; i < m_trans; i += b_alg ) - { - // Determine the current algorithmic blocksize. - b_alg = bli_determine_blocksize_f( i, m_trans, b, - cntl_blocksize( cntl ) ); - - // Acquire partitions for B1 and C1. - bli_acquire_mpart_t2b( BLIS_SUBPART1, - i, b_alg, b, &b1 ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - i, b_alg, c, &c1 ); - - // Partition off the stored region of C1 and the corresponding region - // of Ah_pack. We compute the width of the subpartition taking the - // location of the diagonal into account. - offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) ); - nR = bli_obj_width_after_trans( c1 ) - offR; - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offR, nR, &c1, &c1R ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offR, nR, &ah_pack, &ahR_pack ); - - // Initialize objects for packing B1 and C1. - bli_packm_init( &b1, &b1_pack, - cntl_sub_packm_a( cntl ) ); - bli_packm_init( &c1R, &c1R_pack, - cntl_sub_packm_c( cntl ) ); - - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &b1, &b1_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1R, &c1R_pack, - cntl_sub_packm_c( cntl ) ); - - // Perform herk subproblem. - bli_herk_int( alpha_conj, - &b1_pack, - &ahR_pack, - &BLIS_ONE, - &c1R_pack, - cntl_sub_herk( cntl ) ); - - // Unpack C1 (if C1 was packed). - bli_unpackm_int( &c1R_pack, &c1R, - cntl_sub_unpackm_c( cntl ) ); - } - - // If any packing buffers were acquired within packm, release them back - // to the memory manager. - bli_obj_release_pack( &b1_pack ); - bli_obj_release_pack( &ah_pack ); - bli_obj_release_pack( &c1R_pack ); -} - diff --git a/frame/3/her2k/other/bli_her2k_u_blk_var1.h b/frame/3/her2k/other/bli_her2k_u_blk_var1.h deleted file mode 100644 index 9128c99a0..000000000 --- a/frame/3/her2k/other/bli_her2k_u_blk_var1.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_her2k_u_blk_var1( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ); - diff --git a/frame/3/her2k/other/bli_her2k_u_blk_var2.c b/frame/3/her2k/other/bli_her2k_u_blk_var2.c deleted file mode 100644 index 423ff6408..000000000 --- a/frame/3/her2k/other/bli_her2k_u_blk_var2.c +++ /dev/null @@ -1,165 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -void bli_her2k_u_blk_var2( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ) -{ - obj_t a_pack, aT_pack; - obj_t bh1, bh1_pack; - obj_t b_pack, bT_pack; - obj_t ah1, ah1_pack; - obj_t c1; - obj_t c1T, c1T_pack; - - dim_t i; - dim_t b_alg; - dim_t n_trans; - dim_t offT, mT; - - // Initialize all pack objects that are passed into packm_init(). - bli_obj_init_pack( &a_pack ); - bli_obj_init_pack( &bh1_pack ); - bli_obj_init_pack( &b_pack ); - bli_obj_init_pack( &ah1_pack ); - bli_obj_init_pack( &c1T_pack ); - - // Query dimension in partitioning direction. - n_trans = bli_obj_width_after_trans( *c ); - - // Scale C by beta (if instructed). - bli_scalm_int( beta, - c, - cntl_sub_scalm( cntl ) ); - - // Initialize object for packing A and B. - bli_packm_init( a, &a_pack, - cntl_sub_packm_a( cntl ) ); - bli_packm_init( b, &b_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack A and scale by alpha (if instructed). - bli_packm_int( alpha, - a, &a_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack B and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - b, &b_pack, - cntl_sub_packm_a( cntl ) ); - - // Partition along the n dimension. - for ( i = 0; i < n_trans; i += b_alg ) - { - // Determine the current algorithmic blocksize. - b_alg = bli_determine_blocksize_b( i, n_trans, bh, - cntl_blocksize( cntl ) ); - - // Acquire partitions for B1', A1', and C1. - bli_acquire_mpart_r2l( BLIS_SUBPART1, - i, b_alg, bh, &bh1 ); - bli_acquire_mpart_r2l( BLIS_SUBPART1, - i, b_alg, ah, &ah1 ); - bli_acquire_mpart_r2l( BLIS_SUBPART1, - i, b_alg, c, &c1 ); - - // Partition off the stored region of C1 and the corresponding regions - // of A_pack and B_pack. We compute the length of the subpartition - // taking the location of the diagonal into account. - offT = 0; - mT = bli_min( bli_obj_length_after_trans( c1 ), - -bli_obj_diag_offset_after_trans( c1 ) + b_alg ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - offT, mT, &c1, &c1T ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - offT, mT, &a_pack, &aT_pack ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - offT, mT, &b_pack, &bT_pack ); - - // Initialize objects for packing B1', A1', and C1. - bli_packm_init( &bh1, &bh1_pack, - cntl_sub_packm_b( cntl ) ); - bli_packm_init( &ah1, &ah1_pack, - cntl_sub_packm_b( cntl ) ); - bli_packm_init( &c1T, &c1T_pack, - cntl_sub_packm_c( cntl ) ); - - // Pack B1' and scale by alpha (if instructed). - bli_packm_int( alpha, - &bh1, &bh1_pack, - cntl_sub_packm_b( cntl ) ); - - // Pack A1' and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &ah1, &ah1_pack, - cntl_sub_packm_b( cntl ) ); - - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1T, &c1T_pack, - cntl_sub_packm_c( cntl ) ); - - // Perform herk subproblem. - bli_her2k_int( alpha, - &aT_pack, - &bh1_pack, - alpha_conj, - &bT_pack, - &ah1_pack, - beta, - &c1T_pack, - cntl_sub_her2k( cntl ) ); - - // Unpack C1 (if C1 was packed). - bli_unpackm_int( &c1T_pack, &c1T, - cntl_sub_unpackm_c( cntl ) ); - } - - // If any packing buffers were acquired within packm, release them back - // to the memory manager. - bli_obj_release_pack( &a_pack ); - bli_obj_release_pack( &bh1_pack ); - bli_obj_release_pack( &b_pack ); - bli_obj_release_pack( &ah1_pack ); - bli_obj_release_pack( &c1T_pack ); -} - diff --git a/frame/3/her2k/other/bli_her2k_u_blk_var2.h b/frame/3/her2k/other/bli_her2k_u_blk_var2.h deleted file mode 100644 index 7a3ec8145..000000000 --- a/frame/3/her2k/other/bli_her2k_u_blk_var2.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_her2k_u_blk_var2( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ); - diff --git a/frame/3/her2k/other/bli_her2k_u_blk_var4.c b/frame/3/her2k/other/bli_her2k_u_blk_var4.c deleted file mode 100644 index c440965ad..000000000 --- a/frame/3/her2k/other/bli_her2k_u_blk_var4.c +++ /dev/null @@ -1,255 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -#include "blis.h" - -void bli_her2k_u_blk_var4( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ) -{ - obj_t a1, a1_pack; - obj_t bh_pack, bhR_pack; - obj_t b1, b1_pack; - obj_t ah_pack, ahR_pack; - obj_t c1, c1_pack; - obj_t c1R, c1R_pack; - - dim_t i; - dim_t bm_alg; - dim_t m_trans; - dim_t offR, nR; - - // Initialize all pack objects that are passed into packm_init(). - bli_obj_init_pack( &a1_pack ); - bli_obj_init_pack( &bh_pack ); - bli_obj_init_pack( &b1_pack ); - bli_obj_init_pack( &ah_pack ); - bli_obj_init_pack( &c1_pack ); - bli_obj_init_pack( &c1R_pack ); - - // Query dimension in partitioning direction. - m_trans = bli_obj_length_after_trans( *c ); - - // Scale C by beta (if instructed). - bli_scalm_int( beta, - c, - cntl_sub_scalm( cntl ) ); - - // Initialize object for packing B1'. - bli_packm_init( bh, &bh_pack, - cntl_sub_packm_b( cntl ) ); - - // Initialize object for packing A1'. - bli_packm_init( ah, &ah_pack, - cntl_sub_packm_b( cntl ) ); - - // Fuse the first iteration with incremental packing and computation. - { - obj_t bh_inc, bh_pack_inc; - obj_t ah_inc, ah_pack_inc; - obj_t c1_pack_inc; - - dim_t j; - dim_t bn_inc; - dim_t n_trans; - - // Query dimension in partitioning direction. - n_trans = bli_obj_width( bh_pack ); - - // Determine the current algorithmic blocksize. - bm_alg = bli_determine_blocksize_f( 0, m_trans, a, - cntl_blocksize( cntl ) ); - - // Acquire partitions for A1, B1, and C1. - bli_acquire_mpart_t2b( BLIS_SUBPART1, - 0, bm_alg, a, &a1 ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - 0, bm_alg, b, &b1 ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - 0, bm_alg, c, &c1 ); - - // Initialize objects for packing A1, B1, and C1. - bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - bli_packm_init( &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) ); - - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) ); - - // Partition along the n dimension. - for ( j = 0; j < n_trans; j += bn_inc ) - { - // Determine the current incremental packing blocksize. - bn_inc = bli_determine_blocksize_f( j, n_trans, a, - cntl_blocksize_aux( cntl ) ); - - // Acquire incremental partitions. - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, bh, &bh_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &bh_pack, &bh_pack_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &c1_pack, &c1_pack_inc ); - - // Pack Bh_inc and scale by alpha (if instructed). - bli_packm_int( alpha, &bh_inc, &bh_pack_inc, cntl_sub_packm_b( cntl ) ); - - // Perform herk subproblem. - bli_herk_int( &BLIS_ONE, - &a1_pack, - &bh_pack_inc, - beta, - &c1_pack_inc, - cntl_sub_herk( cntl ) ); - } - - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, &b1, &b1_pack, cntl_sub_packm_a( cntl ) ); - - // Partition along the n dimension. - for ( j = 0; j < n_trans; j += bn_inc ) - { - // Determine the current incremental packing blocksize. - bn_inc = bli_determine_blocksize_f( j, n_trans, b, - cntl_blocksize_aux( cntl ) ); - - // Acquire incremental partitions. - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, ah, &ah_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &ah_pack, &ah_pack_inc ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - j, bn_inc, &c1_pack, &c1_pack_inc ); - - // Pack Ah_inc and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, &ah_inc, &ah_pack_inc, cntl_sub_packm_b( cntl ) ); - - // Perform herk subproblem. - bli_herk_int( &BLIS_ONE, - &b1_pack, - &ah_pack_inc, - beta, - &c1_pack_inc, - cntl_sub_herk( cntl ) ); - } - - // Unpack C1 (if C1 was packed). - bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) ); - } - - // Partition along the m dimension. - for ( i = bm_alg; i < m_trans; i += bm_alg ) - { - // Determine the current algorithmic blocksize. - bm_alg = bli_determine_blocksize_f( i, m_trans, a, - cntl_blocksize( cntl ) ); - - // Acquire partitions for A1, B1, and C1. - bli_acquire_mpart_t2b( BLIS_SUBPART1, - i, bm_alg, a, &a1 ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - i, bm_alg, b, &b1 ); - bli_acquire_mpart_t2b( BLIS_SUBPART1, - i, bm_alg, c, &c1 ); - - // Partition off the stored region of C1 and the corresponding regions - // of Bh_pack and Ah_pack. We compute the width of the subpartition - // taking the location of the diagonal into account. - offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) ); - nR = bli_obj_width_after_trans( c1 ) - offR; - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offR, nR, &c1, &c1R ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offR, nR, &bh_pack, &bhR_pack ); - bli_acquire_mpart_l2r( BLIS_SUBPART1, - offR, nR, &ah_pack, &ahR_pack ); - - // Initialize objects for packing A1, B1, and C1. - bli_packm_init( &a1, &a1_pack, - cntl_sub_packm_a( cntl ) ); - bli_packm_init( &b1, &b1_pack, - cntl_sub_packm_a( cntl ) ); - bli_packm_init( &c1R, &c1R_pack, - cntl_sub_packm_c( cntl ) ); - - // Pack A1 and scale by alpha (if instructed). - bli_packm_int( alpha, - &a1, &a1_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack B1 and scale by alpha_conj (if instructed). - bli_packm_int( alpha_conj, - &b1, &b1_pack, - cntl_sub_packm_a( cntl ) ); - - // Pack C1 and scale by beta (if instructed). - bli_packm_int( beta, - &c1R, &c1R_pack, - cntl_sub_packm_c( cntl ) ); - - // Perform herk subproblem. - bli_her2k_int( alpha, - &a1_pack, - &bhR_pack, - alpha_conj, - &b1_pack, - &ahR_pack, - beta, - &c1R_pack, - cntl_sub_her2k( cntl ) ); - - // Unpack C1 (if C1 was packed). - bli_unpackm_int( &c1R_pack, &c1R, - cntl_sub_unpackm_c( cntl ) ); - } - - // If any packing buffers were acquired within packm, release them back - // to the memory manager. - bli_obj_release_pack( &a1_pack ); - bli_obj_release_pack( &bh_pack ); - bli_obj_release_pack( &b1_pack ); - bli_obj_release_pack( &ah_pack ); - bli_obj_release_pack( &c1_pack ); - bli_obj_release_pack( &c1R_pack ); -} - diff --git a/frame/3/her2k/other/bli_her2k_u_blk_var4.h b/frame/3/her2k/other/bli_her2k_u_blk_var4.h deleted file mode 100644 index 61fd39982..000000000 --- a/frame/3/her2k/other/bli_her2k_u_blk_var4.h +++ /dev/null @@ -1,44 +0,0 @@ -/* - - BLIS - An object-based framework for developing high-performance BLAS-like - libraries. - - Copyright (C) 2014, The University of Texas - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - Neither the name of The University of Texas nor the names of its - contributors may be used to endorse or promote products derived - from this software without specific prior written permission. - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS - "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT - HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, - SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT - LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, - DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -*/ - -void bli_her2k_u_blk_var4( obj_t* alpha, - obj_t* a, - obj_t* bh, - obj_t* alpha_conj, - obj_t* b, - obj_t* ah, - obj_t* beta, - obj_t* c, - her2k_t* cntl ); - diff --git a/frame/3/symm/bli_symm.c b/frame/3/symm/bli_symm.c index 27fa739da..98c5d3647 100644 --- a/frame/3/symm/bli_symm.c +++ b/frame/3/symm/bli_symm.c @@ -34,7 +34,7 @@ #include "blis.h" -extern gemm_t* hemm_cntl; +extern gemm_t* gemm_cntl; // // Define object-based interface. @@ -86,7 +86,7 @@ void bli_symm( side_t side, // Choose the control tree. We can just use hemm since the algorithm // is nearly identical to that of symm. - cntl = hemm_cntl; + cntl = gemm_cntl; // Invoke the internal back-end. bli_gemm_int( alpha, diff --git a/frame/3/syr2k/bli_syr2k.c b/frame/3/syr2k/bli_syr2k.c index ea40206bb..1c589bddd 100644 --- a/frame/3/syr2k/bli_syr2k.c +++ b/frame/3/syr2k/bli_syr2k.c @@ -34,7 +34,7 @@ #include "blis.h" -extern her2k_t* her2k_cntl; +//extern her2k_t* her2k_cntl; extern herk_t* herk_cntl; // @@ -46,7 +46,7 @@ void bli_syr2k( obj_t* alpha, obj_t* beta, obj_t* c ) { - her2k_t* cntl; + //her2k_t* cntl; obj_t c_local; obj_t a_local; obj_t bt_local; @@ -86,11 +86,11 @@ void bli_syr2k( obj_t* alpha, bli_obj_induce_trans( c_local ); } +#if 0 // Choose the control tree. We can just use her2k since the algorithm // is nearly identical to that of syr2k. cntl = her2k_cntl; -#if 1 // Invoke the internal back-end. bli_her2k_int( alpha, &a_local, diff --git a/frame/cntl/bli_cntl_init.c b/frame/cntl/bli_cntl_init.c index 7975f7d76..c0e9c7ecd 100644 --- a/frame/cntl/bli_cntl_init.c +++ b/frame/cntl/bli_cntl_init.c @@ -57,9 +57,7 @@ void bli_cntl_init( void ) // Level-3 bli_gemm_cntl_init(); - bli_hemm_cntl_init(); bli_herk_cntl_init(); - bli_her2k_cntl_init(); bli_trmm_cntl_init(); bli_trsm_cntl_init(); } @@ -87,9 +85,7 @@ void bli_cntl_finalize( void ) // Level-3 bli_gemm_cntl_finalize(); - bli_hemm_cntl_finalize(); bli_herk_cntl_finalize(); - bli_her2k_cntl_finalize(); bli_trmm_cntl_finalize(); bli_trsm_cntl_finalize(); }