mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Removed redundant hemm, her2k control trees.
Details:
- Removed the code that generated a control tree specifically for hemm and symm. Instead, the gemm control tree is now configured so that it works for gemm, hemm, and symm.
- Retired most of the her2k code, as it was not being used. (Currently, her2k is implemented as two invocations of herk.) I couldn't think of many situations where her2k variants were needed.
- Removed some older her2k code.
This commit is contained in:
@@ -47,8 +47,6 @@ func_t* gemm_ukrs;
|
||||
|
||||
packm_t* gemm_packa_cntl;
|
||||
packm_t* gemm_packb_cntl;
|
||||
packm_t* gemm_packc_cntl;
|
||||
unpackm_t* gemm_unpackc_cntl;
|
||||
|
||||
gemm_t* gemm_cntl_bp_ke;
|
||||
gemm_t* gemm_cntl_op_bp;
|
||||
@@ -104,7 +102,7 @@ void bli_gemm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
gemm_mr,
|
||||
gemm_kr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
TRUE, // densify; used by hemm/symm
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
@@ -117,33 +115,13 @@ void bli_gemm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
gemm_kr,
|
||||
gemm_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
TRUE, // densify; used by hemm/symm
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm/unpackm operations on C.
|
||||
gemm_packc_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
gemm_mr,
|
||||
gemm_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_C_PANEL );
|
||||
|
||||
gemm_unpackc_cntl
|
||||
=
|
||||
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
|
||||
|
||||
//
|
||||
// Create a control tree for packing A and B, and streaming C.
|
||||
@@ -223,8 +201,6 @@ void bli_gemm_cntl_finalize()
|
||||
|
||||
bli_cntl_obj_free( gemm_packa_cntl );
|
||||
bli_cntl_obj_free( gemm_packb_cntl );
|
||||
bli_cntl_obj_free( gemm_packc_cntl );
|
||||
bli_cntl_obj_free( gemm_unpackc_cntl );
|
||||
|
||||
bli_cntl_obj_free( gemm_cntl_bp_ke );
|
||||
bli_cntl_obj_free( gemm_cntl_op_bp );
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern gemm_t* hemm_cntl;
|
||||
extern gemm_t* gemm_cntl;
|
||||
|
||||
//
|
||||
// Define object-based interface.
|
||||
@@ -86,7 +86,7 @@ void bli_hemm( side_t side,
|
||||
}
|
||||
|
||||
// Choose the control tree.
|
||||
cntl = hemm_cntl;
|
||||
cntl = gemm_cntl;
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_gemm_int( alpha,
|
||||
|
||||
@@ -32,7 +32,6 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bli_hemm_cntl.h"
|
||||
#include "bli_hemm_check.h"
|
||||
|
||||
|
||||
|
||||
@@ -1,220 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
extern func_t* gemm_ukrs;
|
||||
|
||||
blksz_t* hemm_mc;
|
||||
blksz_t* hemm_nc;
|
||||
blksz_t* hemm_kc;
|
||||
blksz_t* hemm_mr;
|
||||
blksz_t* hemm_nr;
|
||||
blksz_t* hemm_kr;
|
||||
|
||||
packm_t* hemm_packa_cntl;
|
||||
packm_t* hemm_packb_cntl;
|
||||
packm_t* hemm_packc_cntl;
|
||||
unpackm_t* hemm_unpackc_cntl;
|
||||
|
||||
gemm_t* hemm_cntl_bp_ke;
|
||||
gemm_t* hemm_cntl_op_bp;
|
||||
gemm_t* hemm_cntl_mm_op;
|
||||
gemm_t* hemm_cntl_vl_mm;
|
||||
|
||||
gemm_t* hemm_cntl;
|
||||
|
||||
|
||||
void bli_hemm_cntl_init()
|
||||
{
|
||||
// Create blocksize objects for each dimension.
|
||||
hemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
|
||||
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
|
||||
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
|
||||
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
|
||||
|
||||
hemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
|
||||
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
|
||||
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
|
||||
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
|
||||
|
||||
hemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
|
||||
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
|
||||
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
|
||||
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
|
||||
|
||||
hemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
|
||||
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
|
||||
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
|
||||
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
|
||||
|
||||
hemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
|
||||
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
|
||||
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
|
||||
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
|
||||
|
||||
hemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
|
||||
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
|
||||
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
|
||||
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
|
||||
|
||||
|
||||
// Create control tree objects for packm operations.
|
||||
hemm_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
hemm_mr,
|
||||
hemm_kr,
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_ROW_PANELS,
|
||||
BLIS_BUFFER_FOR_A_BLOCK );
|
||||
|
||||
hemm_packb_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
hemm_kr,
|
||||
hemm_nr,
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm/unpackm operations on C.
|
||||
hemm_packc_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
hemm_mr,
|
||||
hemm_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
hemm_unpackc_cntl
|
||||
=
|
||||
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
hemm_cntl_bp_ke
|
||||
=
|
||||
bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
|
||||
BLIS_VARIANT2,
|
||||
NULL,
|
||||
gemm_ukrs,
|
||||
NULL, NULL, NULL,
|
||||
NULL, NULL, NULL );
|
||||
|
||||
// Create control tree object for outer panel (to block-panel)
|
||||
// problem.
|
||||
hemm_cntl_op_bp
|
||||
=
|
||||
bli_gemm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
hemm_mc,
|
||||
NULL,
|
||||
NULL,
|
||||
hemm_packa_cntl,
|
||||
hemm_packb_cntl,
|
||||
NULL,
|
||||
hemm_cntl_bp_ke,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for general problem via multiple
|
||||
// rank-k (outer panel) updates.
|
||||
hemm_cntl_mm_op
|
||||
=
|
||||
bli_gemm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
hemm_kc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
hemm_cntl_op_bp,
|
||||
NULL );
|
||||
|
||||
// Create control tree object for very large problem via multiple
|
||||
// general problems.
|
||||
hemm_cntl_vl_mm
|
||||
=
|
||||
bli_gemm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
hemm_nc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
hemm_cntl_mm_op,
|
||||
NULL );
|
||||
|
||||
// Alias the "master" hemm control tree to a shorter name.
|
||||
hemm_cntl = hemm_cntl_vl_mm;
|
||||
}
|
||||
|
||||
void bli_hemm_cntl_finalize()
|
||||
{
|
||||
bli_blksz_obj_free( hemm_mc );
|
||||
bli_blksz_obj_free( hemm_nc );
|
||||
bli_blksz_obj_free( hemm_kc );
|
||||
bli_blksz_obj_free( hemm_mr );
|
||||
bli_blksz_obj_free( hemm_nr );
|
||||
bli_blksz_obj_free( hemm_kr );
|
||||
|
||||
bli_cntl_obj_free( hemm_packa_cntl );
|
||||
bli_cntl_obj_free( hemm_packb_cntl );
|
||||
bli_cntl_obj_free( hemm_packc_cntl );
|
||||
bli_cntl_obj_free( hemm_unpackc_cntl );
|
||||
|
||||
bli_cntl_obj_free( hemm_cntl_bp_ke );
|
||||
bli_cntl_obj_free( hemm_cntl_op_bp );
|
||||
bli_cntl_obj_free( hemm_cntl_mm_op );
|
||||
bli_cntl_obj_free( hemm_cntl_vl_mm );
|
||||
}
|
||||
|
||||
@@ -1,37 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
void bli_hemm_cntl_init( void );
|
||||
void bli_hemm_cntl_finalize( void );
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern her2k_t* her2k_cntl;
|
||||
//extern her2k_t* her2k_cntl;
|
||||
extern herk_t* herk_cntl;
|
||||
|
||||
//
|
||||
|
||||
@@ -32,8 +32,9 @@
|
||||
|
||||
*/
|
||||
|
||||
#include "bli_her2k_cntl.h"
|
||||
//#include "bli_her2k_cntl.h"
|
||||
#include "bli_her2k_check.h"
|
||||
/*
|
||||
#include "bli_her2k_int.h"
|
||||
#include "bli_her2k_target.h"
|
||||
|
||||
@@ -45,6 +46,7 @@
|
||||
|
||||
#include "bli_her2k_l_ker_var2.h"
|
||||
#include "bli_her2k_u_ker_var2.h"
|
||||
*/
|
||||
|
||||
|
||||
//
|
||||
|
||||
@@ -151,6 +151,7 @@ void bli_her2k_check( obj_t* alpha,
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
#if 0
|
||||
void bli_her2k_int_check( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
@@ -172,4 +173,4 @@ void bli_her2k_int_check( obj_t* alpha,
|
||||
e_val = bli_check_valid_cntl( ( void* )cntl );
|
||||
bli_check_error_code( e_val );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
@@ -47,6 +47,7 @@ void bli_her2k_check( obj_t* alpha,
|
||||
obj_t* beta,
|
||||
obj_t* c );
|
||||
|
||||
#if 0
|
||||
void bli_her2k_int_check( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
@@ -56,4 +57,4 @@ void bli_her2k_int_check( obj_t* alpha,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl );
|
||||
|
||||
#endif
|
||||
|
||||
@@ -1,256 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_her2k_l_blk_var4( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl )
|
||||
{
|
||||
obj_t a1, a1_pack;
|
||||
obj_t bh_pack, bhL_pack;
|
||||
obj_t b1, b1_pack;
|
||||
obj_t ah_pack, ahL_pack;
|
||||
obj_t c1, c1_pack;
|
||||
obj_t c1L, c1L_pack;
|
||||
|
||||
dim_t i;
|
||||
dim_t bm_alg;
|
||||
dim_t m_trans;
|
||||
dim_t offL, nL;
|
||||
|
||||
// Initialize all pack objects that are passed into packm_init().
|
||||
bli_obj_init_pack( &a1_pack );
|
||||
bli_obj_init_pack( &bh_pack );
|
||||
bli_obj_init_pack( &b1_pack );
|
||||
bli_obj_init_pack( &ah_pack );
|
||||
bli_obj_init_pack( &c1_pack );
|
||||
bli_obj_init_pack( &c1L_pack );
|
||||
|
||||
// Query dimension in partitioning direction.
|
||||
m_trans = bli_obj_length_after_trans( *c );
|
||||
|
||||
// Scale C by beta (if instructed).
|
||||
bli_scalm_int( beta,
|
||||
c,
|
||||
cntl_sub_scalm( cntl ) );
|
||||
|
||||
// Initialize object for packing B'.
|
||||
bli_packm_init( bh, &bh_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Initialize object for packing A'.
|
||||
bli_packm_init( ah, &ah_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Fuse the first iteration with incremental packing and computation.
|
||||
{
|
||||
obj_t bh_inc, bh_pack_inc;
|
||||
obj_t ah_inc, ah_pack_inc;
|
||||
obj_t c1_pack_inc;
|
||||
|
||||
dim_t j;
|
||||
dim_t bn_inc;
|
||||
dim_t n_trans;
|
||||
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width( bh_pack );
|
||||
|
||||
// Determine the current algorithmic blocksize.
|
||||
bm_alg = bli_determine_blocksize_b( 0, m_trans, a,
|
||||
cntl_blocksize( cntl ) );
|
||||
|
||||
// Acquire partitions for A1, B1, and C1.
|
||||
bli_acquire_mpart_b2t( BLIS_SUBPART1,
|
||||
0, bm_alg, a, &a1 );
|
||||
bli_acquire_mpart_b2t( BLIS_SUBPART1,
|
||||
0, bm_alg, b, &b1 );
|
||||
bli_acquire_mpart_b2t( BLIS_SUBPART1,
|
||||
0, bm_alg, c, &c1 );
|
||||
|
||||
// Initialize objects for packing A1, B1, and C1.
|
||||
bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &b1, &b1_pack, cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Pack A1 and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( j = 0; j < n_trans; j += bn_inc )
|
||||
{
|
||||
// Determine the current incremental packing blocksize.
|
||||
bn_inc = bli_determine_blocksize_f( j, n_trans, a,
|
||||
cntl_blocksize_aux( cntl ) );
|
||||
|
||||
// Acquire incremental partitions.
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, bh, &bh_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &bh_pack, &bh_pack_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &c1_pack, &c1_pack_inc );
|
||||
|
||||
// Pack Bh_inc and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha, &bh_inc, &bh_pack_inc, cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( &BLIS_ONE,
|
||||
&a1_pack,
|
||||
&bh_pack_inc,
|
||||
beta,
|
||||
&c1_pack_inc,
|
||||
cntl_sub_herk( cntl ) );
|
||||
}
|
||||
|
||||
// Pack B1 and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj, &b1, &b1_pack, cntl_sub_packm_a( cntl ) );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( j = 0; j < n_trans; j += bn_inc )
|
||||
{
|
||||
// Determine the current incremental packing blocksize.
|
||||
bn_inc = bli_determine_blocksize_f( j, n_trans, b,
|
||||
cntl_blocksize_aux( cntl ) );
|
||||
|
||||
// Acquire incremental partitions.
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, ah, &ah_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &ah_pack, &ah_pack_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &c1_pack, &c1_pack_inc );
|
||||
|
||||
// Pack Ah_inc and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj, &ah_inc, &ah_pack_inc, cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( &BLIS_ONE,
|
||||
&b1_pack,
|
||||
&ah_pack_inc,
|
||||
beta,
|
||||
&c1_pack_inc,
|
||||
cntl_sub_herk( cntl ) );
|
||||
}
|
||||
|
||||
// Unpack C1 (if C1 was packed).
|
||||
bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) );
|
||||
}
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = bm_alg; i < m_trans; i += bm_alg )
|
||||
{
|
||||
// Determine the current algorithmic blocksize.
|
||||
bm_alg = bli_determine_blocksize_b( i, m_trans, a,
|
||||
cntl_blocksize( cntl ) );
|
||||
|
||||
// Acquire partitions for A1, B1, and C1.
|
||||
bli_acquire_mpart_b2t( BLIS_SUBPART1,
|
||||
i, bm_alg, a, &a1 );
|
||||
bli_acquire_mpart_b2t( BLIS_SUBPART1,
|
||||
i, bm_alg, b, &b1 );
|
||||
bli_acquire_mpart_b2t( BLIS_SUBPART1,
|
||||
i, bm_alg, c, &c1 );
|
||||
|
||||
// Partition off the stored region of C1 and the corresponding regions
|
||||
// of Bh_pack and Ah_pack. We compute the width of the subpartition
|
||||
// taking the location of the diagonal into account.
|
||||
offL = 0;
|
||||
nL = bli_min( bli_obj_width_after_trans( c1 ),
|
||||
bli_obj_diag_offset_after_trans( c1 ) + bm_alg );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offL, nL, &c1, &c1L );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offL, nL, &bh_pack, &bhL_pack );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offL, nL, &ah_pack, &ahL_pack );
|
||||
|
||||
// Initialize objects for packing A1, B1, and C1.
|
||||
bli_packm_init( &a1, &a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &b1, &b1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &c1L, &c1L_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Pack A1 and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
&a1, &a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
|
||||
// Pack B1 and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj,
|
||||
&b1, &b1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta,
|
||||
&c1L, &c1L_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Perform herk subproblem.
|
||||
bli_her2k_int( alpha,
|
||||
&a1_pack,
|
||||
&bhL_pack,
|
||||
alpha_conj,
|
||||
&b1_pack,
|
||||
&ahL_pack,
|
||||
beta,
|
||||
&c1L_pack,
|
||||
cntl_sub_her2k( cntl ) );
|
||||
|
||||
// Unpack C1 (if C1 was packed).
|
||||
bli_unpackm_int( &c1L_pack, &c1L,
|
||||
cntl_sub_unpackm_c( cntl ) );
|
||||
}
|
||||
|
||||
// If any packing buffers were acquired within packm, release them back
|
||||
// to the memory manager.
|
||||
bli_obj_release_pack( &a1_pack );
|
||||
bli_obj_release_pack( &bh_pack );
|
||||
bli_obj_release_pack( &b1_pack );
|
||||
bli_obj_release_pack( &ah_pack );
|
||||
bli_obj_release_pack( &c1_pack );
|
||||
bli_obj_release_pack( &c1L_pack );
|
||||
}
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
void bli_her2k_l_blk_var4( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl );
|
||||
|
||||
@@ -1,215 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
void bli_her2k_u_blk_var1( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl )
|
||||
{
|
||||
obj_t a1, a1_pack;
|
||||
obj_t bh_pack, bhR_pack;
|
||||
obj_t b1, b1_pack;
|
||||
obj_t ah_pack, ahR_pack;
|
||||
obj_t c1;
|
||||
obj_t c1R, c1R_pack;
|
||||
|
||||
dim_t i;
|
||||
dim_t b_alg;
|
||||
dim_t m_trans;
|
||||
dim_t offR, nR;
|
||||
|
||||
// Initialize all pack objects that are passed into packm_init().
|
||||
bli_obj_init_pack( &a1_pack );
|
||||
bli_obj_init_pack( &bh_pack );
|
||||
bli_obj_init_pack( &b1_pack );
|
||||
bli_obj_init_pack( &ah_pack );
|
||||
bli_obj_init_pack( &c1R_pack );
|
||||
|
||||
// Query dimension in partitioning direction.
|
||||
m_trans = bli_obj_length_after_trans( *c );
|
||||
|
||||
// Scale C by beta (if instructed).
|
||||
bli_scalm_int( beta,
|
||||
c,
|
||||
cntl_sub_scalm( cntl ) );
|
||||
|
||||
//
|
||||
// Perform first rank-k update: C = C + alpha * A * B'.
|
||||
//
|
||||
|
||||
// Initialize object for packing B'.
|
||||
bli_packm_init( bh, &bh_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Pack B' and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
bh, &bh_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = 0; i < m_trans; i += b_alg )
|
||||
{
|
||||
// Determine the current algorithmic blocksize.
|
||||
b_alg = bli_determine_blocksize_f( i, m_trans, a,
|
||||
cntl_blocksize( cntl ) );
|
||||
|
||||
// Acquire partitions for A1 and C1.
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
i, b_alg, a, &a1 );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
i, b_alg, c, &c1 );
|
||||
|
||||
// Partition off the stored region of C1 and the corresponding region
|
||||
// of Bh_pack. We compute the width of the subpartition taking the
|
||||
// location of the diagonal into account.
|
||||
offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) );
|
||||
nR = bli_obj_width_after_trans( c1 ) - offR;
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offR, nR, &c1, &c1R );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offR, nR, &bh_pack, &bhR_pack );
|
||||
|
||||
// Initialize objects for packing A1 and C1.
|
||||
bli_packm_init( &a1, &a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &c1R, &c1R_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Pack A1 and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
&a1, &a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta,
|
||||
&c1R, &c1R_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( alpha,
|
||||
&a1_pack,
|
||||
&bhR_pack,
|
||||
beta,
|
||||
&c1R_pack,
|
||||
cntl_sub_herk( cntl ) );
|
||||
|
||||
// Unpack C1 (if C1 was packed).
|
||||
bli_unpackm_int( &c1R_pack, &c1R,
|
||||
cntl_sub_unpackm_c( cntl ) );
|
||||
}
|
||||
|
||||
// If any packing buffers were acquired within packm, release them back
|
||||
// to the memory manager.
|
||||
bli_obj_release_pack( &a1_pack );
|
||||
bli_obj_release_pack( &bh_pack );
|
||||
|
||||
//
|
||||
// Perform second rank-k update: C = C + conj(alpha) * B * A'.
|
||||
//
|
||||
|
||||
// Initialize object for packing A'.
|
||||
bli_packm_init( ah, &ah_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Pack A' and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj,
|
||||
ah, &ah_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = 0; i < m_trans; i += b_alg )
|
||||
{
|
||||
// Determine the current algorithmic blocksize.
|
||||
b_alg = bli_determine_blocksize_f( i, m_trans, b,
|
||||
cntl_blocksize( cntl ) );
|
||||
|
||||
// Acquire partitions for B1 and C1.
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
i, b_alg, b, &b1 );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
i, b_alg, c, &c1 );
|
||||
|
||||
// Partition off the stored region of C1 and the corresponding region
|
||||
// of Ah_pack. We compute the width of the subpartition taking the
|
||||
// location of the diagonal into account.
|
||||
offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) );
|
||||
nR = bli_obj_width_after_trans( c1 ) - offR;
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offR, nR, &c1, &c1R );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offR, nR, &ah_pack, &ahR_pack );
|
||||
|
||||
// Initialize objects for packing B1 and C1.
|
||||
bli_packm_init( &b1, &b1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &c1R, &c1R_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Pack B1 and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj,
|
||||
&b1, &b1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta,
|
||||
&c1R, &c1R_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||
|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( alpha_conj,
|
||||
&b1_pack,
|
||||
&ahR_pack,
|
||||
&BLIS_ONE,
|
||||
&c1R_pack,
|
||||
cntl_sub_herk( cntl ) );
|
||||
|
||||
// Unpack C1 (if C1 was packed).
|
||||
bli_unpackm_int( &c1R_pack, &c1R,
|
||||
cntl_sub_unpackm_c( cntl ) );
|
||||
}
|
||||
|
||||
// If any packing buffers were acquired within packm, release them back
|
||||
// to the memory manager.
|
||||
bli_obj_release_pack( &b1_pack );
|
||||
bli_obj_release_pack( &ah_pack );
|
||||
bli_obj_release_pack( &c1R_pack );
|
||||
}
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// Prototype for bli_her2k_u_blk_var1(). The function takes eight obj_t
// pointers (alpha, a, bh, alpha_conj, b, ah, beta, c) followed by a her2k
// control tree node, and returns nothing.
// NOTE(review): alpha, alpha_conj and beta are presumably scalar objects, and
// bh / ah presumably hold B' and A' -- confirm against the definition.
void bli_her2k_u_blk_var1( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl );
|
||||
|
||||
@@ -1,165 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
// bli_her2k_u_blk_var2: blocked her2k variant that sweeps C in column panels.
//
// A and B are packed once in full through the packm_a sub-node (with alpha
// and alpha_conj passed as the respective scaling factors). Each iteration
// then takes a b_alg-wide panel of bh, ah and c, trims the C panel and the
// packed A / B to their first mT rows, packs the panels, and forwards
// everything to bli_her2k_int() using the her2k sub-node of cntl.
//
//   alpha, alpha_conj  scaling objects handed to packm and bli_her2k_int().
//   a, b               operands packed in full before the loop.
//   bh, ah             operands partitioned per iteration (their panels are
//                      referred to as B1' and A1' in the comments below).
//   beta               scaling object handed to scalm, to packm of the C
//                      panel, and to bli_her2k_int().
//   c                  matrix being updated.
//   cntl               control tree node supplying the blocksize and the
//                      scalm / packm / unpackm / her2k sub-nodes.
void bli_her2k_u_blk_var2( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl )
|
||||
{
|
||||
obj_t a_pack, aT_pack;
|
||||
obj_t bh1, bh1_pack;
|
||||
obj_t b_pack, bT_pack;
|
||||
obj_t ah1, ah1_pack;
|
||||
obj_t c1;
|
||||
obj_t c1T, c1T_pack;
|
||||

|
||||
dim_t i;
|
||||
dim_t b_alg;
|
||||
dim_t n_trans;
|
||||
dim_t offT, mT;
|
||||

|
||||
// Initialize all pack objects that are passed into packm_init().
|
||||
bli_obj_init_pack( &a_pack );
|
||||
bli_obj_init_pack( &bh1_pack );
|
||||
bli_obj_init_pack( &b_pack );
|
||||
bli_obj_init_pack( &ah1_pack );
|
||||
bli_obj_init_pack( &c1T_pack );
|
||||

|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *c );
|
||||

|
||||
// Scale C by beta (if instructed).
|
||||
bli_scalm_int( beta,
|
||||
c,
|
||||
cntl_sub_scalm( cntl ) );
|
||||

|
||||
// Initialize object for packing A and B.
|
||||
bli_packm_init( a, &a_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( b, &b_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||

|
||||
// Pack A and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
a, &a_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||

|
||||
// Pack B and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj,
|
||||
b, &b_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||

|
||||
// Partition along the n dimension.
// b_alg is recomputed at the top of every pass, so the loop increment always
// uses the blocksize of the panel that was just processed.
for ( i = 0; i < n_trans; i += b_alg )
|
||||
{
|
||||
// Determine the current algorithmic blocksize.
|
||||
b_alg = bli_determine_blocksize_b( i, n_trans, bh,
|
||||
cntl_blocksize( cntl ) );
|
||||

|
||||
// Acquire partitions for B1', A1', and C1.
// All three use the r2l partitioning routine with the same offset and width.
|
||||
bli_acquire_mpart_r2l( BLIS_SUBPART1,
|
||||
i, b_alg, bh, &bh1 );
|
||||
bli_acquire_mpart_r2l( BLIS_SUBPART1,
|
||||
i, b_alg, ah, &ah1 );
|
||||
bli_acquire_mpart_r2l( BLIS_SUBPART1,
|
||||
i, b_alg, c, &c1 );
|
||||

|
||||
// Partition off the stored region of C1 and the corresponding regions
|
||||
// of A_pack and B_pack. We compute the length of the subpartition
|
||||
// taking the location of the diagonal into account.
// mT is the smaller of the panel length and (b_alg - diagonal offset of c1).
// NOTE(review): presumably this is the first row past the diagonal block of
// the panel, so rows below it are skipped -- confirm.
|
||||
offT = 0;
|
||||
mT = bli_min( bli_obj_length_after_trans( c1 ),
|
||||
-bli_obj_diag_offset_after_trans( c1 ) + b_alg );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
offT, mT, &c1, &c1T );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
offT, mT, &a_pack, &aT_pack );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
offT, mT, &b_pack, &bT_pack );
|
||||

|
||||
// Initialize objects for packing B1', A1', and C1.
|
||||
bli_packm_init( &bh1, &bh1_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
bli_packm_init( &ah1, &ah1_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||
bli_packm_init( &c1T, &c1T_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||

|
||||
// Pack B1' and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
&bh1, &bh1_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||

|
||||
// Pack A1' and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj,
|
||||
&ah1, &ah1_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||

|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta,
|
||||
&c1T, &c1T_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||

|
||||
// Perform her2k subproblem.
|
||||
bli_her2k_int( alpha,
|
||||
&aT_pack,
|
||||
&bh1_pack,
|
||||
alpha_conj,
|
||||
&bT_pack,
|
||||
&ah1_pack,
|
||||
beta,
|
||||
&c1T_pack,
|
||||
cntl_sub_her2k( cntl ) );
|
||||

|
||||
// Unpack C1 (if C1 was packed).
|
||||
bli_unpackm_int( &c1T_pack, &c1T,
|
||||
cntl_sub_unpackm_c( cntl ) );
|
||||
}
|
||||

|
||||
// If any packing buffers were acquired within packm, release them back
|
||||
// to the memory manager.
// Only the objects that went through bli_obj_init_pack() are released; the
// aT_pack / bT_pack views are sub-partitions of a_pack / b_pack.
|
||||
bli_obj_release_pack( &a_pack );
|
||||
bli_obj_release_pack( &bh1_pack );
|
||||
bli_obj_release_pack( &b_pack );
|
||||
bli_obj_release_pack( &ah1_pack );
|
||||
bli_obj_release_pack( &c1T_pack );
|
||||
}
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// Prototype for bli_her2k_u_blk_var2(). The function takes eight obj_t
// pointers (alpha, a, bh, alpha_conj, b, ah, beta, c) followed by a her2k
// control tree node, and returns nothing.
// NOTE(review): alpha, alpha_conj and beta are presumably scalar objects, and
// bh / ah presumably hold B' and A' -- confirm against the definition.
void bli_her2k_u_blk_var2( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl );
|
||||
|
||||
@@ -1,255 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
// bli_her2k_u_blk_var4: blocked her2k variant that sweeps C in row panels,
// with the first row panel fused with incremental packing of bh and ah.
//
// bh_pack and ah_pack are set up with bli_packm_init() before any work is
// done, but their contents are packed piece by piece inside the fused first
// iteration. The remaining iterations only take column sub-partitions
// (bhR_pack / ahR_pack) of those already-packed objects.
//
//   alpha, alpha_conj  scaling objects handed to packm and bli_her2k_int().
//   a, b               operands partitioned into row panels A1 / B1.
//   bh, ah             operands packed incrementally in the first iteration.
//   beta               scaling object handed to scalm, to packm of the C
//                      panel, and to the herk / her2k sub-calls.
//   c                  matrix being updated.
//   cntl               control tree node supplying the blocksizes and the
//                      scalm / packm / unpackm / herk / her2k sub-nodes.
void bli_her2k_u_blk_var4( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl )
|
||||
{
|
||||
obj_t a1, a1_pack;
|
||||
obj_t bh_pack, bhR_pack;
|
||||
obj_t b1, b1_pack;
|
||||
obj_t ah_pack, ahR_pack;
|
||||
obj_t c1, c1_pack;
|
||||
obj_t c1R, c1R_pack;
|
||||

|
||||
dim_t i;
|
||||
dim_t bm_alg;
|
||||
dim_t m_trans;
|
||||
dim_t offR, nR;
|
||||

|
||||
// Initialize all pack objects that are passed into packm_init().
|
||||
bli_obj_init_pack( &a1_pack );
|
||||
bli_obj_init_pack( &bh_pack );
|
||||
bli_obj_init_pack( &b1_pack );
|
||||
bli_obj_init_pack( &ah_pack );
|
||||
bli_obj_init_pack( &c1_pack );
|
||||
bli_obj_init_pack( &c1R_pack );
|
||||

|
||||
// Query dimension in partitioning direction.
|
||||
m_trans = bli_obj_length_after_trans( *c );
|
||||

|
||||
// Scale C by beta (if instructed).
|
||||
bli_scalm_int( beta,
|
||||
c,
|
||||
cntl_sub_scalm( cntl ) );
|
||||

|
||||
// Initialize object for packing B' (the whole of bh; packed incrementally below).
|
||||
bli_packm_init( bh, &bh_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||

|
||||
// Initialize object for packing A' (the whole of ah; packed incrementally below).
|
||||
bli_packm_init( ah, &ah_pack,
|
||||
cntl_sub_packm_b( cntl ) );
|
||||

|
||||
// Fuse the first iteration with incremental packing and computation.
// This block is not guarded by a check on m_trans, and it works on the full
// row panel c1 rather than a trimmed stored region (no offR / nR here).
// NOTE(review): both inner loops pass &BLIS_ONE as alpha and beta as beta to
// bli_herk_int() for the same column ranges of c1_pack, after c1 was already
// given to bli_packm_int() with beta. Whether any scalar ends up applied
// twice or dropped depends on the control tree and on bli_herk_int(), neither
// of which is visible here -- confirm.
|
||||
{
|
||||
obj_t bh_inc, bh_pack_inc;
|
||||
obj_t ah_inc, ah_pack_inc;
|
||||
obj_t c1_pack_inc;
|
||||

|
||||
dim_t j;
|
||||
dim_t bn_inc;
|
||||
dim_t n_trans;
|
||||

|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width( bh_pack );
|
||||

|
||||
// Determine the current algorithmic blocksize.
|
||||
bm_alg = bli_determine_blocksize_f( 0, m_trans, a,
|
||||
cntl_blocksize( cntl ) );
|
||||

|
||||
// Acquire partitions for A1, B1, and C1.
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
0, bm_alg, a, &a1 );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
0, bm_alg, b, &b1 );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
0, bm_alg, c, &c1 );
|
||||

|
||||
// Initialize objects for packing A1, B1, and C1.
|
||||
bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &b1, &b1_pack, cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) );
|
||||

|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) );
|
||||

|
||||
// Pack A1 and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) );
|
||||

|
||||
// Partition along the n dimension.
// First pass: pack bh in bn_inc-wide pieces and update the matching columns
// of c1_pack with the packed A1.
|
||||
for ( j = 0; j < n_trans; j += bn_inc )
|
||||
{
|
||||
// Determine the current incremental packing blocksize.
|
||||
bn_inc = bli_determine_blocksize_f( j, n_trans, a,
|
||||
cntl_blocksize_aux( cntl ) );
|
||||

|
||||
// Acquire incremental partitions.
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, bh, &bh_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &bh_pack, &bh_pack_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &c1_pack, &c1_pack_inc );
|
||||

|
||||
// Pack Bh_inc and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha, &bh_inc, &bh_pack_inc, cntl_sub_packm_b( cntl ) );
|
||||

|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( &BLIS_ONE,
|
||||
&a1_pack,
|
||||
&bh_pack_inc,
|
||||
beta,
|
||||
&c1_pack_inc,
|
||||
cntl_sub_herk( cntl ) );
|
||||
}
|
||||

|
||||
// Pack B1 and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj, &b1, &b1_pack, cntl_sub_packm_a( cntl ) );
|
||||

|
||||
// Partition along the n dimension.
// Second pass: same walk over the columns, now packing ah and updating
// c1_pack with the packed B1.
|
||||
for ( j = 0; j < n_trans; j += bn_inc )
|
||||
{
|
||||
// Determine the current incremental packing blocksize.
|
||||
bn_inc = bli_determine_blocksize_f( j, n_trans, b,
|
||||
cntl_blocksize_aux( cntl ) );
|
||||

|
||||
// Acquire incremental partitions.
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, ah, &ah_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &ah_pack, &ah_pack_inc );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
j, bn_inc, &c1_pack, &c1_pack_inc );
|
||||

|
||||
// Pack Ah_inc and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj, &ah_inc, &ah_pack_inc, cntl_sub_packm_b( cntl ) );
|
||||

|
||||
// Perform herk subproblem.
|
||||
bli_herk_int( &BLIS_ONE,
|
||||
&b1_pack,
|
||||
&ah_pack_inc,
|
||||
beta,
|
||||
&c1_pack_inc,
|
||||
cntl_sub_herk( cntl ) );
|
||||
}
|
||||

|
||||
// Unpack C1 (if C1 was packed).
|
||||
bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) );
|
||||
}
|
||||

|
||||
// Partition along the m dimension.
// Starts at i = bm_alg because rows [0, bm_alg) were handled by the fused
// first iteration above; bm_alg is then recomputed for each later panel.
|
||||
for ( i = bm_alg; i < m_trans; i += bm_alg )
|
||||
{
|
||||
// Determine the current algorithmic blocksize.
|
||||
bm_alg = bli_determine_blocksize_f( i, m_trans, a,
|
||||
cntl_blocksize( cntl ) );
|
||||

|
||||
// Acquire partitions for A1, B1, and C1.
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
i, bm_alg, a, &a1 );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
i, bm_alg, b, &b1 );
|
||||
bli_acquire_mpart_t2b( BLIS_SUBPART1,
|
||||
i, bm_alg, c, &c1 );
|
||||

|
||||
// Partition off the stored region of C1 and the corresponding regions
|
||||
// of Bh_pack and Ah_pack. We compute the width of the subpartition
|
||||
// taking the location of the diagonal into account.
// offR is the diagonal offset of c1 clamped at zero; nR is the remaining width.
|
||||
offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) );
|
||||
nR = bli_obj_width_after_trans( c1 ) - offR;
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offR, nR, &c1, &c1R );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offR, nR, &bh_pack, &bhR_pack );
|
||||
bli_acquire_mpart_l2r( BLIS_SUBPART1,
|
||||
offR, nR, &ah_pack, &ahR_pack );
|
||||

|
||||
// Initialize objects for packing A1, B1, and C1.
|
||||
bli_packm_init( &a1, &a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &b1, &b1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||
bli_packm_init( &c1R, &c1R_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||

|
||||
// Pack A1 and scale by alpha (if instructed).
|
||||
bli_packm_int( alpha,
|
||||
&a1, &a1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||

|
||||
// Pack B1 and scale by alpha_conj (if instructed).
|
||||
bli_packm_int( alpha_conj,
|
||||
&b1, &b1_pack,
|
||||
cntl_sub_packm_a( cntl ) );
|
||||

|
||||
// Pack C1 and scale by beta (if instructed).
|
||||
bli_packm_int( beta,
|
||||
&c1R, &c1R_pack,
|
||||
cntl_sub_packm_c( cntl ) );
|
||||

|
||||
// Perform her2k subproblem.
|
||||
bli_her2k_int( alpha,
|
||||
&a1_pack,
|
||||
&bhR_pack,
|
||||
alpha_conj,
|
||||
&b1_pack,
|
||||
&ahR_pack,
|
||||
beta,
|
||||
&c1R_pack,
|
||||
cntl_sub_her2k( cntl ) );
|
||||

|
||||
// Unpack C1 (if C1 was packed).
|
||||
bli_unpackm_int( &c1R_pack, &c1R,
|
||||
cntl_sub_unpackm_c( cntl ) );
|
||||
}
|
||||

|
||||
// If any packing buffers were acquired within packm, release them back
|
||||
// to the memory manager.
// Only the objects that went through bli_obj_init_pack() are released; the
// bhR_pack / ahR_pack views are sub-partitions of bh_pack / ah_pack.
|
||||
bli_obj_release_pack( &a1_pack );
|
||||
bli_obj_release_pack( &bh_pack );
|
||||
bli_obj_release_pack( &b1_pack );
|
||||
bli_obj_release_pack( &ah_pack );
|
||||
bli_obj_release_pack( &c1_pack );
|
||||
bli_obj_release_pack( &c1R_pack );
|
||||
}
|
||||
|
||||
@@ -1,44 +0,0 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
// Prototype for bli_her2k_u_blk_var4(). The function takes eight obj_t
// pointers (alpha, a, bh, alpha_conj, b, ah, beta, c) followed by a her2k
// control tree node, and returns nothing.
// NOTE(review): alpha, alpha_conj and beta are presumably scalar objects, and
// bh / ah presumably hold B' and A' -- confirm against the definition.
void bli_her2k_u_blk_var4( obj_t* alpha,
|
||||
obj_t* a,
|
||||
obj_t* bh,
|
||||
obj_t* alpha_conj,
|
||||
obj_t* b,
|
||||
obj_t* ah,
|
||||
obj_t* beta,
|
||||
obj_t* c,
|
||||
her2k_t* cntl );
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern gemm_t* hemm_cntl;
|
||||
extern gemm_t* gemm_cntl;
|
||||
|
||||
//
|
||||
// Define object-based interface.
|
||||
@@ -86,7 +86,7 @@ void bli_symm( side_t side,
|
||||
|
||||
// Choose the control tree. We can just use hemm since the algorithm
|
||||
// is nearly identical to that of symm.
|
||||
cntl = hemm_cntl;
|
||||
cntl = gemm_cntl;
|
||||
|
||||
// Invoke the internal back-end.
|
||||
bli_gemm_int( alpha,
|
||||
|
||||
@@ -34,7 +34,7 @@
|
||||
|
||||
#include "blis.h"
|
||||
|
||||
extern her2k_t* her2k_cntl;
|
||||
//extern her2k_t* her2k_cntl;
|
||||
extern herk_t* herk_cntl;
|
||||
|
||||
//
|
||||
@@ -46,7 +46,7 @@ void bli_syr2k( obj_t* alpha,
|
||||
obj_t* beta,
|
||||
obj_t* c )
|
||||
{
|
||||
her2k_t* cntl;
|
||||
//her2k_t* cntl;
|
||||
obj_t c_local;
|
||||
obj_t a_local;
|
||||
obj_t bt_local;
|
||||
@@ -86,11 +86,11 @@ void bli_syr2k( obj_t* alpha,
|
||||
bli_obj_induce_trans( c_local );
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Choose the control tree. We can just use her2k since the algorithm
|
||||
// is nearly identical to that of syr2k.
|
||||
cntl = her2k_cntl;
|
||||
|
||||
#if 1
|
||||
// Invoke the internal back-end.
|
||||
bli_her2k_int( alpha,
|
||||
&a_local,
|
||||
|
||||
@@ -57,9 +57,7 @@ void bli_cntl_init( void )
|
||||
|
||||
// Level-3
|
||||
bli_gemm_cntl_init();
|
||||
bli_hemm_cntl_init();
|
||||
bli_herk_cntl_init();
|
||||
bli_her2k_cntl_init();
|
||||
bli_trmm_cntl_init();
|
||||
bli_trsm_cntl_init();
|
||||
}
|
||||
@@ -87,9 +85,7 @@ void bli_cntl_finalize( void )
|
||||
|
||||
// Level-3
|
||||
bli_gemm_cntl_finalize();
|
||||
bli_hemm_cntl_finalize();
|
||||
bli_herk_cntl_finalize();
|
||||
bli_her2k_cntl_finalize();
|
||||
bli_trmm_cntl_finalize();
|
||||
bli_trsm_cntl_finalize();
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user