Removed redundant hemm and her2k control trees.

Details:
- Removed code that generated a control tree specifically for hemm and
  symm. Instead, the gemm control tree is now configured so that it
  works for gemm, hemm, or symm.
- Retired most her2k code, as it was not being used. (Currently, her2k is
  implemented as two invocations of herk.) I couldn't think of many
  situations where her2k variants were needed.
- Removed some older her2k code.
This commit is contained in:
Field G. Van Zee
2014-01-28 19:40:29 -06:00
parent 5a36e5bf2f
commit 251c5d1121
36 changed files with 17 additions and 1366 deletions

View File

@@ -47,8 +47,6 @@ func_t* gemm_ukrs;
packm_t* gemm_packa_cntl;
packm_t* gemm_packb_cntl;
packm_t* gemm_packc_cntl;
unpackm_t* gemm_unpackc_cntl;
gemm_t* gemm_cntl_bp_ke;
gemm_t* gemm_cntl_op_bp;
@@ -104,7 +102,7 @@ void bli_gemm_cntl_init()
BLIS_VARIANT2,
gemm_mr,
gemm_kr,
FALSE, // already dense; densify not necessary
TRUE, // densify; used by hemm/symm
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
@@ -117,33 +115,13 @@ void bli_gemm_cntl_init()
BLIS_VARIANT2,
gemm_kr,
gemm_nr,
FALSE, // already dense; densify not necessary
TRUE, // densify; used by hemm/symm
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm/unpackm operations on C.
gemm_packc_cntl
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
gemm_mr,
gemm_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_C_PANEL );
gemm_unpackc_cntl
=
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
NULL ); // no blocksize needed
//
// Create a control tree for packing A and B, and streaming C.
@@ -223,8 +201,6 @@ void bli_gemm_cntl_finalize()
bli_cntl_obj_free( gemm_packa_cntl );
bli_cntl_obj_free( gemm_packb_cntl );
bli_cntl_obj_free( gemm_packc_cntl );
bli_cntl_obj_free( gemm_unpackc_cntl );
bli_cntl_obj_free( gemm_cntl_bp_ke );
bli_cntl_obj_free( gemm_cntl_op_bp );

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern gemm_t* hemm_cntl;
extern gemm_t* gemm_cntl;
//
// Define object-based interface.
@@ -86,7 +86,7 @@ void bli_hemm( side_t side,
}
// Choose the control tree.
cntl = hemm_cntl;
cntl = gemm_cntl;
// Invoke the internal back-end.
bli_gemm_int( alpha,

View File

@@ -32,7 +32,6 @@
*/
#include "bli_hemm_cntl.h"
#include "bli_hemm_check.h"

View File

@@ -1,220 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Objects defined in other translation units. gemm_ukrs is handed to the
// leaf node of the hemm control tree (hemm_cntl_bp_ke) in
// bli_hemm_cntl_init(); scalm_cntl is declared here but not referenced by
// the code in this file.
extern scalm_t* scalm_cntl;
extern func_t* gemm_ukrs;
// Blocksize objects, one per blocking dimension. Created in
// bli_hemm_cntl_init() and freed in bli_hemm_cntl_finalize().
blksz_t* hemm_mc;
blksz_t* hemm_nc;
blksz_t* hemm_kc;
blksz_t* hemm_mr;
blksz_t* hemm_nr;
blksz_t* hemm_kr;
// Control tree nodes for packing A and B, and for packing/unpacking C.
// NOTE: hemm_packc_cntl and hemm_unpackc_cntl are created and freed, but
// are not passed to any of the gemm nodes built in bli_hemm_cntl_init().
packm_t* hemm_packa_cntl;
packm_t* hemm_packb_cntl;
packm_t* hemm_packc_cntl;
unpackm_t* hemm_unpackc_cntl;
// Nodes of the hemm control tree, listed from the innermost (leaf) node to
// the outermost node.
gemm_t* hemm_cntl_bp_ke;
gemm_t* hemm_cntl_op_bp;
gemm_t* hemm_cntl_mm_op;
gemm_t* hemm_cntl_vl_mm;
// Alias for the outermost node (set to hemm_cntl_vl_mm by
// bli_hemm_cntl_init()); it is not freed separately.
gemm_t* hemm_cntl;
void bli_hemm_cntl_init()
{
// Create blocksize objects for each dimension.
hemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
hemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
hemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
hemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
hemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
hemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
// Create control tree objects for packm operations.
hemm_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
hemm_mr,
hemm_kr,
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_ROW_PANELS,
BLIS_BUFFER_FOR_A_BLOCK );
hemm_packb_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
hemm_kr,
hemm_nr,
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COL_PANELS,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm/unpackm operations on C.
hemm_packc_cntl
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
hemm_mr,
hemm_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_GEN_USE );
hemm_unpackc_cntl
=
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
NULL ); // no blocksize needed
// Create control tree object for lowest-level block-panel kernel.
hemm_cntl_bp_ke
=
bli_gemm_cntl_obj_create( BLIS_UNB_OPT,
BLIS_VARIANT2,
NULL,
gemm_ukrs,
NULL, NULL, NULL,
NULL, NULL, NULL );
// Create control tree object for outer panel (to block-panel)
// problem.
hemm_cntl_op_bp
=
bli_gemm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
hemm_mc,
NULL,
NULL,
hemm_packa_cntl,
hemm_packb_cntl,
NULL,
hemm_cntl_bp_ke,
NULL );
// Create control tree object for general problem via multiple
// rank-k (outer panel) updates.
hemm_cntl_mm_op
=
bli_gemm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
hemm_kc,
NULL,
NULL,
NULL,
NULL,
NULL,
hemm_cntl_op_bp,
NULL );
// Create control tree object for very large problem via multiple
// general problems.
hemm_cntl_vl_mm
=
bli_gemm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
hemm_nc,
NULL,
NULL,
NULL,
NULL,
NULL,
hemm_cntl_mm_op,
NULL );
// Alias the "master" hemm control tree to a shorter name.
hemm_cntl = hemm_cntl_vl_mm;
}
// Releases every object created by bli_hemm_cntl_init(). The hemm_cntl
// alias is not freed on its own: it refers to hemm_cntl_vl_mm, which is
// freed below.
void bli_hemm_cntl_finalize()
{
    // Blocksize objects.
    bli_blksz_obj_free( hemm_mc );
    bli_blksz_obj_free( hemm_nc );
    bli_blksz_obj_free( hemm_kc );
    bli_blksz_obj_free( hemm_mr );
    bli_blksz_obj_free( hemm_nr );
    bli_blksz_obj_free( hemm_kr );

    // packm/unpackm nodes.
    bli_cntl_obj_free( hemm_packa_cntl );
    bli_cntl_obj_free( hemm_packb_cntl );
    bli_cntl_obj_free( hemm_packc_cntl );
    bli_cntl_obj_free( hemm_unpackc_cntl );

    // gemm nodes, innermost first.
    bli_cntl_obj_free( hemm_cntl_bp_ke );
    bli_cntl_obj_free( hemm_cntl_op_bp );
    bli_cntl_obj_free( hemm_cntl_mm_op );
    bli_cntl_obj_free( hemm_cntl_vl_mm );
}

View File

@@ -1,37 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Creates the blocksize objects and control tree nodes used by hemm and
// sets the hemm_cntl alias to the outermost node.
void bli_hemm_cntl_init( void );
// Frees the blocksize objects and control tree nodes created by
// bli_hemm_cntl_init().
void bli_hemm_cntl_finalize( void );

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern her2k_t* her2k_cntl;
//extern her2k_t* her2k_cntl;
extern herk_t* herk_cntl;
//

View File

@@ -32,8 +32,9 @@
*/
#include "bli_her2k_cntl.h"
//#include "bli_her2k_cntl.h"
#include "bli_her2k_check.h"
/*
#include "bli_her2k_int.h"
#include "bli_her2k_target.h"
@@ -45,6 +46,7 @@
#include "bli_her2k_l_ker_var2.h"
#include "bli_her2k_u_ker_var2.h"
*/
//

View File

@@ -151,6 +151,7 @@ void bli_her2k_check( obj_t* alpha,
bli_check_error_code( e_val );
}
#if 0
void bli_her2k_int_check( obj_t* alpha,
obj_t* a,
obj_t* bh,
@@ -172,4 +173,4 @@ void bli_her2k_int_check( obj_t* alpha,
e_val = bli_check_valid_cntl( ( void* )cntl );
bli_check_error_code( e_val );
}
#endif

View File

@@ -47,6 +47,7 @@ void bli_her2k_check( obj_t* alpha,
obj_t* beta,
obj_t* c );
#if 0
void bli_her2k_int_check( obj_t* alpha,
obj_t* a,
obj_t* bh,
@@ -56,4 +57,4 @@ void bli_her2k_int_check( obj_t* alpha,
obj_t* beta,
obj_t* c,
her2k_t* cntl );
#endif

View File

@@ -1,256 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//
// bli_her2k_l_blk_var4()
//
// Blocked her2k variant that partitions A, B, and C along the m dimension
// with the "b2t" partitioning routines and backward blocksize
// determination.
//
// The first row block is handled specially: B' and A' are packed
// incrementally, one n-dimension slice at a time, and each slice is
// consumed immediately by a bli_herk_int() call. All later row blocks reuse
// the already-packed B' and A' and are delegated to bli_her2k_int() on the
// sub-node of the control tree, restricted to the columns [0, nL) selected
// from the diagonal offset of the current block of C.
//
// Parameters:
//   alpha, alpha_conj  scalars applied while packing the A/B' and B/A'
//                      operands, respectively (if instructed by cntl).
//   a, bh              operands of the first product (A and B').
//   b, ah              operands of the second product (B and A').
//   beta               scalar applied to C (if instructed by cntl).
//   c                  matrix being updated.
//   cntl               her2k control tree node supplying blocksizes and
//                      the packm/unpackm/scalm/herk/her2k sub-nodes.
//
void bli_her2k_l_blk_var4( obj_t* alpha,
                           obj_t* a,
                           obj_t* bh,
                           obj_t* alpha_conj,
                           obj_t* b,
                           obj_t* ah,
                           obj_t* beta,
                           obj_t* c,
                           her2k_t* cntl )
{
    obj_t a1, a1_pack;
    obj_t bh_pack, bhL_pack;
    obj_t b1, b1_pack;
    obj_t ah_pack, ahL_pack;
    obj_t c1, c1_pack;
    obj_t c1L, c1L_pack;
    dim_t i;
    dim_t bm_alg;
    dim_t m_trans;
    dim_t offL, nL;
    // Initialize all pack objects that are passed into packm_init().
    bli_obj_init_pack( &a1_pack );
    bli_obj_init_pack( &bh_pack );
    bli_obj_init_pack( &b1_pack );
    bli_obj_init_pack( &ah_pack );
    bli_obj_init_pack( &c1_pack );
    bli_obj_init_pack( &c1L_pack );
    // Query dimension in partitioning direction.
    m_trans = bli_obj_length_after_trans( *c );
    // Scale C by beta (if instructed).
    bli_scalm_int( beta,
                   c,
                   cntl_sub_scalm( cntl ) );
    // Initialize object for packing B'. Note that B' is not packed here;
    // it is packed slice by slice inside the fused first iteration below.
    bli_packm_init( bh, &bh_pack,
                    cntl_sub_packm_b( cntl ) );
    // Initialize object for packing A'. As with B', the actual packing is
    // deferred to the fused first iteration.
    bli_packm_init( ah, &ah_pack,
                    cntl_sub_packm_b( cntl ) );
    // Fuse the first iteration with incremental packing and computation.
    {
        obj_t bh_inc, bh_pack_inc;
        obj_t ah_inc, ah_pack_inc;
        obj_t c1_pack_inc;
        dim_t j;
        dim_t bn_inc;
        dim_t n_trans;
        // Query dimension in partitioning direction.
        n_trans = bli_obj_width( bh_pack );
        // Determine the current algorithmic blocksize.
        bm_alg = bli_determine_blocksize_b( 0, m_trans, a,
                                            cntl_blocksize( cntl ) );
        // Acquire partitions for A1, B1, and C1.
        bli_acquire_mpart_b2t( BLIS_SUBPART1,
                               0, bm_alg, a, &a1 );
        bli_acquire_mpart_b2t( BLIS_SUBPART1,
                               0, bm_alg, b, &b1 );
        bli_acquire_mpart_b2t( BLIS_SUBPART1,
                               0, bm_alg, c, &c1 );
        // Initialize objects for packing A1, B1, and C1.
        bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) );
        bli_packm_init( &b1, &b1_pack, cntl_sub_packm_a( cntl ) );
        bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) );
        // Pack C1 and scale by beta (if instructed).
        bli_packm_int( beta, &c1, &c1_pack, cntl_sub_packm_c( cntl ) );
        // Pack A1 and scale by alpha (if instructed).
        bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) );
        // First pass (A1 * B'): partition along the n dimension, packing
        // one slice of B' per iteration and using it right away.
        for ( j = 0; j < n_trans; j += bn_inc )
        {
            // Determine the current incremental packing blocksize.
            bn_inc = bli_determine_blocksize_f( j, n_trans, a,
                                                cntl_blocksize_aux( cntl ) );
            // Acquire incremental partitions.
            bli_acquire_mpart_l2r( BLIS_SUBPART1,
                                   j, bn_inc, bh, &bh_inc );
            bli_acquire_mpart_l2r( BLIS_SUBPART1,
                                   j, bn_inc, &bh_pack, &bh_pack_inc );
            bli_acquire_mpart_l2r( BLIS_SUBPART1,
                                   j, bn_inc, &c1_pack, &c1_pack_inc );
            // Pack Bh_inc and scale by alpha (if instructed).
            bli_packm_int( alpha, &bh_inc, &bh_pack_inc, cntl_sub_packm_b( cntl ) );
            // Perform herk subproblem.
            bli_herk_int( &BLIS_ONE,
                          &a1_pack,
                          &bh_pack_inc,
                          beta,
                          &c1_pack_inc,
                          cntl_sub_herk( cntl ) );
        }
        // Pack B1 and scale by alpha_conj (if instructed).
        bli_packm_int( alpha_conj, &b1, &b1_pack, cntl_sub_packm_a( cntl ) );
        // Second pass (B1 * A'): partition along the n dimension, packing
        // one slice of A' per iteration and using it right away.
        for ( j = 0; j < n_trans; j += bn_inc )
        {
            // Determine the current incremental packing blocksize.
            bn_inc = bli_determine_blocksize_f( j, n_trans, b,
                                                cntl_blocksize_aux( cntl ) );
            // Acquire incremental partitions.
            bli_acquire_mpart_l2r( BLIS_SUBPART1,
                                   j, bn_inc, ah, &ah_inc );
            bli_acquire_mpart_l2r( BLIS_SUBPART1,
                                   j, bn_inc, &ah_pack, &ah_pack_inc );
            bli_acquire_mpart_l2r( BLIS_SUBPART1,
                                   j, bn_inc, &c1_pack, &c1_pack_inc );
            // Pack Ah_inc and scale by alpha_conj (if instructed).
            bli_packm_int( alpha_conj, &ah_inc, &ah_pack_inc, cntl_sub_packm_b( cntl ) );
            // Perform herk subproblem.
            // NOTE(review): beta is passed here even though the first pass
            // above already passed beta for the same slices of c1_pack --
            // confirm that beta is not applied to C1 more than once.
            bli_herk_int( &BLIS_ONE,
                          &b1_pack,
                          &ah_pack_inc,
                          beta,
                          &c1_pack_inc,
                          cntl_sub_herk( cntl ) );
        }
        // Unpack C1 (if C1 was packed).
        bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) );
    }
    // Partition along the m dimension. The loop starts at bm_alg because
    // the first block was already processed by the fused iteration above.
    for ( i = bm_alg; i < m_trans; i += bm_alg )
    {
        // Determine the current algorithmic blocksize.
        bm_alg = bli_determine_blocksize_b( i, m_trans, a,
                                            cntl_blocksize( cntl ) );
        // Acquire partitions for A1, B1, and C1.
        bli_acquire_mpart_b2t( BLIS_SUBPART1,
                               i, bm_alg, a, &a1 );
        bli_acquire_mpart_b2t( BLIS_SUBPART1,
                               i, bm_alg, b, &b1 );
        bli_acquire_mpart_b2t( BLIS_SUBPART1,
                               i, bm_alg, c, &c1 );
        // Partition off the stored region of C1 and the corresponding regions
        // of Bh_pack and Ah_pack. We compute the width of the subpartition
        // taking the location of the diagonal into account.
        offL = 0;
        nL = bli_min( bli_obj_width_after_trans( c1 ),
                      bli_obj_diag_offset_after_trans( c1 ) + bm_alg );
        bli_acquire_mpart_l2r( BLIS_SUBPART1,
                               offL, nL, &c1, &c1L );
        bli_acquire_mpart_l2r( BLIS_SUBPART1,
                               offL, nL, &bh_pack, &bhL_pack );
        bli_acquire_mpart_l2r( BLIS_SUBPART1,
                               offL, nL, &ah_pack, &ahL_pack );
        // Initialize objects for packing A1, B1, and C1.
        bli_packm_init( &a1, &a1_pack,
                        cntl_sub_packm_a( cntl ) );
        bli_packm_init( &b1, &b1_pack,
                        cntl_sub_packm_a( cntl ) );
        bli_packm_init( &c1L, &c1L_pack,
                        cntl_sub_packm_c( cntl ) );
        // Pack A1 and scale by alpha (if instructed).
        bli_packm_int( alpha,
                       &a1, &a1_pack,
                       cntl_sub_packm_a( cntl ) );
        // Pack B1 and scale by alpha_conj (if instructed).
        bli_packm_int( alpha_conj,
                       &b1, &b1_pack,
                       cntl_sub_packm_a( cntl ) );
        // Pack C1 and scale by beta (if instructed).
        bli_packm_int( beta,
                       &c1L, &c1L_pack,
                       cntl_sub_packm_c( cntl ) );
        // Perform her2k subproblem.
        bli_her2k_int( alpha,
                       &a1_pack,
                       &bhL_pack,
                       alpha_conj,
                       &b1_pack,
                       &ahL_pack,
                       beta,
                       &c1L_pack,
                       cntl_sub_her2k( cntl ) );
        // Unpack C1 (if C1 was packed).
        bli_unpackm_int( &c1L_pack, &c1L,
                         cntl_sub_unpackm_c( cntl ) );
    }
    // If any packing buffers were acquired within packm, release them back
    // to the memory manager.
    bli_obj_release_pack( &a1_pack );
    bli_obj_release_pack( &bh_pack );
    bli_obj_release_pack( &b1_pack );
    bli_obj_release_pack( &ah_pack );
    bli_obj_release_pack( &c1_pack );
    bli_obj_release_pack( &c1L_pack );
}

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype for the blocked her2k variant bli_her2k_l_blk_var4(). The
// operands come in two pairs, (a, bh) scaled by alpha and (b, ah) scaled by
// alpha_conj; c is the matrix being updated (scaled by beta) and cntl is
// the her2k control tree node that drives the variant.
void bli_her2k_l_blk_var4( obj_t* alpha,
                           obj_t* a,
                           obj_t* bh,
                           obj_t* alpha_conj,
                           obj_t* b,
                           obj_t* ah,
                           obj_t* beta,
                           obj_t* c,
                           her2k_t* cntl );

View File

@@ -1,215 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//
// bli_her2k_u_blk_var1()
//
// Blocked her2k variant implemented as two consecutive rank-k updates, each
// of which walks C in row blocks (top to bottom) and calls bli_herk_int():
//
//   1. C = C + alpha       * A * B'   (B' packed once, A packed per block)
//   2. C = C + conj(alpha) * B * A'   (A' packed once, B packed per block)
//
// In both passes only the columns [offR, width) of the current row block of
// C are touched, where offR is derived from the block's diagonal offset.
//
// Parameters:
//   alpha, alpha_conj  scalars for the first and second update.
//   a, bh              operands of the first update (A and B').
//   b, ah              operands of the second update (B and A').
//   beta               scalar applied to C (if instructed by cntl).
//   c                  matrix being updated.
//   cntl               her2k control tree node supplying the blocksize and
//                      the packm/unpackm/scalm/herk sub-nodes.
//
void bli_her2k_u_blk_var1( obj_t* alpha,
                           obj_t* a,
                           obj_t* bh,
                           obj_t* alpha_conj,
                           obj_t* b,
                           obj_t* ah,
                           obj_t* beta,
                           obj_t* c,
                           her2k_t* cntl )
{
    obj_t a1, a1_pack;
    obj_t bh_pack, bhR_pack;
    obj_t b1, b1_pack;
    obj_t ah_pack, ahR_pack;
    obj_t c1;
    obj_t c1R, c1R_pack;
    dim_t i;
    dim_t b_alg;
    dim_t m_trans;
    dim_t offR, nR;
    // Initialize all pack objects that are passed into packm_init().
    bli_obj_init_pack( &a1_pack );
    bli_obj_init_pack( &bh_pack );
    bli_obj_init_pack( &b1_pack );
    bli_obj_init_pack( &ah_pack );
    bli_obj_init_pack( &c1R_pack );
    // Query dimension in partitioning direction.
    m_trans = bli_obj_length_after_trans( *c );
    // Scale C by beta (if instructed).
    bli_scalm_int( beta,
                   c,
                   cntl_sub_scalm( cntl ) );
    //
    // Perform first rank-k update: C = C + alpha * A * B'.
    //
    // Initialize object for packing B'.
    bli_packm_init( bh, &bh_pack,
                    cntl_sub_packm_b( cntl ) );
    // Pack B' and scale by alpha (if instructed).
    bli_packm_int( alpha,
                   bh, &bh_pack,
                   cntl_sub_packm_b( cntl ) );
    // Partition along the m dimension.
    for ( i = 0; i < m_trans; i += b_alg )
    {
        // Determine the current algorithmic blocksize.
        b_alg = bli_determine_blocksize_f( i, m_trans, a,
                                           cntl_blocksize( cntl ) );
        // Acquire partitions for A1 and C1.
        bli_acquire_mpart_t2b( BLIS_SUBPART1,
                               i, b_alg, a, &a1 );
        bli_acquire_mpart_t2b( BLIS_SUBPART1,
                               i, b_alg, c, &c1 );
        // Partition off the stored region of C1 and the corresponding region
        // of Bh_pack. We compute the width of the subpartition taking the
        // location of the diagonal into account.
        offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) );
        nR = bli_obj_width_after_trans( c1 ) - offR;
        bli_acquire_mpart_l2r( BLIS_SUBPART1,
                               offR, nR, &c1, &c1R );
        bli_acquire_mpart_l2r( BLIS_SUBPART1,
                               offR, nR, &bh_pack, &bhR_pack );
        // Initialize objects for packing A1 and C1.
        bli_packm_init( &a1, &a1_pack,
                        cntl_sub_packm_a( cntl ) );
        bli_packm_init( &c1R, &c1R_pack,
                        cntl_sub_packm_c( cntl ) );
        // Pack A1 and scale by alpha (if instructed).
        bli_packm_int( alpha,
                       &a1, &a1_pack,
                       cntl_sub_packm_a( cntl ) );
        // Pack C1 and scale by beta (if instructed).
        bli_packm_int( beta,
                       &c1R, &c1R_pack,
                       cntl_sub_packm_c( cntl ) );
        // Perform herk subproblem.
        bli_herk_int( alpha,
                      &a1_pack,
                      &bhR_pack,
                      beta,
                      &c1R_pack,
                      cntl_sub_herk( cntl ) );
        // Unpack C1 (if C1 was packed).
        bli_unpackm_int( &c1R_pack, &c1R,
                         cntl_sub_unpackm_c( cntl ) );
    }
    // If any packing buffers were acquired within packm, release them back
    // to the memory manager. Only the buffers of the first update are
    // released here; c1R_pack is reused by the second update.
    bli_obj_release_pack( &a1_pack );
    bli_obj_release_pack( &bh_pack );
    //
    // Perform second rank-k update: C = C + conj(alpha) * B * A'.
    //
    // Initialize object for packing A'.
    bli_packm_init( ah, &ah_pack,
                    cntl_sub_packm_b( cntl ) );
    // Pack A' and scale by alpha_conj (if instructed).
    bli_packm_int( alpha_conj,
                   ah, &ah_pack,
                   cntl_sub_packm_b( cntl ) );
    // Partition along the m dimension.
    for ( i = 0; i < m_trans; i += b_alg )
    {
        // Determine the current algorithmic blocksize.
        b_alg = bli_determine_blocksize_f( i, m_trans, b,
                                           cntl_blocksize( cntl ) );
        // Acquire partitions for B1 and C1.
        bli_acquire_mpart_t2b( BLIS_SUBPART1,
                               i, b_alg, b, &b1 );
        bli_acquire_mpart_t2b( BLIS_SUBPART1,
                               i, b_alg, c, &c1 );
        // Partition off the stored region of C1 and the corresponding region
        // of Ah_pack. We compute the width of the subpartition taking the
        // location of the diagonal into account.
        offR = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) );
        nR = bli_obj_width_after_trans( c1 ) - offR;
        bli_acquire_mpart_l2r( BLIS_SUBPART1,
                               offR, nR, &c1, &c1R );
        bli_acquire_mpart_l2r( BLIS_SUBPART1,
                               offR, nR, &ah_pack, &ahR_pack );
        // Initialize objects for packing B1 and C1.
        bli_packm_init( &b1, &b1_pack,
                        cntl_sub_packm_a( cntl ) );
        bli_packm_init( &c1R, &c1R_pack,
                        cntl_sub_packm_c( cntl ) );
        // Pack B1 and scale by alpha_conj (if instructed).
        bli_packm_int( alpha_conj,
                       &b1, &b1_pack,
                       cntl_sub_packm_a( cntl ) );
        // Pack C1 and scale by beta (if instructed).
        bli_packm_int( beta,
                       &c1R, &c1R_pack,
                       cntl_sub_packm_c( cntl ) );
        // Perform herk subproblem. BLIS_ONE is passed in the beta position
        // here, unlike the first update, which passed beta.
        bli_herk_int( alpha_conj,
                      &b1_pack,
                      &ahR_pack,
                      &BLIS_ONE,
                      &c1R_pack,
                      cntl_sub_herk( cntl ) );
        // Unpack C1 (if C1 was packed).
        bli_unpackm_int( &c1R_pack, &c1R,
                         cntl_sub_unpackm_c( cntl ) );
    }
    // If any packing buffers were acquired within packm, release them back
    // to the memory manager.
    bli_obj_release_pack( &b1_pack );
    bli_obj_release_pack( &ah_pack );
    bli_obj_release_pack( &c1R_pack );
}

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype for the blocked her2k variant bli_her2k_u_blk_var1(). The
// operands come in two pairs, (a, bh) scaled by alpha and (b, ah) scaled by
// alpha_conj; c is the matrix being updated (scaled by beta) and cntl is
// the her2k control tree node that drives the variant.
void bli_her2k_u_blk_var1( obj_t* alpha,
                           obj_t* a,
                           obj_t* bh,
                           obj_t* alpha_conj,
                           obj_t* b,
                           obj_t* ah,
                           obj_t* beta,
                           obj_t* c,
                           her2k_t* cntl );

View File

@@ -1,165 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//
// bli_her2k_u_blk_var2()
//
// Blocked her2k variant that partitions B', A', and C along the n dimension
// with the "r2l" partitioning routines and backward blocksize
// determination. A and B are packed once, up front; for each column block,
// the matching blocks of B' and A' and the rows [0, mT) of C (selected from
// the block's diagonal offset) are packed, and the subproblem is delegated
// to bli_her2k_int() on the sub-node of the control tree.
//
// Parameters:
//   alpha, alpha_conj  scalars applied while packing A/B' and B/A',
//                      respectively (if instructed by cntl).
//   a, bh              operands of the first product (A and B').
//   b, ah              operands of the second product (B and A').
//   beta               scalar applied to C (if instructed by cntl).
//   c                  matrix being updated.
//   cntl               her2k control tree node supplying the blocksize and
//                      the packm/unpackm/scalm/her2k sub-nodes.
//
void bli_her2k_u_blk_var2( obj_t* alpha,
                           obj_t* a,
                           obj_t* bh,
                           obj_t* alpha_conj,
                           obj_t* b,
                           obj_t* ah,
                           obj_t* beta,
                           obj_t* c,
                           her2k_t* cntl )
{
    obj_t a_pack, aT_pack;
    obj_t bh1, bh1_pack;
    obj_t b_pack, bT_pack;
    obj_t ah1, ah1_pack;
    obj_t c1;
    obj_t c1T, c1T_pack;
    dim_t i;
    dim_t b_alg;
    dim_t n_trans;
    dim_t offT, mT;
    // Initialize all pack objects that are passed into packm_init().
    bli_obj_init_pack( &a_pack );
    bli_obj_init_pack( &bh1_pack );
    bli_obj_init_pack( &b_pack );
    bli_obj_init_pack( &ah1_pack );
    bli_obj_init_pack( &c1T_pack );
    // Query dimension in partitioning direction.
    n_trans = bli_obj_width_after_trans( *c );
    // Scale C by beta (if instructed).
    bli_scalm_int( beta,
                   c,
                   cntl_sub_scalm( cntl ) );
    // Initialize object for packing A and B.
    bli_packm_init( a, &a_pack,
                    cntl_sub_packm_a( cntl ) );
    bli_packm_init( b, &b_pack,
                    cntl_sub_packm_a( cntl ) );
    // Pack A and scale by alpha (if instructed).
    bli_packm_int( alpha,
                   a, &a_pack,
                   cntl_sub_packm_a( cntl ) );
    // Pack B and scale by alpha_conj (if instructed).
    bli_packm_int( alpha_conj,
                   b, &b_pack,
                   cntl_sub_packm_a( cntl ) );
    // Partition along the n dimension.
    for ( i = 0; i < n_trans; i += b_alg )
    {
        // Determine the current algorithmic blocksize.
        b_alg = bli_determine_blocksize_b( i, n_trans, bh,
                                           cntl_blocksize( cntl ) );
        // Acquire partitions for B1', A1', and C1.
        bli_acquire_mpart_r2l( BLIS_SUBPART1,
                               i, b_alg, bh, &bh1 );
        bli_acquire_mpart_r2l( BLIS_SUBPART1,
                               i, b_alg, ah, &ah1 );
        bli_acquire_mpart_r2l( BLIS_SUBPART1,
                               i, b_alg, c, &c1 );
        // Partition off the stored region of C1 and the corresponding regions
        // of A_pack and B_pack. We compute the length of the subpartition
        // taking the location of the diagonal into account.
        offT = 0;
        mT = bli_min( bli_obj_length_after_trans( c1 ),
                      -bli_obj_diag_offset_after_trans( c1 ) + b_alg );
        bli_acquire_mpart_t2b( BLIS_SUBPART1,
                               offT, mT, &c1, &c1T );
        bli_acquire_mpart_t2b( BLIS_SUBPART1,
                               offT, mT, &a_pack, &aT_pack );
        bli_acquire_mpart_t2b( BLIS_SUBPART1,
                               offT, mT, &b_pack, &bT_pack );
        // Initialize objects for packing B1', A1', and C1.
        bli_packm_init( &bh1, &bh1_pack,
                        cntl_sub_packm_b( cntl ) );
        bli_packm_init( &ah1, &ah1_pack,
                        cntl_sub_packm_b( cntl ) );
        bli_packm_init( &c1T, &c1T_pack,
                        cntl_sub_packm_c( cntl ) );
        // Pack B1' and scale by alpha (if instructed).
        bli_packm_int( alpha,
                       &bh1, &bh1_pack,
                       cntl_sub_packm_b( cntl ) );
        // Pack A1' and scale by alpha_conj (if instructed).
        bli_packm_int( alpha_conj,
                       &ah1, &ah1_pack,
                       cntl_sub_packm_b( cntl ) );
        // Pack C1 and scale by beta (if instructed).
        bli_packm_int( beta,
                       &c1T, &c1T_pack,
                       cntl_sub_packm_c( cntl ) );
        // Perform her2k subproblem.
        bli_her2k_int( alpha,
                       &aT_pack,
                       &bh1_pack,
                       alpha_conj,
                       &bT_pack,
                       &ah1_pack,
                       beta,
                       &c1T_pack,
                       cntl_sub_her2k( cntl ) );
        // Unpack C1 (if C1 was packed).
        bli_unpackm_int( &c1T_pack, &c1T,
                         cntl_sub_unpackm_c( cntl ) );
    }
    // If any packing buffers were acquired within packm, release them back
    // to the memory manager.
    bli_obj_release_pack( &a_pack );
    bli_obj_release_pack( &bh1_pack );
    bli_obj_release_pack( &b_pack );
    bli_obj_release_pack( &ah1_pack );
    bli_obj_release_pack( &c1T_pack );
}

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype for bli_her2k_u_blk_var2(). Every operand (alpha, a, bh,
// alpha_conj, b, ah, beta, c) is passed as an obj_t pointer, and cntl is
// a pointer to a her2k_t control tree node. The function returns nothing.
void bli_her2k_u_blk_var2( obj_t* alpha,
obj_t* a,
obj_t* bh,
obj_t* alpha_conj,
obj_t* b,
obj_t* ah,
obj_t* beta,
obj_t* c,
her2k_t* cntl );

View File

@@ -1,255 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
//
// Blocked her2k variant 4 (the "_u_" flavor -- presumably the case where
// C is stored in its upper triangle; confirm against the caller).
//
// The routine walks C along the m dimension in blocks. The very first
// block is handled by a fused path that packs Bh and Ah in increments
// along the n dimension and issues one herk subproblem per increment.
// Every later block is restricted to the stored region of C1 and is
// handed to bli_her2k_int() as a single subproblem.
//
// Whether a given pack/scale/unpack step actually does anything is
// decided by the corresponding sub-node of cntl.
//
void bli_her2k_u_blk_var4( obj_t*   alpha,
                           obj_t*   a,
                           obj_t*   bh,
                           obj_t*   alpha_conj,
                           obj_t*   b,
                           obj_t*   ah,
                           obj_t*   beta,
                           obj_t*   c,
                           her2k_t* cntl )
{
	// Current m-dimension partitions of A, B, and C, with their packed
	// counterparts.
	obj_t a1, a1_pack;
	obj_t b1, b1_pack;
	obj_t c1, c1_pack;

	// Stored region of C1 and its packed counterpart.
	obj_t c1R, c1R_pack;

	// Packed Bh and Ah, plus the sub-ranges of each that correspond to
	// the stored region of C1.
	obj_t bh_pack, bhR_pack;
	obj_t ah_pack, ahR_pack;

	dim_t m_trans;
	dim_t bm_alg;
	dim_t i;
	dim_t off_r, n_r;

	// Every pack object that will be handed to packm_init() must be
	// initialized first.
	bli_obj_init_pack( &a1_pack );
	bli_obj_init_pack( &bh_pack );
	bli_obj_init_pack( &b1_pack );
	bli_obj_init_pack( &ah_pack );
	bli_obj_init_pack( &c1_pack );
	bli_obj_init_pack( &c1R_pack );

	// Extent of C in the direction we partition along.
	m_trans = bli_obj_length_after_trans( *c );

	// Apply beta to C, if the control tree asks for it.
	bli_scalm_int( beta, c, cntl_sub_scalm( cntl ) );

	// Set up the pack objects for Bh and for Ah.
	bli_packm_init( bh, &bh_pack, cntl_sub_packm_b( cntl ) );
	bli_packm_init( ah, &ah_pack, cntl_sub_packm_b( cntl ) );

	// First iteration: packing of Bh and Ah is done incrementally and is
	// interleaved with the computation.
	{
		obj_t bh_inc, bh_pack_inc;
		obj_t ah_inc, ah_pack_inc;
		obj_t c1_pack_inc;
		dim_t n_trans;
		dim_t b_inc;
		dim_t jj;

		// Extent of the incremental (n) direction.
		n_trans = bli_obj_width( bh_pack );

		// Algorithmic blocksize for the first block.
		bm_alg = bli_determine_blocksize_f( 0, m_trans, a,
		                                    cntl_blocksize( cntl ) );

		// Carve out A1, B1, and C1.
		bli_acquire_mpart_t2b( BLIS_SUBPART1, 0, bm_alg, a, &a1 );
		bli_acquire_mpart_t2b( BLIS_SUBPART1, 0, bm_alg, b, &b1 );
		bli_acquire_mpart_t2b( BLIS_SUBPART1, 0, bm_alg, c, &c1 );

		// Set up the pack objects for A1, B1, and C1.
		bli_packm_init( &a1, &a1_pack, cntl_sub_packm_a( cntl ) );
		bli_packm_init( &b1, &b1_pack, cntl_sub_packm_a( cntl ) );
		bli_packm_init( &c1, &c1_pack, cntl_sub_packm_c( cntl ) );

		// Pack C1 (scaling by beta if instructed), then pack A1 (scaling
		// by alpha if instructed).
		bli_packm_int( beta,  &c1, &c1_pack, cntl_sub_packm_c( cntl ) );
		bli_packm_int( alpha, &a1, &a1_pack, cntl_sub_packm_a( cntl ) );

		// First sweep over n: A1 against increments of Bh.
		jj = 0;
		while ( jj < n_trans )
		{
			// Blocksize for this packing increment.
			b_inc = bli_determine_blocksize_f( jj, n_trans, a,
			                                   cntl_blocksize_aux( cntl ) );

			// Matching increments of Bh, packed Bh, and packed C1.
			bli_acquire_mpart_l2r( BLIS_SUBPART1,
			                       jj, b_inc, bh, &bh_inc );
			bli_acquire_mpart_l2r( BLIS_SUBPART1,
			                       jj, b_inc, &bh_pack, &bh_pack_inc );
			bli_acquire_mpart_l2r( BLIS_SUBPART1,
			                       jj, b_inc, &c1_pack, &c1_pack_inc );

			// Pack this increment of Bh (scaling by alpha if instructed).
			bli_packm_int( alpha, &bh_inc, &bh_pack_inc,
			               cntl_sub_packm_b( cntl ) );

			// herk subproblem for this increment.
			bli_herk_int( &BLIS_ONE,
			              &a1_pack,
			              &bh_pack_inc,
			              beta,
			              &c1_pack_inc,
			              cntl_sub_herk( cntl ) );

			jj += b_inc;
		}

		// Pack B1 (scaling by alpha_conj if instructed).
		bli_packm_int( alpha_conj, &b1, &b1_pack, cntl_sub_packm_a( cntl ) );

		// Second sweep over n: B1 against increments of Ah.
		jj = 0;
		while ( jj < n_trans )
		{
			// Blocksize for this packing increment.
			b_inc = bli_determine_blocksize_f( jj, n_trans, b,
			                                   cntl_blocksize_aux( cntl ) );

			// Matching increments of Ah, packed Ah, and packed C1.
			bli_acquire_mpart_l2r( BLIS_SUBPART1,
			                       jj, b_inc, ah, &ah_inc );
			bli_acquire_mpart_l2r( BLIS_SUBPART1,
			                       jj, b_inc, &ah_pack, &ah_pack_inc );
			bli_acquire_mpart_l2r( BLIS_SUBPART1,
			                       jj, b_inc, &c1_pack, &c1_pack_inc );

			// Pack this increment of Ah (scaling by alpha_conj if
			// instructed).
			bli_packm_int( alpha_conj, &ah_inc, &ah_pack_inc,
			               cntl_sub_packm_b( cntl ) );

			// herk subproblem for this increment.
			bli_herk_int( &BLIS_ONE,
			              &b1_pack,
			              &ah_pack_inc,
			              beta,
			              &c1_pack_inc,
			              cntl_sub_herk( cntl ) );

			jj += b_inc;
		}

		// Copy the result back into C1 if C1 was packed.
		bli_unpackm_int( &c1_pack, &c1, cntl_sub_unpackm_c( cntl ) );
	}

	// Remaining iterations along m, starting just past the first block.
	// Note that the step uses the blocksize recomputed inside the body.
	i = bm_alg;
	while ( i < m_trans )
	{
		// Algorithmic blocksize for this block.
		bm_alg = bli_determine_blocksize_f( i, m_trans, a,
		                                    cntl_blocksize( cntl ) );

		// Carve out A1, B1, and C1.
		bli_acquire_mpart_t2b( BLIS_SUBPART1, i, bm_alg, a, &a1 );
		bli_acquire_mpart_t2b( BLIS_SUBPART1, i, bm_alg, b, &b1 );
		bli_acquire_mpart_t2b( BLIS_SUBPART1, i, bm_alg, c, &c1 );

		// Keep only the stored region of C1, and the matching regions of
		// packed Bh and packed Ah. The starting column and the width are
		// derived from where the diagonal falls within C1.
		off_r = bli_max( 0, bli_obj_diag_offset_after_trans( c1 ) );
		n_r   = bli_obj_width_after_trans( c1 ) - off_r;

		bli_acquire_mpart_l2r( BLIS_SUBPART1,
		                       off_r, n_r, &c1, &c1R );
		bli_acquire_mpart_l2r( BLIS_SUBPART1,
		                       off_r, n_r, &bh_pack, &bhR_pack );
		bli_acquire_mpart_l2r( BLIS_SUBPART1,
		                       off_r, n_r, &ah_pack, &ahR_pack );

		// Set up the pack objects for A1, B1, and the stored part of C1.
		bli_packm_init( &a1,  &a1_pack,  cntl_sub_packm_a( cntl ) );
		bli_packm_init( &b1,  &b1_pack,  cntl_sub_packm_a( cntl ) );
		bli_packm_init( &c1R, &c1R_pack, cntl_sub_packm_c( cntl ) );

		// Pack A1 (alpha), B1 (alpha_conj), and C1 (beta); each scaling
		// is applied only if instructed.
		bli_packm_int( alpha,      &a1,  &a1_pack,  cntl_sub_packm_a( cntl ) );
		bli_packm_int( alpha_conj, &b1,  &b1_pack,  cntl_sub_packm_a( cntl ) );
		bli_packm_int( beta,       &c1R, &c1R_pack, cntl_sub_packm_c( cntl ) );

		// her2k subproblem on the stored region.
		bli_her2k_int( alpha,
		               &a1_pack,
		               &bhR_pack,
		               alpha_conj,
		               &b1_pack,
		               &ahR_pack,
		               beta,
		               &c1R_pack,
		               cntl_sub_her2k( cntl ) );

		// Copy the result back into C1 if C1 was packed.
		bli_unpackm_int( &c1R_pack, &c1R, cntl_sub_unpackm_c( cntl ) );

		i += bm_alg;
	}

	// Hand any packing buffers that packm acquired back to the memory
	// manager.
	bli_obj_release_pack( &a1_pack );
	bli_obj_release_pack( &bh_pack );
	bli_obj_release_pack( &b1_pack );
	bli_obj_release_pack( &ah_pack );
	bli_obj_release_pack( &c1_pack );
	bli_obj_release_pack( &c1R_pack );
}

View File

@@ -1,44 +0,0 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Prototype for bli_her2k_u_blk_var4(). Every operand (alpha, a, bh,
// alpha_conj, b, ah, beta, c) is passed as an obj_t pointer, and cntl is
// a pointer to a her2k_t control tree node. The function returns nothing.
void bli_her2k_u_blk_var4( obj_t* alpha,
obj_t* a,
obj_t* bh,
obj_t* alpha_conj,
obj_t* b,
obj_t* ah,
obj_t* beta,
obj_t* c,
her2k_t* cntl );

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern gemm_t* hemm_cntl;
extern gemm_t* gemm_cntl;
//
// Define object-based interface.
@@ -86,7 +86,7 @@ void bli_symm( side_t side,
// Choose the control tree. We can just use hemm since the algorithm
// is nearly identical to that of symm.
cntl = hemm_cntl;
cntl = gemm_cntl;
// Invoke the internal back-end.
bli_gemm_int( alpha,

View File

@@ -34,7 +34,7 @@
#include "blis.h"
extern her2k_t* her2k_cntl;
//extern her2k_t* her2k_cntl;
extern herk_t* herk_cntl;
//
@@ -46,7 +46,7 @@ void bli_syr2k( obj_t* alpha,
obj_t* beta,
obj_t* c )
{
her2k_t* cntl;
//her2k_t* cntl;
obj_t c_local;
obj_t a_local;
obj_t bt_local;
@@ -86,11 +86,11 @@ void bli_syr2k( obj_t* alpha,
bli_obj_induce_trans( c_local );
}
#if 0
// Choose the control tree. We can just use her2k since the algorithm
// is nearly identical to that of syr2k.
cntl = her2k_cntl;
#if 1
// Invoke the internal back-end.
bli_her2k_int( alpha,
&a_local,

View File

@@ -57,9 +57,7 @@ void bli_cntl_init( void )
// Level-3
bli_gemm_cntl_init();
bli_hemm_cntl_init();
bli_herk_cntl_init();
bli_her2k_cntl_init();
bli_trmm_cntl_init();
bli_trsm_cntl_init();
}
@@ -87,9 +85,7 @@ void bli_cntl_finalize( void )
// Level-3
bli_gemm_cntl_finalize();
bli_hemm_cntl_finalize();
bli_herk_cntl_finalize();
bli_her2k_cntl_finalize();
bli_trmm_cntl_finalize();
bli_trsm_cntl_finalize();
}