Extended newly relaxed KC to hemm, symm.

Details:
- These changes were intended for the previous commit.
- Defined bli_gemm_determine_kc_f() and bli_gemm_determine_kc_b(),
  which determine blocksizes for gemm-based operations, taking special
  care to "nudge" the kc dimension up to a multiple of MR or NR for
  hemm and symm operations, as needed.
- Changed bli_gemm_blk_var3f.c to call bli_gemm_determine_kc_f()
  instead of bli_determine_blocksize_f().
- Comment updates to bli_trmm_blocksize.c, bli_trsm_blocksize.c.
This commit is contained in:
Field G. Van Zee
2014-10-23 10:50:59 -05:00
parent ab954ba6f8
commit 4674ca8cff
7 changed files with 235 additions and 9 deletions

View File

@@ -33,6 +33,7 @@
*/
#include "bli_gemm_cntl.h"
#include "bli_gemm_blocksize.h"
#include "bli_gemm_query.h"
#include "bli_gemm_check.h"
#include "bli_gemm_entry.h"

View File

@@ -85,11 +85,11 @@ void bli_gemm_blk_var3f( obj_t* a,
for ( i = 0; i < k_trans; i += b_alg )
{
// Determine the current algorithmic blocksize.
// NOTE: Use of b (for execution datatype) is intentional!
// This causes the right blocksize to be used if c and a are
// complex and b is real.
b_alg = bli_determine_blocksize_f( i, k_trans, b,
cntl_blocksize( cntl ) );
// NOTE: We call a gemm/hemm/symm-specific function to determine
// the kc blocksize so that we can implement the "nudging" of kc
// to be a multiple of mr or nr, as needed.
b_alg = bli_gemm_determine_kc_f( i, k_trans, a, b,
cntl_blocksize( cntl ) );
// Acquire partitions for A1 and B1.
bli_acquire_mpart_l2r( BLIS_SUBPART1,

View File

@@ -0,0 +1,170 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "blis.h"
// Choose the kc blocksize for the current iteration of a gemm-based
// (gemm/hemm/symm) algorithm that walks the partitioned dimension in the
// "forward" direction (top to bottom, left to right, top-left to
// bottom-right).
//
//   i     - offset of the current iteration within the dimension.
//   dim   - total length of the dimension being partitioned.
//   a, b  - the two matrix operands; a supplies the execution datatype,
//           and the structure of each operand's root object decides
//           whether the blocksizes are nudged.
//   bsize - blocksize object holding the default and maximum values.
//
// Returns dim - i when that remainder does not exceed the (possibly
// nudged) maximum blocksize, and the (possibly nudged) default blocksize
// otherwise.
dim_t bli_gemm_determine_kc_f( dim_t    i,
                               dim_t    dim,
                               obj_t*   a,
                               obj_t*   b,
                               blksz_t* bsize )
{
	num_t dt;
	dim_t kc_def, kc_max;
	dim_t remaining;

	// Look up the default and maximum blocksizes that correspond to the
	// execution datatype of A.
	dt     = bli_obj_execution_datatype( *a );
	kc_def = bli_blksz_for_type( dt, bsize );
	kc_max = bli_blksz_max_for_type( dt, bsize );

	// A Hermitian/symmetric A calls for kc values that are multiples of
	// MR; failing that, a Hermitian/symmetric B calls for multiples of NR.
	// With neither structure present the queried values are used as-is.
	if ( bli_obj_root_is_herm_or_symm( *a ) )
	{
		const dim_t mr = bli_info_get_default_mr( dt );

		kc_def = bli_align_dim_to_mult( kc_def, mr );
		kc_max = bli_align_dim_to_mult( kc_max, mr );
	}
	else if ( bli_obj_root_is_herm_or_symm( *b ) )
	{
		const dim_t nr = bli_info_get_default_nr( dt );

		kc_def = bli_align_dim_to_mult( kc_def, nr );
		kc_max = bli_align_dim_to_mult( kc_max, nr );
	}

	// Amount of the dimension not yet consumed, counting the chunk whose
	// size is being chosen right now.
	remaining = dim - i;

	// Everything that is left is taken in one go when it fits within the
	// maximum; otherwise the default blocksize is used.
	return ( remaining <= kc_max ) ? remaining : kc_def;
}
// Choose the kc blocksize for the current iteration of a gemm-based
// (gemm/hemm/symm) algorithm that walks the partitioned dimension in the
// "backward" direction (bottom to top, right to left, bottom-right to
// top-left).
//
//   i     - offset of the current iteration within the dimension.
//   dim   - total length of the dimension being partitioned.
//   a, b  - the two matrix operands; a supplies the execution datatype,
//           and the structure of each operand's root object decides
//           whether the blocksizes are nudged.
//   bsize - blocksize object holding the default and maximum values.
//
// Returns the blocksize to use for this iteration (see the case analysis
// in the body).
dim_t bli_gemm_determine_kc_b( dim_t    i,
                               dim_t    dim,
                               obj_t*   a,
                               obj_t*   b,
                               blksz_t* bsize )
{
	num_t dt;
	dim_t kc_def, kc_max;
	dim_t remaining;
	dim_t edge;

	// Look up the default and maximum blocksizes that correspond to the
	// execution datatype of A.
	dt     = bli_obj_execution_datatype( *a );
	kc_def = bli_blksz_for_type( dt, bsize );
	kc_max = bli_blksz_max_for_type( dt, bsize );

	// A Hermitian/symmetric A calls for kc values that are multiples of
	// MR; failing that, a Hermitian/symmetric B calls for multiples of NR.
	// With neither structure present the queried values are used as-is.
	if ( bli_obj_root_is_herm_or_symm( *a ) )
	{
		const dim_t mr = bli_info_get_default_mr( dt );

		kc_def = bli_align_dim_to_mult( kc_def, mr );
		kc_max = bli_align_dim_to_mult( kc_max, mr );
	}
	else if ( bli_obj_root_is_herm_or_symm( *b ) )
	{
		const dim_t nr = bli_info_get_default_nr( dt );

		kc_def = bli_align_dim_to_mult( kc_def, nr );
		kc_max = bli_align_dim_to_mult( kc_max, nr );
	}

	// Amount of the dimension not yet consumed (counting the chunk whose
	// size is being chosen right now), and the part of it that does not
	// fill a whole default-sized block.
	remaining = dim - i;
	edge      = remaining % kc_def;

	// Case 1: what remains divides evenly into default-sized blocks, so
	// the default blocksize is returned.
	if ( edge == 0 ) return kc_def;

	// Case 2: what remains fits within the maximum blocksize, so all of
	// it is taken at once.
	if ( remaining <= kc_max ) return remaining;

	// Case 3: more than the maximum remains, and the edge is small enough
	// (no larger than kc_max - kc_def) to be merged with one default-sized
	// block without exceeding the maximum.
	if ( edge <= kc_max - kc_def ) return kc_def + edge;

	// Case 4: the edge is too large to merge, so it becomes a block on
	// its own.
	return edge;
}

View File

@@ -0,0 +1,44 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas at Austin nor the names
of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// Determines the kc blocksize for one iteration of a forward-moving
// gemm/hemm/symm algorithm. i is the current offset within a dimension of
// length dim, and bsize holds the default and maximum blocksizes, which
// are queried using the execution datatype of a. The blocksizes are first
// nudged up to a multiple of MR if the root of a is Hermitian or
// symmetric, or otherwise to a multiple of NR if the root of b is.
dim_t bli_gemm_determine_kc_f( dim_t i,
dim_t dim,
obj_t* a,
obj_t* b,
blksz_t* bsize );
// Determines the kc blocksize for one iteration of a backward-moving
// gemm/hemm/symm algorithm. i is the current offset within a dimension of
// length dim, and bsize holds the default and maximum blocksizes, which
// are queried using the execution datatype of a. The blocksizes are first
// nudged up to a multiple of MR if the root of a is Hermitian or
// symmetric, or otherwise to a multiple of NR if the root of b is.
dim_t bli_gemm_determine_kc_b( dim_t i,
dim_t dim,
obj_t* a,
obj_t* b,
blksz_t* bsize );

View File

@@ -55,7 +55,7 @@ dim_t bli_trmm_determine_kc_f( dim_t i,
b_alg = bli_blksz_for_type( dt, bsize );
b_max = bli_blksz_max_for_type( dt, bsize );
// Nudge the default and maximum blocksizes up to the nearest
// Nudge the default and maximum kc blocksizes up to the nearest
// multiple of MR if the triangular matrix is on the left, or NR
// if the triangular matrix is on the right.
if ( bli_obj_root_is_triangular( *a ) ) mnr = bli_info_get_default_mr( dt );
@@ -105,7 +105,7 @@ dim_t bli_trmm_determine_kc_b( dim_t i,
b_alg = bli_blksz_for_type( dt, bsize );
b_max = bli_blksz_max_for_type( dt, bsize );
// Nudge the default and maximum blocksizes up to the nearest
// Nudge the default and maximum kc blocksizes up to the nearest
// multiple of MR if the triangular matrix is on the left, or NR
// if the triangular matrix is on the right.
if ( bli_obj_root_is_triangular( *a ) ) mnr = bli_info_get_default_mr( dt );

View File

@@ -54,7 +54,7 @@ dim_t bli_trsm_determine_kc_f( dim_t i,
b_alg = bli_blksz_for_type( dt, bsize );
b_max = bli_blksz_max_for_type( dt, bsize );
// Nudge the default and maximum blocksizes up to the nearest
// Nudge the default and maximum kc blocksizes up to the nearest
// multiple of MR.
mr = bli_info_get_default_mr( dt );
b_alg = bli_align_dim_to_mult( b_alg, mr );
@@ -101,7 +101,7 @@ dim_t bli_trsm_determine_kc_b( dim_t i,
b_alg = bli_blksz_for_type( dt, bsize );
b_max = bli_blksz_max_for_type( dt, bsize );
// Nudge the default and maximum blocksizes up to the nearest
// Nudge the default and maximum kc blocksizes up to the nearest
// multiple of MR.
mr = bli_info_get_default_mr( dt );
b_alg = bli_align_dim_to_mult( b_alg, mr );

View File

@@ -263,6 +263,12 @@
\
( ( (obj).info & BLIS_STRUC_BITS ) == BLIS_BITVAL_TRIANGULAR )
// True when obj is Hermitian or symmetric, as reported by
// bli_obj_is_hermitian() and bli_obj_is_symmetric().
#define bli_obj_is_herm_or_symm( obj ) \
\
	( bli_obj_is_hermitian( obj ) || bli_obj_is_symmetric( obj ) )
// Info modification
@@ -401,6 +407,11 @@
\
bli_obj_is_triangular( *bli_obj_root( obj ) ) \
// True when the root object of obj is Hermitian or symmetric.
#define bli_obj_root_is_herm_or_symm( obj ) \
\
	bli_obj_is_herm_or_symm( *bli_obj_root( obj ) )
#define bli_obj_root_is_upper( obj ) \
\
bli_obj_is_upper( *bli_obj_root( obj ) ) \