Updated behavior of bl2_obj_induce_trans() macro.

Details:
- Changed bl2_obj_induce_trans() so that the transposition bit is no longer
  updated as part of the macro. All current uses of the macro have been
  coupled with instances of bl2_obj_set_trans() to clear the bit.
- Added Jed to CREDITS file.
This commit is contained in:
Field G. Van Zee
2013-03-01 12:55:42 -06:00
parent f24e29b789
commit bb612f864e
16 changed files with 52 additions and 23 deletions

View File

@@ -9,6 +9,7 @@ The BLIS framework was primarily authored by
but many others have contributed input and feedback, including:
Jed Brown (Argonne National Laboratory)
John Gunnels (IBM, T.J. Watson Research Center)
Jeff Hammond (Argonne National Laboratory)
Francisco Igual (Universidad Complutense de Madrid)
@@ -19,12 +20,12 @@ but many others have contributed input and feedback, including:
Rhys Ulerich (The University of Texas at Austin)
Robert van de Geijn (The University of Texas at Austin)
Thanks go the following individuals for porting very early versions
of BLIS to new architectures as proofs-of-concept:
Thanks go to the following individuals for porting very early versions of
BLIS to new architectures as proofs-of-concept:
Francisco Igual (The University of Texas at Austin)
Tyler Smith (The University of Texas at Austin)
BLIS's development was partially funded by grants from Microsoft and
the National Science Foundation (Awards CCF-0917167 and OCI-1148125).
BLIS's development was partially funded by grants from Microsoft and the
National Science Foundation (NSF Awards CCF-0917167 and OCI-1148125).

View File

@@ -450,7 +450,7 @@ void PASTEMAC(ch,varname )( \
\
/*
PASTEMAC(ch,fprintm)( stdout, "packm_var3: p copied", panel_dim, panel_len_max_i, \
p_begin, rs_p, cs_p, "%5.2f", "" ); \
p_begin, rs_p, cs_p, "%4.1f", "" ); \
*/ \
\
p_begin += p_inc; \

View File

@@ -104,7 +104,10 @@ void bl2_ger_int( conj_t conjx,
// still needs a transposition, then we must induce one by swapping the
// strides and dimensions.
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( a_local ) )
{
bl2_obj_induce_trans( a_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
}
// Extract the variant number and implementation type.
n = cntl_var_num( cntl );

View File

@@ -98,17 +98,17 @@ void bl2_trmv_int( obj_t* alpha,
// thing. Alternatively, we could accomplish the same end goal by
// inducing a transposition, via bl2_obj_induce_trans(), in the code
// block below. That macro function swaps dimensions, strides, and
// offsets, and also clears the transposition bit. As an example, given
// a lower triangular, column-major matrix that needs a transpose, we
// would induce that transposition by recasting the object as an upper
// triangular, row-major matrix (with no transpose needed). Note that
// how we choose to handle transposition here does NOT affect the
// optimal choice of kernel (ie: a column-major column panel matrix with
// transpose times a vector would use the same kernel as a row-major
// row panel matrix with no transpose times a vector).
// offsets. As an example, given a lower triangular, column-major matrix
// that needs a transpose, we would induce that transposition by recasting
// the object as an upper triangular, row-major matrix (with no transpose
// needed). Note that how we choose to handle transposition here does NOT
// affect the optimal choice of kernel (i.e., a column-major column panel
// matrix with transpose times a vector would use the same kernel as a
// row-major row panel matrix with no transpose times a vector).
if ( bl2_obj_has_trans( a_local ) )
{
//bl2_obj_induce_trans( a_local );
//bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
bl2_toggle_bool( uplo );
}

View File

@@ -98,17 +98,17 @@ void bl2_trsv_int( obj_t* alpha,
// thing. Alternatively, we could accomplish the same end goal by
// inducing a transposition, via bl2_obj_induce_trans(), in the code
// block below. That macro function swaps dimensions, strides, and
// offsets, and also clears the transposition bit. As an example, given
// a lower triangular, column-major matrix that needs a transpose, we
// would induce that transposition by recasting the object as an upper
// triangular, row-major matrix (with no transpose needed). Note that
// how we choose to handle transposition here does NOT affect the
// optimal choice of kernel (ie: a column-major column panel matrix with
// transpose times a vector would use the same kernel as a row-major
// row panel matrix with no transpose times a vector).
// offsets. As an example, given a lower triangular, column-major matrix
// that needs a transpose, we would induce that transposition by recasting
// the object as an upper triangular, row-major matrix (with no transpose
// needed). Note that how we choose to handle transposition here does NOT
// affect the optimal choice of kernel (i.e., a column-major column panel
// matrix with transpose times a vector would use the same kernel as a
// row-major row panel matrix with no transpose times a vector).
if ( bl2_obj_has_trans( a_local ) )
{
//bl2_obj_induce_trans( a_local );
//bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
bl2_toggle_bool( uplo );
}

View File

@@ -84,7 +84,10 @@ void bl2_gemm_int( obj_t* alpha,
// be handled explicitly in the packing of C, but if C is not being
// packed, this is our last chance to handle the transposition.
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
{
bl2_obj_induce_trans( c_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
}
// Extract the variant number and implementation type.
n = cntl_var_num( cntl );

View File

@@ -104,7 +104,10 @@ void bl2_her2k_int( obj_t* alpha,
// be handled explicitly in the packing of C, but if C is not being
// packed, this is our last chance to handle the transposition.
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
{
bl2_obj_induce_trans( c_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
}
// Set a bool based on the uplo field of c.
if ( bl2_obj_root_is_lower( c_local ) ) uplo = 0;

View File

@@ -96,7 +96,10 @@ void bl2_herk_int( obj_t* alpha,
// be handled explicitly in the packing of C, but if C is not being
// packed, this is our last chance to handle the transposition.
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
{
bl2_obj_induce_trans( c_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
}
// Set a bool based on the uplo field of C's root object.
if ( bl2_obj_root_is_lower( c_local ) ) uplo = 0;

View File

@@ -107,6 +107,7 @@ void bl2_trmm( side_t side,
if ( bl2_obj_has_trans( a_local ) )
{
bl2_obj_induce_trans( a_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
}
// Create an object to hold a copy-cast of alpha. Notice that we use

View File

@@ -97,7 +97,10 @@ void bl2_trmm_int( side_t side,
// be handled explicitly in the packing of C, but if C is not being
// packed, this is our last chance to handle the transposition.
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
{
bl2_obj_induce_trans( c_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
}
// Set a bool based on the uplo field of A's root object.
if ( bl2_obj_root_is_lower( *a ) ) uplo = 0;

View File

@@ -299,6 +299,9 @@ void PASTEMAC(ch,varname)( \
k_a1112 = k - off_a1112; \
\
bp_i = bp + off_a1112 * NR * NDUP; \
\
/*PASTEMAC(ch,fprintm)( stdout, "trmm_u_ker_var2: a1", MR, k_a1112, a1, 1, MR, "%4.1f", "" );*/ \
/*PASTEMAC(ch,fprintm)( stdout, "trmm_u_ker_var2: b1", k_a1112, NR, bp_i, NR, 1, "%4.1f", "" );*/ \
\
/* Handle interior and edge cases separately. */ \
if ( m_cur == MR && n_cur == NR ) \

View File

@@ -113,6 +113,7 @@ void bl2_trmm3( side_t side,
if ( bl2_obj_has_trans( a_local ) )
{
bl2_obj_induce_trans( a_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
}
// Create an object to hold a copy-cast of alpha. Notice that we use

View File

@@ -103,6 +103,7 @@ void bl2_trsm( side_t side,
if ( bl2_obj_has_trans( a_local ) )
{
bl2_obj_induce_trans( a_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
}
// Create an object to hold a copy-cast of alpha. Notice that we use

View File

@@ -97,7 +97,10 @@ void bl2_trsm_int( side_t side,
// be handled explicitly in the packing of C, but if C is not being
// packed, this is our last chance to handle the transposition.
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
{
bl2_obj_induce_trans( c_local );
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
}
// Set a bool based on the uplo field of A's root object.
if ( bl2_obj_root_is_lower( *a ) ) uplo = 0;

View File

@@ -1026,7 +1026,11 @@ bl2_obj_width_stored( obj )
if ( bl2_obj_is_upper_or_lower( obj ) ) \
bl2_obj_toggle_uplo( obj ); \
\
bl2_obj_toggle_trans( obj ); \
/* Note that this macro DOES NOT touch the transposition bit! If
the calling code is using this macro to handle an object whose
transposition bit is set prior to computation, that code needs
to manually clear or toggle the bit, via bl2_obj_set_trans() or
bl2_obj_toggle_trans(), respectively. */ \
} \
}

View File

@@ -1 +1 @@
0.0.3-1
0.0.3-2