mirror of
https://github.com/amd/blis.git
synced 2026-04-20 15:48:50 +00:00
Updated behavior of bl2_obj_induce_trans() macro.
Details: - Changed bl2_obj_induce_trans() so that the transposition bit is no longer updated as part of the macro. All current uses of the macro have been coupled with instances of bl2_obj_set_trans() to clear the bit. - Added Jed to CREDITS file.
This commit is contained in:
9
CREDITS
9
CREDITS
@@ -9,6 +9,7 @@ The BLIS framework was primarily authored by
|
||||
|
||||
but many others have contributed input and feedback, including:
|
||||
|
||||
Jed Brown (Argonne National Laboratory)
|
||||
John Gunnels (IBM, T.J. Watson Research Center)
|
||||
Jeff Hammond (Argonne National Laboratory)
|
||||
Francisco Igual (Universidad Complutense de Madrid)
|
||||
@@ -19,12 +20,12 @@ but many others have contributed input and feedback, including:
|
||||
Rhys Ulerich (The University of Texas at Austin)
|
||||
Robert van de Geijn (The University of Texas at Austin)
|
||||
|
||||
Thanks go the following individuals for porting very early versions
|
||||
of BLIS to new architectures as proofs-of-concept:
|
||||
Thanks go to the following individuals for porting very early versions of
|
||||
BLIS to new architectures as proofs-of-concept:
|
||||
|
||||
Francisco Igual (The University of Texas at Austin)
|
||||
Tyler Smith (The University of Texas at Austin)
|
||||
|
||||
BLIS's development was partially funded by grants from Microsoft and
|
||||
the National Science Foundation (Awards CCF-0917167 and OCI-1148125).
|
||||
BLIS's development was partially funded by grants from Microsoft and the
|
||||
National Science Foundation (NSF Awards CCF-0917167 and OCI-1148125).
|
||||
|
||||
|
||||
@@ -450,7 +450,7 @@ void PASTEMAC(ch,varname )( \
|
||||
\
|
||||
/*
|
||||
PASTEMAC(ch,fprintm)( stdout, "packm_var3: p copied", panel_dim, panel_len_max_i, \
|
||||
p_begin, rs_p, cs_p, "%5.2f", "" ); \
|
||||
p_begin, rs_p, cs_p, "%4.1f", "" ); \
|
||||
*/ \
|
||||
\
|
||||
p_begin += p_inc; \
|
||||
|
||||
@@ -104,7 +104,10 @@ void bl2_ger_int( conj_t conjx,
|
||||
// still needs a transposition, then we must induce one by swapping the
|
||||
// strides and dimensions.
|
||||
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( a_local ) )
|
||||
{
|
||||
bl2_obj_induce_trans( a_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
|
||||
}
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
|
||||
@@ -98,17 +98,17 @@ void bl2_trmv_int( obj_t* alpha,
|
||||
// thing. Alternatively, we could accomplish the same end goal by
|
||||
// inducing a transposition, via bl2_obj_induce_trans(), in the code
|
||||
// block below. That macro function swaps dimensions, strides, and
|
||||
// offsets, and also clears the transposition bit. As an example, given
|
||||
// a lower triangular, column-major matrix that needs a transpose, we
|
||||
// would induce that transposition by recasting the object as an upper
|
||||
// triangular, row-major matrix (with no transpose needed). Note that
|
||||
// how we choose to handle transposition here does NOT affect the
|
||||
// optimal choice of kernel (ie: a column-major column panel matrix with
|
||||
// transpose times a vector would use the same kernel as a row-major
|
||||
// row panel matrix with no transpose times a vector).
|
||||
// offsets. As an example, given a lower triangular, column-major matrix
|
||||
// that needs a transpose, we would induce that transposition by recasting
|
||||
// the object as an upper triangular, row-major matrix (with no transpose
|
||||
// needed). Note that how we choose to handle transposition here does NOT
|
||||
// affect the optimal choice of kernel (i.e., a column-major column panel
|
||||
// matrix with transpose times a vector would use the same kernel as a
|
||||
// row-major row panel matrix with no transpose times a vector).
|
||||
if ( bl2_obj_has_trans( a_local ) )
|
||||
{
|
||||
//bl2_obj_induce_trans( a_local );
|
||||
//bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
|
||||
bl2_toggle_bool( uplo );
|
||||
}
|
||||
|
||||
|
||||
@@ -98,17 +98,17 @@ void bl2_trsv_int( obj_t* alpha,
|
||||
// thing. Alternatively, we could accomplish the same end goal by
|
||||
// inducing a transposition, via bl2_obj_induce_trans(), in the code
|
||||
// block below. That macro function swaps dimensions, strides, and
|
||||
// offsets, and also clears the transposition bit. As an example, given
|
||||
// a lower triangular, column-major matrix that needs a transpose, we
|
||||
// would induce that transposition by recasting the object as an upper
|
||||
// triangular, row-major matrix (with no transpose needed). Note that
|
||||
// how we choose to handle transposition here does NOT affect the
|
||||
// optimal choice of kernel (ie: a column-major column panel matrix with
|
||||
// transpose times a vector would use the same kernel as a row-major
|
||||
// row panel matrix with no transpose times a vector).
|
||||
// offsets. As an example, given a lower triangular, column-major matrix
|
||||
// that needs a transpose, we would induce that transposition by recasting
|
||||
// the object as an upper triangular, row-major matrix (with no transpose
|
||||
// needed). Note that how we choose to handle transposition here does NOT
|
||||
// affect the optimal choice of kernel (i.e., a column-major column panel
|
||||
// matrix with transpose times a vector would use the same kernel as a
|
||||
// row-major row panel matrix with no transpose times a vector).
|
||||
if ( bl2_obj_has_trans( a_local ) )
|
||||
{
|
||||
//bl2_obj_induce_trans( a_local );
|
||||
//bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
|
||||
bl2_toggle_bool( uplo );
|
||||
}
|
||||
|
||||
|
||||
@@ -84,7 +84,10 @@ void bl2_gemm_int( obj_t* alpha,
|
||||
// be handled explicitly in the packing of C, but if C is not being
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
|
||||
{
|
||||
bl2_obj_induce_trans( c_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
|
||||
@@ -104,7 +104,10 @@ void bl2_her2k_int( obj_t* alpha,
|
||||
// be handled explicitly in the packing of C, but if C is not being
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
|
||||
{
|
||||
bl2_obj_induce_trans( c_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
|
||||
// Set a bool based on the uplo field of c.
|
||||
if ( bl2_obj_root_is_lower( c_local ) ) uplo = 0;
|
||||
|
||||
@@ -96,7 +96,10 @@ void bl2_herk_int( obj_t* alpha,
|
||||
// be handled explicitly in the packing of C, but if C is not being
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
|
||||
{
|
||||
bl2_obj_induce_trans( c_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
|
||||
// Set a bool based on the uplo field of C's root object.
|
||||
if ( bl2_obj_root_is_lower( c_local ) ) uplo = 0;
|
||||
|
||||
@@ -107,6 +107,7 @@ void bl2_trmm( side_t side,
|
||||
if ( bl2_obj_has_trans( a_local ) )
|
||||
{
|
||||
bl2_obj_induce_trans( a_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
|
||||
}
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
|
||||
@@ -97,7 +97,10 @@ void bl2_trmm_int( side_t side,
|
||||
// be handled explicitly in the packing of C, but if C is not being
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
|
||||
{
|
||||
bl2_obj_induce_trans( c_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
|
||||
// Set a bool based on the uplo field of A's root object.
|
||||
if ( bl2_obj_root_is_lower( *a ) ) uplo = 0;
|
||||
|
||||
@@ -299,6 +299,9 @@ void PASTEMAC(ch,varname)( \
|
||||
k_a1112 = k - off_a1112; \
|
||||
\
|
||||
bp_i = bp + off_a1112 * NR * NDUP; \
|
||||
\
|
||||
/*PASTEMAC(ch,fprintm)( stdout, "trmm_u_ker_var2: a1", MR, k_a1112, a1, 1, MR, "%4.1f", "" );*/ \
|
||||
/*PASTEMAC(ch,fprintm)( stdout, "trmm_u_ker_var2: b1", k_a1112, NR, bp_i, NR, 1, "%4.1f", "" );*/ \
|
||||
\
|
||||
/* Handle interior and edge cases separately. */ \
|
||||
if ( m_cur == MR && n_cur == NR ) \
|
||||
|
||||
@@ -113,6 +113,7 @@ void bl2_trmm3( side_t side,
|
||||
if ( bl2_obj_has_trans( a_local ) )
|
||||
{
|
||||
bl2_obj_induce_trans( a_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
|
||||
}
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
|
||||
@@ -103,6 +103,7 @@ void bl2_trsm( side_t side,
|
||||
if ( bl2_obj_has_trans( a_local ) )
|
||||
{
|
||||
bl2_obj_induce_trans( a_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, a_local );
|
||||
}
|
||||
|
||||
// Create an object to hold a copy-cast of alpha. Notice that we use
|
||||
|
||||
@@ -97,7 +97,10 @@ void bl2_trsm_int( side_t side,
|
||||
// be handled explicitly in the packing of C, but if C is not being
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bl2_obj_has_trans( *c ) )
|
||||
{
|
||||
bl2_obj_induce_trans( c_local );
|
||||
bl2_obj_set_trans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
|
||||
// Set a bool based on the uplo field of A's root object.
|
||||
if ( bl2_obj_root_is_lower( *a ) ) uplo = 0;
|
||||
|
||||
@@ -1026,7 +1026,11 @@ bl2_obj_width_stored( obj )
|
||||
if ( bl2_obj_is_upper_or_lower( obj ) ) \
|
||||
bl2_obj_toggle_uplo( obj ); \
|
||||
\
|
||||
bl2_obj_toggle_trans( obj ); \
|
||||
/* Note that this macro DOES NOT touch the transposition bit! If
|
||||
the calling code is using this macro to handle an object whose
|
||||
transposition bit is set prior to computation, that code needs
|
||||
to manually clear or toggle the bit, via bl2_obj_set_trans() or
|
||||
bl2_obj_toggle_trans(), respectively. */ \
|
||||
} \
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user