mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Some fixes for the internal functions,
which were inappropriately having only the thread chief do some things.
This commit is contained in:
@@ -110,28 +110,25 @@ void bli_gemm_int( obj_t* alpha,
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) ) {
|
||||
//if( thread_am_ochief( thread ) ) {
|
||||
bli_obj_induce_trans( c_local );
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
// }
|
||||
}
|
||||
|
||||
// If alpha is non-unit, typecast and apply it to the scalar attached
|
||||
// to B.
|
||||
if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) )
|
||||
bli_obj_scalar_apply_scalar( alpha, &b_local );
|
||||
bli_obj_scalar_apply_scalar( alpha, &b_local );
|
||||
}
|
||||
|
||||
// If beta is non-unit, typecast and apply it to the scalar attached
|
||||
// to C.
|
||||
if ( !bli_obj_equals( beta, &BLIS_ONE ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) )
|
||||
bli_obj_scalar_apply_scalar( beta, &c_local );
|
||||
bli_obj_scalar_apply_scalar( beta, &c_local );
|
||||
}
|
||||
thread_obarrier( thread );
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
|
||||
@@ -109,34 +109,28 @@ void bli_herk_int( obj_t* alpha,
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) ) {
|
||||
bli_obj_induce_trans( c_local );
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
bli_obj_induce_trans( c_local );
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
|
||||
// If alpha is non-unit, typecast and apply it to the scalar
|
||||
// attached to A'.
|
||||
if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) )
|
||||
bli_obj_scalar_apply_scalar( alpha, &ah_local );
|
||||
bli_obj_scalar_apply_scalar( alpha, &ah_local );
|
||||
}
|
||||
|
||||
// If beta is non-unit, typecast and apply it to the scalar
|
||||
// attached to C.
|
||||
if ( !bli_obj_equals( beta, &BLIS_ONE ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) )
|
||||
bli_obj_scalar_apply_scalar( beta, &c_local );
|
||||
bli_obj_scalar_apply_scalar( beta, &c_local );
|
||||
}
|
||||
|
||||
// Set a bool based on the uplo field of C's root object.
|
||||
if ( bli_obj_root_is_lower( c_local ) ) uplo = 0;
|
||||
else uplo = 1;
|
||||
|
||||
thread_obarrier( thread );
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
i = cntl_impl_type( cntl );
|
||||
|
||||
@@ -131,26 +131,22 @@ void bli_trmm_int( obj_t* alpha,
|
||||
// packed, this is our last chance to handle the transposition.
|
||||
if ( cntl_is_leaf( cntl ) && bli_obj_has_trans( *c ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) ) {
|
||||
bli_obj_induce_trans( c_local );
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
bli_obj_induce_trans( c_local );
|
||||
bli_obj_set_onlytrans( BLIS_NO_TRANSPOSE, c_local );
|
||||
}
|
||||
|
||||
// If alpha is non-unit, typecast and apply it to the scalar attached
|
||||
// to B.
|
||||
if ( !bli_obj_equals( alpha, &BLIS_ONE ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) )
|
||||
bli_obj_scalar_apply_scalar( alpha, &b_local );
|
||||
bli_obj_scalar_apply_scalar( alpha, &b_local );
|
||||
}
|
||||
|
||||
// If beta is non-unit, typecast and apply it to the scalar attached
|
||||
// to C.
|
||||
if ( !bli_obj_equals( beta, &BLIS_ONE ) )
|
||||
{
|
||||
if( thread_am_ochief( thread ) )
|
||||
bli_obj_scalar_apply_scalar( beta, &c_local );
|
||||
bli_obj_scalar_apply_scalar( beta, &c_local );
|
||||
}
|
||||
|
||||
// Set two bools: one based on the implied side parameter (the structure
|
||||
@@ -170,8 +166,6 @@ void bli_trmm_int( obj_t* alpha,
|
||||
else uplo = 1;
|
||||
}
|
||||
|
||||
thread_obarrier( thread );
|
||||
|
||||
// Extract the variant number and implementation type.
|
||||
n = cntl_var_num( cntl );
|
||||
i = cntl_impl_type( cntl );
|
||||
|
||||
Reference in New Issue
Block a user