mirror of
https://github.com/amd/blis.git
synced 2026-05-12 01:59:59 +00:00
Some improvements to trsm parallelism
This commit is contained in:
@@ -83,9 +83,9 @@ void bli_trsm_blk_var2b( obj_t* a,
 // Query dimension in partitioning direction.
 n_trans = bli_obj_width_after_trans( *b );
 dim_t start, end;
-bli_get_range_weighted( thread, 0, n_trans,
-bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
-bli_obj_is_upper( *c ), &start, &end );
+bli_get_range( thread, 0, n_trans,
+bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
+&start, &end );

 // Partition along the n dimension.
 for ( i = start; i < end; i += b_alg )
@@ -83,9 +83,9 @@ void bli_trsm_blk_var2f( obj_t* a,
 // Query dimension in partitioning direction.
 n_trans = bli_obj_width_after_trans( *b );
 dim_t start, end;
-bli_get_range_weighted( thread, 0, n_trans,
-bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
-bli_obj_is_lower( *c ), &start, &end );
+bli_get_range( thread, 0, n_trans,
+bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
+&start, &end );

 // Partition along the n dimension.
 for ( i = start; i < end; i += b_alg )
Reference in New Issue
Block a user