mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Replaced the register blocksize hack with a query of the register blocksize to determine the parallelism granularity.
This commit is contained in:
@@ -83,7 +83,9 @@ void bli_gemm_blk_var1f( obj_t* a,
|
||||
// Query dimension in partitioning direction.
|
||||
m_trans = bli_obj_length_after_trans( *a );
|
||||
dim_t start, end;
|
||||
bli_get_range( thread, 0, m_trans, BLIS_DEFAULT_MR_D, &start, &end );
|
||||
bli_get_range( thread, 0, m_trans,
|
||||
bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ),
|
||||
&start, &end );
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -82,7 +82,9 @@ void bli_gemm_blk_var2f( obj_t* a,
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *b );
|
||||
dim_t start, end;
|
||||
bli_get_range( thread, 0, n_trans, BLIS_DEFAULT_NR_D, &start, &end );
|
||||
bli_get_range( thread, 0, n_trans,
|
||||
bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
|
||||
&start, &end );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -82,7 +82,9 @@ void bli_herk_blk_var1f( obj_t* a,
|
||||
// Query dimension in partitioning direction.
|
||||
m_trans = bli_obj_length_after_trans( *c );
|
||||
dim_t start, end;
|
||||
bli_get_range_weighted( thread, 0, m_trans, BLIS_DEFAULT_MR_D, bli_obj_is_upper( *c ), &start, &end );
|
||||
bli_get_range_weighted( thread, 0, m_trans,
|
||||
bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_upper( *c ), &start, &end );
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -90,8 +90,9 @@ void bli_herk_blk_var2f( obj_t* a,
|
||||
dim_t start, end;
|
||||
|
||||
// Needs to be replaced with a weighted range because the matrix is triangular
|
||||
//bli_get_range( thread, 0, n_trans, 8, &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, bli_obj_is_lower( *c ), &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans,
|
||||
bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_lower( *c ), &start, &end );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -94,7 +94,10 @@ void bli_trmm_blk_var1f( obj_t* a,
|
||||
bli_obj_width_after_trans( *a );
|
||||
|
||||
dim_t start, end;
|
||||
bli_get_range( thread, offA, m_trans, BLIS_DEFAULT_MR_D, &start, &end );
|
||||
bli_get_range_weighted( thread, offA, m_trans,
|
||||
bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_upper( *c ), &start, &end );
|
||||
|
||||
// Partition along the m dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
{
|
||||
|
||||
@@ -82,8 +82,9 @@ void bli_trmm_blk_var2b( obj_t* a,
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *b );
|
||||
dim_t start, end;
|
||||
//bli_get_range( thread, 0, n_trans, 8, &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, bli_obj_is_upper( *c ), &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans,
|
||||
bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_upper( *c ), &start, &end );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -82,8 +82,9 @@ void bli_trmm_blk_var2f( obj_t* a,
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *b );
|
||||
dim_t start, end;
|
||||
//bli_get_range( thread, 0, n_trans, 8, &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, bli_obj_is_lower( *c ), &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans,
|
||||
bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_lower( *c ), &start, &end );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -190,7 +190,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t off_b1121; \
|
||||
dim_t i, j; \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
/*inc_t cstep_b; */\
|
||||
inc_t rstep_c, cstep_c; \
|
||||
inc_t ss_b; \
|
||||
auxinfo_t aux; \
|
||||
@@ -271,7 +271,7 @@ void PASTEMAC(ch,varname)( \
|
||||
/* Determine some increments used to step through A, B, and C. */ \
|
||||
rstep_a = ps_a; \
|
||||
\
|
||||
cstep_b = ps_b; \
|
||||
/*cstep_b = ps_b; */\
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
|
||||
@@ -190,7 +190,7 @@ void PASTEMAC(ch,varname)( \
|
||||
dim_t off_b0111; \
|
||||
dim_t i, j; \
|
||||
inc_t rstep_a; \
|
||||
inc_t cstep_b; \
|
||||
/*inc_t cstep_b; */\
|
||||
inc_t rstep_c, cstep_c; \
|
||||
inc_t ss_b; \
|
||||
auxinfo_t aux; \
|
||||
@@ -272,7 +272,7 @@ void PASTEMAC(ch,varname)( \
|
||||
/* Determine some increments used to step through A, B, and C. */ \
|
||||
rstep_a = ps_a; \
|
||||
\
|
||||
cstep_b = ps_b; \
|
||||
/*cstep_b = ps_b; */\
|
||||
\
|
||||
rstep_c = rs_c * MR; \
|
||||
cstep_c = cs_c * NR; \
|
||||
|
||||
@@ -82,7 +82,9 @@ void bli_trsm_blk_var1b( obj_t* a,
|
||||
bli_obj_width_after_trans( *a );
|
||||
|
||||
dim_t start, end;
|
||||
bli_get_range( thread, offA, m_trans, BLIS_DEFAULT_MR_D, &start, &end );
|
||||
bli_get_range_weighted( thread, offA, m_trans,
|
||||
bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_upper( *c ), &start, &end );
|
||||
|
||||
// Partition along the remaining portion of the m dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -81,7 +81,9 @@ void bli_trsm_blk_var1f( obj_t* a,
|
||||
offA = bli_abs( bli_obj_diag_offset_after_trans( *a ) );
|
||||
|
||||
dim_t start, end;
|
||||
bli_get_range( thread, offA, m_trans, BLIS_DEFAULT_MR_D, &start, &end );
|
||||
bli_get_range( thread, offA, m_trans,
|
||||
bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ),
|
||||
&start, &end );
|
||||
|
||||
// Partition along the remaining portion of the m dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -83,7 +83,9 @@ void bli_trsm_blk_var2b( obj_t* a,
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *b );
|
||||
dim_t start, end;
|
||||
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, 0, &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans,
|
||||
bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_upper( *c ), &start, &end );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
@@ -83,8 +83,9 @@ void bli_trsm_blk_var2f( obj_t* a,
|
||||
// Query dimension in partitioning direction.
|
||||
n_trans = bli_obj_width_after_trans( *b );
|
||||
dim_t start, end;
|
||||
//bli_get_range( thread, 0, n_trans, 8, &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, 1, &start, &end );
|
||||
bli_get_range_weighted( thread, 0, n_trans,
|
||||
bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ),
|
||||
bli_obj_is_lower( *c ), &start, &end );
|
||||
|
||||
// Partition along the n dimension.
|
||||
for ( i = start; i < end; i += b_alg )
|
||||
|
||||
Reference in New Issue
Block a user