diff --git a/frame/3/gemm/bli_gemm_blk_var1f.c b/frame/3/gemm/bli_gemm_blk_var1f.c index bba9c3290..8288f3ac9 100644 --- a/frame/3/gemm/bli_gemm_blk_var1f.c +++ b/frame/3/gemm/bli_gemm_blk_var1f.c @@ -83,7 +83,9 @@ void bli_gemm_blk_var1f( obj_t* a, // Query dimension in partitioning direction. m_trans = bli_obj_length_after_trans( *a ); dim_t start, end; - bli_get_range( thread, 0, m_trans, BLIS_DEFAULT_MR_D, &start, &end ); + bli_get_range( thread, 0, m_trans, + bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ), + &start, &end ); // Partition along the m dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/gemm/bli_gemm_blk_var2f.c b/frame/3/gemm/bli_gemm_blk_var2f.c index 71b190068..63c2f5824 100644 --- a/frame/3/gemm/bli_gemm_blk_var2f.c +++ b/frame/3/gemm/bli_gemm_blk_var2f.c @@ -82,7 +82,9 @@ void bli_gemm_blk_var2f( obj_t* a, // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; - bli_get_range( thread, 0, n_trans, BLIS_DEFAULT_NR_D, &start, &end ); + bli_get_range( thread, 0, n_trans, + bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ), + &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/herk/bli_herk_blk_var1f.c b/frame/3/herk/bli_herk_blk_var1f.c index 5d5825087..fbee3a750 100644 --- a/frame/3/herk/bli_herk_blk_var1f.c +++ b/frame/3/herk/bli_herk_blk_var1f.c @@ -82,7 +82,9 @@ void bli_herk_blk_var1f( obj_t* a, // Query dimension in partitioning direction. m_trans = bli_obj_length_after_trans( *c ); dim_t start, end; - bli_get_range_weighted( thread, 0, m_trans, BLIS_DEFAULT_MR_D, bli_obj_is_upper( *c ), &start, &end ); + bli_get_range_weighted( thread, 0, m_trans, + bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ), + bli_obj_is_upper( *c ), &start, &end ); // Partition along the m dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/herk/bli_herk_blk_var2f.c b/frame/3/herk/bli_herk_blk_var2f.c index c860de1d6..f8fc666ba 100644 --- a/frame/3/herk/bli_herk_blk_var2f.c +++ b/frame/3/herk/bli_herk_blk_var2f.c @@ -90,8 +90,9 @@ void bli_herk_blk_var2f( obj_t* a, dim_t start, end; // Needs to be replaced with a weighted range because triangle - //bli_get_range( thread, 0, n_trans, 8, &start, &end ); - bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, bli_obj_is_lower( *c ), &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, + bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ), + bli_obj_is_lower( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/trmm/bli_trmm_blk_var1f.c b/frame/3/trmm/bli_trmm_blk_var1f.c index 472cfc965..c6cd75421 100644 --- a/frame/3/trmm/bli_trmm_blk_var1f.c +++ b/frame/3/trmm/bli_trmm_blk_var1f.c @@ -94,7 +94,10 @@ void bli_trmm_blk_var1f( obj_t* a, bli_obj_width_after_trans( *a ); dim_t start, end; - bli_get_range( thread, offA, m_trans, BLIS_DEFAULT_MR_D, &start, &end ); + bli_get_range_weighted( thread, offA, m_trans, + bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ), + bli_obj_is_upper( *c ), &start, &end ); + // Partition along the m dimension. for ( i = start; i < end; i += b_alg ) { diff --git a/frame/3/trmm/bli_trmm_blk_var2b.c b/frame/3/trmm/bli_trmm_blk_var2b.c index 6281c6e2d..64b33f310 100644 --- a/frame/3/trmm/bli_trmm_blk_var2b.c +++ b/frame/3/trmm/bli_trmm_blk_var2b.c @@ -82,8 +82,9 @@ void bli_trmm_blk_var2b( obj_t* a, // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; - //bli_get_range( thread, 0, n_trans, 8, &start, &end ); - bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, bli_obj_is_upper( *c ), &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, + bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ), + bli_obj_is_upper( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/trmm/bli_trmm_blk_var2f.c b/frame/3/trmm/bli_trmm_blk_var2f.c index aabe9f251..8adaf2b57 100644 --- a/frame/3/trmm/bli_trmm_blk_var2f.c +++ b/frame/3/trmm/bli_trmm_blk_var2f.c @@ -82,8 +82,9 @@ void bli_trmm_blk_var2f( obj_t* a, // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; - //bli_get_range( thread, 0, n_trans, 8, &start, &end ); - bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, bli_obj_is_lower( *c ), &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, + bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ), + bli_obj_is_lower( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index 296325ec8..133c0d8ed 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -190,7 +190,7 @@ void PASTEMAC(ch,varname)( \ dim_t off_b1121; \ dim_t i, j; \ inc_t rstep_a; \ - inc_t cstep_b; \ + /*inc_t cstep_b; */\ inc_t rstep_c, cstep_c; \ inc_t ss_b; \ auxinfo_t aux; \ @@ -271,7 +271,7 @@ void PASTEMAC(ch,varname)( \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ - cstep_b = ps_b; \ + /*cstep_b = ps_b; */\ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 7f13e47a8..cb5ef580f 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -190,7 +190,7 @@ void PASTEMAC(ch,varname)( \ dim_t off_b0111; \ dim_t i, j; \ inc_t rstep_a; \ - inc_t cstep_b; \ + /*inc_t cstep_b; */\ inc_t rstep_c, cstep_c; \ inc_t ss_b; \ auxinfo_t aux; \ @@ -272,7 +272,7 @@ void PASTEMAC(ch,varname)( \ /* Determine some increments used to step through A, B, and C. */ \ rstep_a = ps_a; \ \ - cstep_b = ps_b; \ + /*cstep_b = ps_b; */\ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ diff --git a/frame/3/trsm/bli_trsm_blk_var1b.c b/frame/3/trsm/bli_trsm_blk_var1b.c index f73999b1f..d2037c202 100644 --- a/frame/3/trsm/bli_trsm_blk_var1b.c +++ b/frame/3/trsm/bli_trsm_blk_var1b.c @@ -82,7 +82,9 @@ void bli_trsm_blk_var1b( obj_t* a, bli_obj_width_after_trans( *a ); dim_t start, end; - bli_get_range( thread, offA, m_trans, BLIS_DEFAULT_MR_D, &start, &end ); + bli_get_range_weighted( thread, offA, m_trans, + bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ), + bli_obj_is_upper( *c ), &start, &end ); // Partition along the remaining portion of the m dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/trsm/bli_trsm_blk_var1f.c b/frame/3/trsm/bli_trsm_blk_var1f.c index e341341c9..7072d0438 100644 --- a/frame/3/trsm/bli_trsm_blk_var1f.c +++ b/frame/3/trsm/bli_trsm_blk_var1f.c @@ -81,7 +81,9 @@ void bli_trsm_blk_var1f( obj_t* a, offA = bli_abs( bli_obj_diag_offset_after_trans( *a ) ); dim_t start, end; - bli_get_range( thread, offA, m_trans, BLIS_DEFAULT_MR_D, &start, &end ); + bli_get_range( thread, offA, m_trans, + bli_determine_reg_blocksize( a, cntl_blocksize( cntl ) ), + &start, &end ); // Partition along the remaining portion of the m dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/trsm/bli_trsm_blk_var2b.c b/frame/3/trsm/bli_trsm_blk_var2b.c index 9b2dc3b41..2ee269cee 100644 --- a/frame/3/trsm/bli_trsm_blk_var2b.c +++ b/frame/3/trsm/bli_trsm_blk_var2b.c @@ -83,7 +83,9 @@ void bli_trsm_blk_var2b( obj_t* a, // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; - bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, 0, &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, + bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ), + bli_obj_is_upper( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg ) diff --git a/frame/3/trsm/bli_trsm_blk_var2f.c b/frame/3/trsm/bli_trsm_blk_var2f.c index 85af8212d..41ccc668f 100644 --- a/frame/3/trsm/bli_trsm_blk_var2f.c +++ b/frame/3/trsm/bli_trsm_blk_var2f.c @@ -83,8 +83,9 @@ void bli_trsm_blk_var2f( obj_t* a, // Query dimension in partitioning direction. n_trans = bli_obj_width_after_trans( *b ); dim_t start, end; - //bli_get_range( thread, 0, n_trans, 8, &start, &end ); - bli_get_range_weighted( thread, 0, n_trans, BLIS_DEFAULT_NR_D, 1, &start, &end ); + bli_get_range_weighted( thread, 0, n_trans, + bli_determine_reg_blocksize( b, cntl_blocksize( cntl ) ), + bli_obj_is_lower( *c ), &start, &end ); // Partition along the n dimension. for ( i = start; i < end; i += b_alg )