mirror of
https://github.com/amd/blis.git
synced 2026-03-16 15:27:22 +00:00
Renamed "next micropanel" macros in bli_l3_thrinfo.h.
Details:
- Renamed several macros defined in bli_l3_thrinfo.h that compute the values of a_next and b_next, which are inserted into an auxinfo_t struct in level-3 macrokernels. (Previously, the macros did not use a bli_ prefix.)
- Updated uses of the renamed macros within various macrokernels.
This commit is contained in:
@@ -38,24 +38,24 @@
|
||||
|
||||
// gemm
|
||||
|
||||
#define gemm_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way )
|
||||
#define gemm_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way )
|
||||
#define bli_gemm_get_next_a_upanel( thread, a1, step ) ( a1 + step * thread->n_way )
|
||||
#define bli_gemm_get_next_b_upanel( thread, b1, step ) ( b1 + step * thread->n_way )
|
||||
|
||||
// herk
|
||||
|
||||
#define herk_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way )
|
||||
#define herk_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way )
|
||||
#define bli_herk_get_next_a_upanel( thread, a1, step ) ( a1 + step * thread->n_way )
|
||||
#define bli_herk_get_next_b_upanel( thread, b1, step ) ( b1 + step * thread->n_way )
|
||||
|
||||
// trmm
|
||||
|
||||
#define trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define bli_trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define bli_trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define bli_trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define bli_trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
|
||||
// trsm
|
||||
|
||||
#define trsm_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
#define bli_trsm_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way )
|
||||
|
||||
//
|
||||
// thrinfo_t APIs specific to level-3 operations.
|
||||
|
||||
@@ -295,11 +295,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -291,11 +291,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -300,11 +300,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \
|
||||
m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \
|
||||
\
|
||||
/* Compute the addresses of the next panels of A and B. */ \
|
||||
a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \
|
||||
a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \
|
||||
if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \
|
||||
{ \
|
||||
a2 = a_cast; \
|
||||
b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \
|
||||
b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \
|
||||
if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \
|
||||
b2 = b_cast; \
|
||||
} \
|
||||
|
||||
@@ -322,7 +322,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
@@ -364,7 +364,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
|
||||
ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
|
||||
\
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \
|
||||
\
|
||||
@@ -434,7 +434,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \
|
||||
{ \
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -329,7 +329,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
@@ -371,7 +371,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \
|
||||
ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \
|
||||
\
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \
|
||||
\
|
||||
@@ -441,7 +441,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \
|
||||
{ \
|
||||
if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
|
||||
ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \
|
||||
\
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1_i; \
|
||||
ctype* restrict a2; \
|
||||
@@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \
|
||||
{ \
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \
|
||||
is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \
|
||||
ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \
|
||||
\
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1_i; \
|
||||
ctype* restrict a2; \
|
||||
@@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \
|
||||
} \
|
||||
else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \
|
||||
{ \
|
||||
if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \
|
||||
\
|
||||
/* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t
|
||||
object. */ \
|
||||
@@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -340,7 +340,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if( trsm_my_iter( j, thread ) ) { \
|
||||
if( bli_trsm_my_iter( j, thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
|
||||
@@ -348,7 +348,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the n dimension (NR columns at a time). */ \
|
||||
for ( j = 0; j < n_iter; ++j ) \
|
||||
{ \
|
||||
if( trsm_my_iter( j, thread ) ) { \
|
||||
if( bli_trsm_my_iter( j, thread ) ) { \
|
||||
\
|
||||
ctype* restrict a1; \
|
||||
ctype* restrict c11; \
|
||||
|
||||
@@ -422,7 +422,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a11; \
|
||||
ctype* restrict a12; \
|
||||
@@ -508,7 +508,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
@@ -415,7 +415,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a10; \
|
||||
ctype* restrict a11; \
|
||||
@@ -501,7 +501,7 @@ void PASTEMAC(ch,varname) \
|
||||
/* Loop over the m dimension (MR rows at a time). */ \
|
||||
for ( i = 0; i < m_iter; ++i ) \
|
||||
{ \
|
||||
if( trsm_my_iter( i, thread ) ){ \
|
||||
if( bli_trsm_my_iter( i, thread ) ){ \
|
||||
\
|
||||
ctype* restrict a2; \
|
||||
\
|
||||
|
||||
Reference in New Issue
Block a user