Macroized conditionals for a2/b2 in macro-kernels.

Details:
- Replaced conditional expressions in macro-kernels related to computing
  the addresses a2 and b2 (a_next and b_next) with a preprocessor macro
  invocation, bli_is_last_iter(), that tests the same condition.
- Updated gemm_ukr module to use auxinfo_t argument.
- Whitespace changes in test suite ukr modules.
This commit is contained in:
Field G. Van Zee
2013-12-19 16:29:31 -06:00
parent a0331fb10a
commit e3a6c7e776
17 changed files with 136 additions and 108 deletions

View File

@@ -240,11 +240,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -261,7 +261,7 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
} \

View File

@@ -274,11 +274,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -274,11 +274,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -274,11 +274,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + k_a1011 * PACKMR; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -329,11 +329,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -282,11 +282,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + k_a1112 * PACKMR; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -337,11 +337,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -282,11 +282,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + k_b1121 * PACKNR; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -344,11 +344,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -282,11 +282,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + k_b0111 * PACKNR; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -344,11 +344,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -286,11 +286,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + k_a1011 * PACKMR; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -338,11 +338,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -297,11 +297,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + k_a1112 * PACKMR; \
if ( ib == m_iter - 1 ) \
if ( bli_is_last_iter( ib, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -349,11 +349,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( ib == m_iter - 1 ) \
if ( bli_is_last_iter( ib, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -302,11 +302,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + k_b1121 * PACKNR; \
if ( jb == n_iter - 1 ) \
if ( bli_is_last_iter( jb, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -362,11 +362,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( jb == n_iter - 1 ) \
if ( bli_is_last_iter( jb, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -296,11 +296,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + k_b0111 * PACKNR; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\
@@ -356,11 +356,11 @@ void PASTEMAC(ch,varname)( \
\
/* Compute the addresses of the next panels of A and B. */ \
a2 = a1 + rstep_a; \
if ( i == m_iter - 1 ) \
if ( bli_is_last_iter( i, m_iter ) ) \
{ \
a2 = a_cast; \
b2 = b1 + cstep_b; \
if ( j == n_iter - 1 ) \
if ( bli_is_last_iter( j, n_iter ) ) \
b2 = b_cast; \
} \
\

View File

@@ -485,6 +485,10 @@
\
( i1 != 0 || left == 0 )
// Evaluates to nonzero when i1 equals iter - 1, i.e. when i1 is the index
// of the final iteration of a zero-based loop that runs iter times.
// Each argument is parenthesized so that compound expressions passed by a
// caller (e.g. a conditional or bitwise expression) are grouped as a unit
// rather than being regrouped by the precedence of == and -.
#define bli_is_last_iter( i1, iter ) \
\
( ( i1 ) == ( ( iter ) - 1 ) )
// packbuf_t-related

View File

@@ -363,12 +363,13 @@ void libblis_test_gemm_ukr_check( obj_t* alpha,
#define FUNCPTR_T gemm_ukr_fp
typedef void (*FUNCPTR_T)(
dim_t k,
void* alpha,
void* a,
void* b,
void* beta,
void* c, inc_t rs_c, inc_t cs_c
dim_t k,
void* alpha,
void* a,
void* b,
void* beta,
void* c, inc_t rs_c, inc_t cs_c,
auxinfo_t* data
);
static FUNCPTR_T GENARRAY(ftypes,gemm_ukr);
@@ -396,8 +397,20 @@ void bli_gemm_ukr( obj_t* alpha,
void* buf_beta = bli_obj_buffer_for_1x1( dt, *beta );
inc_t ps_a = bli_obj_panel_stride( *a );
inc_t ps_b = bli_obj_panel_stride( *b );
FUNCPTR_T f;
auxinfo_t data;
// Fill the auxinfo_t struct in case the micro-kernel uses it.
bli_auxinfo_set_next_a( buf_a, data );
bli_auxinfo_set_next_b( buf_b, data );
bli_auxinfo_set_ps_a( ps_a, data );
bli_auxinfo_set_ps_b( ps_b, data );
// Index into the type combination array to extract the correct
// function pointer.
f = ftypes[dt];
@@ -408,7 +421,8 @@ void bli_gemm_ukr( obj_t* alpha,
buf_a,
buf_b,
buf_beta,
buf_c, rs_c, cs_c );
buf_c, rs_c, cs_c,
&data );
}
@@ -416,12 +430,13 @@ void bli_gemm_ukr( obj_t* alpha,
#define GENTFUNC( ctype, ch, varname, ukrname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
void* alpha, \
void* a, \
void* b, \
void* beta, \
void* c, inc_t rs_c, inc_t cs_c \
dim_t k, \
void* alpha, \
void* a, \
void* b, \
void* beta, \
void* c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
) \
{ \
PASTEMAC(ch,ukrname)( k, \
@@ -430,7 +445,7 @@ void PASTEMAC(ch,varname)( \
b, \
beta, \
c, rs_c, cs_c, \
NULL ); \
data ); \
}
INSERT_GENTFUNC_BASIC( gemm_ukr, GEMM_UKERNEL )

View File

@@ -48,12 +48,13 @@ void bli_gemm_ukr( obj_t* alpha,
#define GENTPROT( ctype, ch, varname ) \
\
void PASTEMAC(ch,varname)( \
dim_t k, \
void* alpha, \
void* a, \
void* b, \
void* beta, \
void* c, inc_t rs_c, inc_t cs_c \
dim_t k, \
void* alpha, \
void* a, \
void* b, \
void* beta, \
void* c, inc_t rs_c, inc_t cs_c, \
auxinfo_t* data \
);
INSERT_GENTPROT_BASIC( gemm_ukr )

View File

@@ -508,48 +508,53 @@ void bli_gemmtrsm_ukr( obj_t* alpha,
{
dim_t k = bli_obj_width( *a1x );
num_t dt = bli_obj_datatype( *c11 );
num_t dt = bli_obj_datatype( *c11 );
void* buf_a1x = bli_obj_buffer_at_off( *a1x );
void* buf_a1x = bli_obj_buffer_at_off( *a1x );
void* buf_a11 = bli_obj_buffer_at_off( *a11 );
void* buf_a11 = bli_obj_buffer_at_off( *a11 );
void* buf_bx1 = bli_obj_buffer_at_off( *bx1 );
void* buf_bx1 = bli_obj_buffer_at_off( *bx1 );
void* buf_b11 = bli_obj_buffer_at_off( *b11 );
void* buf_b11 = bli_obj_buffer_at_off( *b11 );
void* buf_c11 = bli_obj_buffer_at_off( *c11 );
inc_t rs_c = bli_obj_row_stride( *c11 );
inc_t cs_c = bli_obj_col_stride( *c11 );
void* buf_c11 = bli_obj_buffer_at_off( *c11 );
inc_t rs_c = bli_obj_row_stride( *c11 );
inc_t cs_c = bli_obj_col_stride( *c11 );
void* buf_alpha = bli_obj_buffer_for_1x1( dt, *alpha );
FUNCPTR_T f;
inc_t ps_a = bli_obj_panel_stride( *a1x );
inc_t ps_b = bli_obj_panel_stride( *bx1 );
FUNCPTR_T f;
auxinfo_t data;
// Fill the auxinfo_t struct in case the micro-kernel uses it.
if ( bli_obj_is_lower( *a11 ) ) { bli_auxinfo_set_next_a( buf_a1x, data ); }
else { bli_auxinfo_set_next_a( buf_a11, data ); }
if ( bli_obj_is_lower( *a11 ) )
{ bli_auxinfo_set_next_a( buf_a1x, data ); }
else
{ bli_auxinfo_set_next_a( buf_a11, data ); }
bli_auxinfo_set_next_b( buf_bx1, data );
// STILL NEED TO FILL IN PANEL STRIDE FIELDS!
bli_auxinfo_set_ps_a( ps_a, data );
bli_auxinfo_set_ps_b( ps_b, data );
// Index into the type combination array to extract the correct
// function pointer.
if ( bli_obj_is_lower( *a11 ) ) f = ftypes_l[dt];
else f = ftypes_u[dt];
// Index into the type combination array to extract the correct
// function pointer.
if ( bli_obj_is_lower( *a11 ) ) f = ftypes_l[dt];
else f = ftypes_u[dt];
// Invoke the function.
f( k,
// Invoke the function.
f( k,
buf_alpha,
buf_a1x,
buf_a11,
buf_bx1,
buf_b11,
buf_c11, rs_c, cs_c,
buf_b11,
buf_c11, rs_c, cs_c,
&data );
}
@@ -568,13 +573,13 @@ void PASTEMAC(ch,varname)( \
auxinfo_t* data \
) \
{ \
PASTEMAC(ch,ukrname)( k, \
alpha, \
a1x, \
a11, \
bx1, \
b11, \
c11, rs_c, cs_c, \
PASTEMAC(ch,ukrname)( k, \
alpha, \
a1x, \
a11, \
bx1, \
b11, \
c11, rs_c, cs_c, \
data ); \
}

View File

@@ -391,37 +391,40 @@ void bli_trsm_ukr( obj_t* a,
obj_t* b,
obj_t* c )
{
num_t dt = bli_obj_datatype( *c );
num_t dt = bli_obj_datatype( *c );
void* buf_a = bli_obj_buffer_at_off( *a );
void* buf_a = bli_obj_buffer_at_off( *a );
void* buf_b = bli_obj_buffer_at_off( *b );
void* buf_b = bli_obj_buffer_at_off( *b );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t rs_c = bli_obj_row_stride( *c );
inc_t cs_c = bli_obj_col_stride( *c );
void* buf_c = bli_obj_buffer_at_off( *c );
inc_t rs_c = bli_obj_row_stride( *c );
inc_t cs_c = bli_obj_col_stride( *c );
FUNCPTR_T f;
inc_t ps_a = bli_obj_panel_stride( *a );
inc_t ps_b = bli_obj_panel_stride( *b );
FUNCPTR_T f;
auxinfo_t data;
// Fill the auxinfo_t struct in case the micro-kernel uses it.
bli_auxinfo_set_next_a( buf_a, data );
bli_auxinfo_set_next_a( buf_a, data );
bli_auxinfo_set_next_b( buf_b, data );
// STILL NEED TO FILL IN PANEL STRIDE FIELDS!
bli_auxinfo_set_ps_a( ps_a, data );
bli_auxinfo_set_ps_b( ps_b, data );
// Index into the type combination array to extract the correct
// function pointer.
if ( bli_obj_is_lower( *a ) ) f = ftypes_l[dt];
else f = ftypes_u[dt];
// Index into the type combination array to extract the correct
// function pointer.
if ( bli_obj_is_lower( *a ) ) f = ftypes_l[dt];
else f = ftypes_u[dt];
// Invoke the function.
f( buf_a,
buf_b,
buf_c, rs_c, cs_c,
// Invoke the function.
f( buf_a,
buf_b,
buf_c, rs_c, cs_c,
&data );
}
@@ -436,9 +439,9 @@ void PASTEMAC(ch,varname)( \
auxinfo_t* data \
) \
{ \
PASTEMAC(ch,ukrname)( a, \
b, \
c, rs_c, cs_c, \
PASTEMAC(ch,ukrname)( a, \
b, \
c, rs_c, cs_c, \
data ); \
}