Merge master code up to "Switched to simpler trsm_r" (2016_11_25) into amd-staging

Change-Id: Ibf71d224d8fb6cf0bc497f84d50c27d276512cc1
This commit is contained in:
praveeng
2016-11-25 17:06:36 +05:30
16 changed files with 77 additions and 37 deletions

View File

@@ -172,8 +172,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict a_cast = a; \

View File

@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = 1; \
const inc_t cs_ct = MR; \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict one = PASTEMAC(ch,1); \

View File

@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = 1; \
const inc_t cs_ct = MR; \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict one = PASTEMAC(ch,1); \

View File

@@ -177,8 +177,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict a_cast = a; \

View File

@@ -177,8 +177,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict a_cast = a; \

View File

@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict one = PASTEMAC(ch,1); \
ctype* restrict zero = PASTEMAC(ch,0); \

View File

@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict one = PASTEMAC(ch,1); \
ctype* restrict zero = PASTEMAC(ch,0); \

View File

@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict one = PASTEMAC(ch,1); \
ctype* restrict zero = PASTEMAC(ch,0); \

View File

@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict one = PASTEMAC(ch,1); \
ctype* restrict zero = PASTEMAC(ch,0); \

View File

@@ -86,9 +86,6 @@ void bli_trsm_front
}
#if 0
// NOTE: Enabling this code requires that BLIS be configured with
// BLIS_RELAX_MCNR_NCMR_CONSTRAINTS defined.
#ifdef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
// If A is being solved against from the right, transpose all operands
// so that we can perform the computation as if A were being solved
@@ -101,9 +98,14 @@ void bli_trsm_front
bli_obj_induce_trans( c_local );
}
#endif
#else
// NOTE: Enabling this code requires that BLIS NOT be configured with
// BLIS_RELAX_MCNR_NCMR_CONSTRAINTS defined.
#ifdef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
#error "BLIS_RELAX_MCNR_NCMR_CONSTRAINTS must not be defined for current trsm_r implementation."
#endif
// If A is being solved against from the right, swap A and B so that
// the triangular matrix will actually be on the right.
if ( bli_is_right( side ) )

View File

@@ -173,8 +173,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict minus_one = PASTEMAC(ch,m1); \

View File

@@ -173,8 +173,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict minus_one = PASTEMAC(ch,m1); \

View File

@@ -178,8 +178,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict minus_one = PASTEMAC(ch,m1); \

View File

@@ -178,8 +178,9 @@ void PASTEMAC(ch,varname) \
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
/ sizeof( ctype ) ] \
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
\
ctype* restrict zero = PASTEMAC(ch,0); \
ctype* restrict minus_one = PASTEMAC(ch,m1); \

View File

@@ -867,6 +867,30 @@ bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj,
return r_val;
}
bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t    dt,
                                        l3ukr_t  ukr_id,
                                        cntx_t*  cntx )
{
	// When the context is not using native execution (i.e. an induced
	// method is active), the storage preference that matters is that of
	// the corresponding real micro-kernel, so the datatype is projected
	// to its real counterpart before the native query is made.
	const bool_t is_native = ( bli_cntx_get_ind_method( cntx ) == BLIS_NAT );
	const num_t  dt_query  = ( is_native ? dt
	                                     : bli_datatype_proj_to_real( dt ) );

	// Defer to the native query with the (possibly projected) datatype.
	return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt_query, ukr_id, cntx );
}
bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t    dt,
                                        l3ukr_t  ukr_id,
                                        cntx_t*  cntx )
{
	// When the context is not using native execution (i.e. an induced
	// method is active), the storage preference that matters is that of
	// the corresponding real micro-kernel, so the datatype is projected
	// to its real counterpart before the native query is made.
	const bool_t is_native = ( bli_cntx_get_ind_method( cntx ) == BLIS_NAT );
	const num_t  dt_query  = ( is_native ? dt
	                                     : bli_datatype_proj_to_real( dt ) );

	// Defer to the native query with the (possibly projected) datatype.
	return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt_query, ukr_id, cntx );
}
bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj,
l3ukr_t ukr_id,
cntx_t* cntx )
@@ -880,15 +904,10 @@ bool_t bli_cntx_l3_ukr_dislikes_storage_of( obj_t* obj,
{
num_t dt = bli_obj_datatype( *obj );
// Reference the ukr storage preferences of the corresponding real
// micro-kernel for induced methods.
if ( bli_cntx_get_ind_method( cntx ) != BLIS_NAT )
dt = bli_obj_datatype_proj_to_real( *obj );
const bool_t ukr_prefers_rows
= bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
= bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx );
const bool_t ukr_prefers_cols
= bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
= bli_cntx_l3_ukr_prefers_cols_dt( dt, ukr_id, cntx );
bool_t r_val = FALSE;
if ( bli_obj_is_row_stored( *obj ) && ukr_prefers_cols ) r_val = TRUE;

View File

@@ -455,6 +455,12 @@ bool_t bli_cntx_l3_nat_ukr_prefers_storage_of( obj_t* obj,
bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj,
l3ukr_t ukr_id,
cntx_t* cntx );
// Datatype-based query of the row-storage preference of the level-3
// micro-kernel ukr_id in context cntx. If the context's induced method is
// not BLIS_NAT, dt is first projected to its real counterpart; the result
// is then obtained from bli_cntx_l3_nat_ukr_prefers_rows_dt().
bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx );
// Datatype-based query of the column-storage preference of the level-3
// micro-kernel ukr_id in context cntx. If the context's induced method is
// not BLIS_NAT, dt is first projected to its real counterpart; the result
// is then obtained from bli_cntx_l3_nat_ukr_prefers_cols_dt().
bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt,
l3ukr_t ukr_id,
cntx_t* cntx );
bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj,
l3ukr_t ukr_id,
cntx_t* cntx );