mirror of
https://github.com/amd/blis.git
synced 2026-05-11 17:50:00 +00:00
Merge master code up to "Switched to simpler trsm_r" (2016_11_25) into amd-staging
Change-Id: Ibf71d224d8fb6cf0bc497f84d50c27d276512cc1
This commit is contained in:
@@ -172,8 +172,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict a_cast = a; \
|
||||
|
||||
@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = MR; \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
|
||||
@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = 1; \
|
||||
const inc_t cs_ct = MR; \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
|
||||
@@ -177,8 +177,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict a_cast = a; \
|
||||
|
||||
@@ -177,8 +177,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict a_cast = a; \
|
||||
|
||||
@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict one = PASTEMAC(ch,1); \
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
|
||||
@@ -86,9 +86,6 @@ void bli_trsm_front
|
||||
}
|
||||
|
||||
#if 0
|
||||
// NOTE: Enabling this code requires that BLIS be configured with
|
||||
// BLIS_RELAX_MCNR_NCMR_CONSTRAINTS defined.
|
||||
#ifdef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
|
||||
|
||||
// If A is being solved against from the right, transpose all operands
|
||||
// so that we can perform the computation as if A were being solved
|
||||
@@ -101,9 +98,14 @@ void bli_trsm_front
|
||||
bli_obj_induce_trans( c_local );
|
||||
}
|
||||
|
||||
#endif
|
||||
#else
|
||||
|
||||
// NOTE: Enabling this code requires that BLIS NOT be configured with
|
||||
// BLIS_RELAX_MCNR_NCMR_CONSTRAINTS defined.
|
||||
#ifdef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS
|
||||
#error "BLIS_RELAX_MCNR_NCMR_CONSTRAINTS must not be defined for current trsm_r implementation."
|
||||
#endif
|
||||
|
||||
// If A is being solved against from the right, swap A and B so that
|
||||
// the triangular matrix will actually be on the right.
|
||||
if ( bli_is_right( side ) )
|
||||
|
||||
@@ -173,8 +173,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict minus_one = PASTEMAC(ch,m1); \
|
||||
|
||||
@@ -173,8 +173,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict minus_one = PASTEMAC(ch,m1); \
|
||||
|
||||
@@ -178,8 +178,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict minus_one = PASTEMAC(ch,m1); \
|
||||
|
||||
@@ -178,8 +178,9 @@ void PASTEMAC(ch,varname) \
|
||||
ctype ct[ BLIS_STACK_BUF_MAX_SIZE \
|
||||
/ sizeof( ctype ) ] \
|
||||
__attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \
|
||||
const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \
|
||||
const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \
|
||||
const inc_t rs_ct = ( col_pref ? 1 : NR ); \
|
||||
const inc_t cs_ct = ( col_pref ? MR : 1 ); \
|
||||
\
|
||||
ctype* restrict zero = PASTEMAC(ch,0); \
|
||||
ctype* restrict minus_one = PASTEMAC(ch,m1); \
|
||||
|
||||
@@ -867,6 +867,30 @@ bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj,
|
||||
return r_val;
|
||||
}
|
||||
|
||||
// Query the row-storage preference of level-3 micro-kernel ukr_id for
// datatype dt, as recorded in cntx.
//
// When the context's induced method is anything other than BLIS_NAT, dt is
// first replaced by bli_datatype_proj_to_real( dt ); the answer is then
// whatever bli_cntx_l3_nat_ukr_prefers_rows_dt() reports for that datatype.
//
// dt:     datatype to query (reassigned locally only; passed by value).
// ukr_id: identifier of the level-3 micro-kernel being queried.
// cntx:   context that supplies the induced-method setting and is forwarded
//         to the native query.
// Returns the bool_t produced by the native-ukr query, unmodified.
bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
{
|
||||
// Reference the ukr storage preferences of the corresponding real
|
||||
// micro-kernel for induced methods.
|
||||
if ( bli_cntx_get_ind_method( cntx ) != BLIS_NAT )
|
||||
dt = bli_datatype_proj_to_real( dt );
|
||||
|
|
||||
// Delegate to the native-ukr query using the (possibly projected) datatype.
return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
|
||||
}
|
||||
|
||||
// Query the column-storage preference of level-3 micro-kernel ukr_id for
// datatype dt, as recorded in cntx.
//
// When the context's induced method is anything other than BLIS_NAT, dt is
// first replaced by bli_datatype_proj_to_real( dt ); the answer is then
// whatever bli_cntx_l3_nat_ukr_prefers_cols_dt() reports for that datatype.
//
// dt:     datatype to query (reassigned locally only; passed by value).
// ukr_id: identifier of the level-3 micro-kernel being queried.
// cntx:   context that supplies the induced-method setting and is forwarded
//         to the native query.
// Returns the bool_t produced by the native-ukr query, unmodified.
bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
{
|
||||
// Reference the ukr storage preferences of the corresponding real
|
||||
// micro-kernel for induced methods.
|
||||
if ( bli_cntx_get_ind_method( cntx ) != BLIS_NAT )
|
||||
dt = bli_datatype_proj_to_real( dt );
|
||||
|
|
||||
// Delegate to the native-ukr query using the (possibly projected) datatype.
return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
|
||||
}
|
||||
|
||||
bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx )
|
||||
@@ -880,15 +904,10 @@ bool_t bli_cntx_l3_ukr_dislikes_storage_of( obj_t* obj,
|
||||
{
|
||||
num_t dt = bli_obj_datatype( *obj );
|
||||
|
||||
// Reference the ukr storage preferences of the corresponding real
|
||||
// micro-kernel for induced methods.
|
||||
if ( bli_cntx_get_ind_method( cntx ) != BLIS_NAT )
|
||||
dt = bli_obj_datatype_proj_to_real( *obj );
|
||||
|
||||
const bool_t ukr_prefers_rows
|
||||
= bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx );
|
||||
= bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx );
|
||||
const bool_t ukr_prefers_cols
|
||||
= bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx );
|
||||
= bli_cntx_l3_ukr_prefers_cols_dt( dt, ukr_id, cntx );
|
||||
bool_t r_val = FALSE;
|
||||
|
||||
if ( bli_obj_is_row_stored( *obj ) && ukr_prefers_cols ) r_val = TRUE;
|
||||
|
||||
@@ -455,6 +455,12 @@ bool_t bli_cntx_l3_nat_ukr_prefers_storage_of( obj_t* obj,
|
||||
bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj,
|
||||
l3ukr_t ukr_id,
|
||||
cntx_t* cntx );
|
||||
|
||||
Reference in New Issue
Block a user