diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index 69b30a52e..8af29594d 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -172,8 +172,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ diff --git a/frame/3/gemm/ind/bli_gemm3m2_ker_var2.c b/frame/3/gemm/ind/bli_gemm3m2_ker_var2.c index ea8904183..195a1ee28 100644 --- a/frame/3/gemm/ind/bli_gemm3m2_ker_var2.c +++ b/frame/3/gemm/ind/bli_gemm3m2_ker_var2.c @@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = 1; \ - const inc_t cs_ct = MR; \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict one = PASTEMAC(ch,1); \ diff --git a/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c b/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c index d9d714917..b5788ae5e 100644 --- a/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c +++ b/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c @@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = 1; \ - const inc_t cs_ct = MR; \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict one = PASTEMAC(ch,1); \ diff --git a/frame/3/herk/bli_herk_l_ker_var2.c b/frame/3/herk/bli_herk_l_ker_var2.c index ce2bdb6b4..11cf2f5fa 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.c +++ b/frame/3/herk/bli_herk_l_ker_var2.c @@ -177,8 +177,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ diff --git a/frame/3/herk/bli_herk_u_ker_var2.c b/frame/3/herk/bli_herk_u_ker_var2.c index 771087c24..ab1e336e3 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.c +++ b/frame/3/herk/bli_herk_u_ker_var2.c @@ -177,8 +177,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict a_cast = a; \ diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index 2aa127f76..378d8d4f2 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.c b/frame/3/trmm/bli_trmm_lu_ker_var2.c index ee7308d49..b572fe500 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.c +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c @@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index 5502d3cf4..a3b48065d 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index fd5267dd6..47a878bfd 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -169,8 +169,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict one = PASTEMAC(ch,1); \ ctype* restrict zero = PASTEMAC(ch,0); \ diff --git a/frame/3/trsm/bli_trsm_front.c b/frame/3/trsm/bli_trsm_front.c index c38a193f8..eda248b6b 100644 --- a/frame/3/trsm/bli_trsm_front.c +++ b/frame/3/trsm/bli_trsm_front.c @@ -86,9 +86,6 @@ void bli_trsm_front } #if 0 - // NOTE: Enabling this code requires that BLIS be configured with - // BLIS_RELAX_MCNR_NCMR_CONSTRAINTS defined. -#ifdef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS // If A is being solved against from the right, transpose all operands // so that we can perform the computation as if A were being solved @@ -101,9 +98,14 @@ void bli_trsm_front bli_obj_induce_trans( c_local ); } -#endif #else + // NOTE: Enabling this code requires that BLIS NOT be configured with + // BLIS_RELAX_MCNR_NCMR_CONSTRAINTS defined. +#ifdef BLIS_RELAX_MCNR_NCMR_CONSTRAINTS + #error "BLIS_RELAX_MCNR_NCMR_CONSTRAINTS must not be defined for current trsm_r implementation." +#endif + // If A is being solved against from the right, swap A and B so that // the triangular matrix will actually be on the right. if ( bli_is_right( side ) ) diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index f20c3a021..c5e9e06f5 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -173,8 +173,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.c b/frame/3/trsm/bli_trsm_lu_ker_var2.c index 1f8a09d08..f8a363b50 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.c +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c @@ -173,8 +173,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index 4e34454f1..91f5c8598 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -178,8 +178,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index 572c47402..8ec330c0b 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -178,8 +178,9 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const inc_t rs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? 1 : NR ); \ - const inc_t cs_ct = ( bli_is_col_stored( rs_c, cs_c ) ? MR : 1 ); \ + const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const inc_t rs_ct = ( col_pref ? 1 : NR ); \ + const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ ctype* restrict zero = PASTEMAC(ch,0); \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ diff --git a/frame/base/bli_cntx.c b/frame/base/bli_cntx.c index 084604cdc..bd9972332 100644 --- a/frame/base/bli_cntx.c +++ b/frame/base/bli_cntx.c @@ -867,6 +867,30 @@ bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj, return r_val; } +bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, + l3ukr_t ukr_id, + cntx_t* cntx ) +{ + // Reference the ukr storage preferences of the corresponding real + // micro-kernel for induced methods. + if ( bli_cntx_get_ind_method( cntx ) != BLIS_NAT ) + dt = bli_datatype_proj_to_real( dt ); + + return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx ); +} + +bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, + l3ukr_t ukr_id, + cntx_t* cntx ) +{ + // Reference the ukr storage preferences of the corresponding real + // micro-kernel for induced methods. + if ( bli_cntx_get_ind_method( cntx ) != BLIS_NAT ) + dt = bli_datatype_proj_to_real( dt ); + + return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx ); +} + bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) @@ -880,15 +904,10 @@ bool_t bli_cntx_l3_ukr_dislikes_storage_of( obj_t* obj, { num_t dt = bli_obj_datatype( *obj ); - // Reference the ukr storage preferences of the corresponding real - // micro-kernel for induced methods. - if ( bli_cntx_get_ind_method( cntx ) != BLIS_NAT ) - dt = bli_obj_datatype_proj_to_real( *obj ); - const bool_t ukr_prefers_rows - = bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx ); + = bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx ); const bool_t ukr_prefers_cols - = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx ); + = bli_cntx_l3_ukr_prefers_cols_dt( dt, ukr_id, cntx ); bool_t r_val = FALSE; if ( bli_obj_is_row_stored( *obj ) && ukr_prefers_cols ) r_val = TRUE; diff --git a/frame/base/bli_cntx.h b/frame/base/bli_cntx.h index 8cfd67750..38bff6720 100644 --- a/frame/base/bli_cntx.h +++ b/frame/base/bli_cntx.h @@ -455,6 +455,12 @@ bool_t bli_cntx_l3_nat_ukr_prefers_storage_of( obj_t* obj, bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ); +bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, + l3ukr_t ukr_id, + cntx_t* cntx ); +bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, + l3ukr_t ukr_id, + cntx_t* cntx ); bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx );