From 0020ef7c82711a7ebf08e5174f939bee2563184c Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Sat, 13 Apr 2013 15:26:35 -0500 Subject: [PATCH] Removed gemmtrsm-, trsm-specific blocksize macros. Details: - Modified gemmtrsm micro-kernel wrappers to use new aliased blocksize macros instead of operation-specific ones. - Removed local, gemmtrsm-specific blocksize macro definitions found in micro-kernel header files. (Meant to include above changes in 31b100e7bf4a.) - Added comments to reference gemmtrsm micro-kernel wrapper implementation. --- frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c | 6 +++++- frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.h | 16 ---------------- frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c | 6 +++++- frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.h | 16 ---------------- frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.c | 4 ++-- frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.h | 16 ---------------- frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.c | 4 ++-- frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.h | 16 ---------------- 8 files changed, 14 insertions(+), 70 deletions(-) diff --git a/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c b/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c index 33bddb233..9a7aac804 100644 --- a/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c +++ b/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.c @@ -49,13 +49,14 @@ void PASTEMAC(ch,varname)( \ ctype* restrict c, inc_t rs_c, inc_t cs_c \ ) \ { \ - const dim_t NR = PASTEMAC2(ch,varname,_nr); \ + const dim_t NR = PASTEMAC(ch,nr); \ \ const inc_t rs_b = NR; \ const inc_t cs_b = 1; \ \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ \ + /* b = alpha * b - aL * bdT; */ \ PASTEMAC(ch,gemmukr)( k, \ minus_one, \ aL, \ @@ -63,6 +64,9 @@ void PASTEMAC(ch,varname)( \ alpha, \ b, rs_b, cs_b ); \ \ + /* b = inv(a) * b; + bd = b; (if gemm ukernel needs duplicated B) + c = b; */ \ PASTEMAC(ch,trsmukr)( a, \ b, \ bd, \ diff --git a/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.h b/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.h index d02beb9c7..284acdd36 100644 --- a/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.h +++ b/frame/3/trsm/ukernels/bli_gemmtrsm_l_ref_mxn.h @@ -33,22 +33,6 @@ */ -// -// Define micro-kernel blocksizes. -// -#define bli_sgemmtrsm_l_ref_mxn_mr BLIS_DEFAULT_MR_S -#define bli_sgemmtrsm_l_ref_mxn_nr BLIS_DEFAULT_NR_S - -#define bli_dgemmtrsm_l_ref_mxn_mr BLIS_DEFAULT_MR_D -#define bli_dgemmtrsm_l_ref_mxn_nr BLIS_DEFAULT_NR_D - -#define bli_cgemmtrsm_l_ref_mxn_mr BLIS_DEFAULT_MR_C -#define bli_cgemmtrsm_l_ref_mxn_nr BLIS_DEFAULT_NR_C - -#define bli_zgemmtrsm_l_ref_mxn_mr BLIS_DEFAULT_MR_Z -#define bli_zgemmtrsm_l_ref_mxn_nr BLIS_DEFAULT_NR_Z - - // // Prototype micro-kernel interfaces. // diff --git a/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c b/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c index 8b33d4787..04a49bed4 100644 --- a/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c +++ b/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.c @@ -49,13 +49,14 @@ void PASTEMAC(ch,varname)( \ ctype* restrict c, inc_t rs_c, inc_t cs_c \ ) \ { \ - const dim_t NR = PASTEMAC2(ch,varname,_nr); \ + const dim_t NR = PASTEMAC(ch,nr); \ \ const inc_t rs_b = NR; \ const inc_t cs_b = 1; \ \ ctype* restrict minus_one = PASTEMAC(ch,m1); \ \ + /* b = alpha * b - aR * bdB; */ \ PASTEMAC(ch,gemmukr)( k, \ minus_one, \ aR, \ @@ -63,6 +64,9 @@ void PASTEMAC(ch,varname)( \ alpha, \ b, rs_b, cs_b ); \ \ + /* b = inv(a) * b; + bd = b; (if gemm ukernel needs duplicated B) + c = b; */ \ PASTEMAC(ch,trsmukr)( a, \ b, \ bd, \ diff --git a/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.h b/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.h index 9732dc0bf..63a66f6af 100644 --- a/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.h +++ b/frame/3/trsm/ukernels/bli_gemmtrsm_u_ref_mxn.h @@ -33,22 +33,6 @@ */ -// -// Define micro-kernel blocksizes. -// -#define bli_sgemmtrsm_u_ref_mxn_mr BLIS_DEFAULT_MR_S -#define bli_sgemmtrsm_u_ref_mxn_nr BLIS_DEFAULT_NR_S - -#define bli_dgemmtrsm_u_ref_mxn_mr BLIS_DEFAULT_MR_D -#define bli_dgemmtrsm_u_ref_mxn_nr BLIS_DEFAULT_NR_D - -#define bli_cgemmtrsm_u_ref_mxn_mr BLIS_DEFAULT_MR_C -#define bli_cgemmtrsm_u_ref_mxn_nr BLIS_DEFAULT_NR_C - -#define bli_zgemmtrsm_u_ref_mxn_mr BLIS_DEFAULT_MR_Z -#define bli_zgemmtrsm_u_ref_mxn_nr BLIS_DEFAULT_NR_Z - - // // Prototype micro-kernel interfaces. // diff --git a/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.c b/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.c index a6d31fb59..13687457e 100644 --- a/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.c +++ b/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.c @@ -45,8 +45,8 @@ void PASTEMAC(ch,varname)( \ ctype* restrict c, inc_t rs_c, inc_t cs_c \ ) \ { \ - const dim_t MR = PASTEMAC2(ch,varname,_mr); \ - const dim_t NR = PASTEMAC2(ch,varname,_nr); \ + const dim_t MR = PASTEMAC(ch,mr); \ + const dim_t NR = PASTEMAC(ch,nr); \ \ const dim_t m = MR; \ const dim_t n = NR; \ diff --git a/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.h b/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.h index 9f4a702d9..59c5a0dc9 100644 --- a/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.h +++ b/frame/3/trsm/ukernels/bli_trsm_l_ref_mxn.h @@ -33,22 +33,6 @@ */ -// -// Define micro-kernel blocksizes. -// -#define bli_strsm_l_ref_mxn_mr BLIS_DEFAULT_MR_S -#define bli_strsm_l_ref_mxn_nr BLIS_DEFAULT_NR_S - -#define bli_dtrsm_l_ref_mxn_mr BLIS_DEFAULT_MR_D -#define bli_dtrsm_l_ref_mxn_nr BLIS_DEFAULT_NR_D - -#define bli_ctrsm_l_ref_mxn_mr BLIS_DEFAULT_MR_C -#define bli_ctrsm_l_ref_mxn_nr BLIS_DEFAULT_NR_C - -#define bli_ztrsm_l_ref_mxn_mr BLIS_DEFAULT_MR_Z -#define bli_ztrsm_l_ref_mxn_nr BLIS_DEFAULT_NR_Z - - // // Prototype micro-kernel interfaces. // diff --git a/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.c b/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.c index 25bd2af71..e88fcabf5 100644 --- a/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.c +++ b/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.c @@ -45,8 +45,8 @@ void PASTEMAC(ch,varname)( \ ctype* restrict c, inc_t rs_c, inc_t cs_c \ ) \ { \ - const dim_t MR = PASTEMAC2(ch,varname,_mr); \ - const dim_t NR = PASTEMAC2(ch,varname,_nr); \ + const dim_t MR = PASTEMAC(ch,mr); \ + const dim_t NR = PASTEMAC(ch,nr); \ \ const dim_t m = MR; \ const dim_t n = NR; \ diff --git a/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.h b/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.h index e595e1732..80d7748a7 100644 --- a/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.h +++ b/frame/3/trsm/ukernels/bli_trsm_u_ref_mxn.h @@ -33,22 +33,6 @@ */ -// -// Define micro-kernel blocksizes. -// -#define bli_strsm_u_ref_mxn_mr BLIS_DEFAULT_MR_S -#define bli_strsm_u_ref_mxn_nr BLIS_DEFAULT_NR_S - -#define bli_dtrsm_u_ref_mxn_mr BLIS_DEFAULT_MR_D -#define bli_dtrsm_u_ref_mxn_nr BLIS_DEFAULT_NR_D - -#define bli_ctrsm_u_ref_mxn_mr BLIS_DEFAULT_MR_C -#define bli_ctrsm_u_ref_mxn_nr BLIS_DEFAULT_NR_C - -#define bli_ztrsm_u_ref_mxn_mr BLIS_DEFAULT_MR_Z -#define bli_ztrsm_u_ref_mxn_nr BLIS_DEFAULT_NR_Z - - // // Prototype micro-kernel interfaces. //