From cf5efdde0588a0d5b6ea57fe7d7be5000be06f8e Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Thu, 11 Sep 2014 11:47:56 -0500 Subject: [PATCH] Pass pack_t schemas into ukernels via auxinfo_t. Details: - Modified macro-kernels to pass the pack_t schema values for matrices A and B into the datatype-specific functions, where they are now inserted into a newly-expanded auxinfo_t struct. This gives the micro-kernels access to the pack_t schema values embedded in the control trees, which determine the precise format into which the matrix elements are packed. - Updated a call to bli_packm_init_pack() in src/test_libblis.c to remove densify argument. Meant to include this in commit c472993b. --- frame/3/gemm/bli_gemm_ker_var2.c | 15 ++++++++- frame/3/gemm/bli_gemm_ker_var2.h | 2 ++ frame/3/herk/bli_herk_l_ker_var2.c | 13 ++++++++ frame/3/herk/bli_herk_l_ker_var2.h | 2 ++ frame/3/herk/bli_herk_u_ker_var2.c | 13 ++++++++ frame/3/herk/bli_herk_u_ker_var2.h | 2 ++ frame/3/trmm/bli_trmm_ll_ker_var2.c | 13 ++++++++ frame/3/trmm/bli_trmm_ll_ker_var2.h | 2 ++ frame/3/trmm/bli_trmm_lu_ker_var2.c | 13 ++++++++ frame/3/trmm/bli_trmm_lu_ker_var2.h | 2 ++ frame/3/trmm/bli_trmm_rl_ker_var2.c | 13 ++++++++ frame/3/trmm/bli_trmm_rl_ker_var2.h | 2 ++ frame/3/trmm/bli_trmm_ru_ker_var2.c | 13 ++++++++ frame/3/trmm/bli_trmm_ru_ker_var2.h | 2 ++ frame/3/trsm/bli_trsm_ll_ker_var2.c | 13 ++++++++ frame/3/trsm/bli_trsm_ll_ker_var2.h | 2 ++ frame/3/trsm/bli_trsm_lu_ker_var2.c | 13 ++++++++ frame/3/trsm/bli_trsm_lu_ker_var2.h | 2 ++ frame/3/trsm/bli_trsm_rl_ker_var2.c | 15 +++++++++ frame/3/trsm/bli_trsm_rl_ker_var2.h | 2 ++ frame/3/trsm/bli_trsm_ru_ker_var2.c | 15 +++++++++ frame/3/trsm/bli_trsm_ru_ker_var2.h | 2 ++ frame/include/bli_auxinfo_macro_defs.h | 18 +++++++---- frame/include/bli_type_defs.h | 42 ++++++++++++++------------ testsuite/src/test_libblis.c | 3 +- 25 files changed, 206 insertions(+), 28 deletions(-) diff --git a/frame/3/gemm/bli_gemm_ker_var2.c 
b/frame/3/gemm/bli_gemm_ker_var2.c index f15a84836..0764e3c0f 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -37,6 +37,8 @@ #define FUNCPTR_T gemm_fp typedef void (*FUNCPTR_T)( + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -60,6 +62,9 @@ void bli_gemm_ker_var2( obj_t* a, { num_t dt_exec = bli_obj_execution_datatype( *c ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -111,7 +116,9 @@ void bli_gemm_ker_var2( obj_t* a, gemm_ukr = bli_func_obj_query( dt_exec, gemm_ukrs ); // Invoke the function. - f( m, + f( schema_a, + schema_b, + m, n, k, buf_alpha, @@ -128,6 +135,8 @@ void bli_gemm_ker_var2( obj_t* a, #define GENTFUNC( ctype, ch, varname, ukrtype ) \ \ void PASTEMAC(ch,varname)( \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -212,6 +221,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel strides of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_a( ps_a, aux ); \ diff --git a/frame/3/gemm/bli_gemm_ker_var2.h b/frame/3/gemm/bli_gemm_ker_var2.h index 88350143f..f6dea6bea 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.h +++ b/frame/3/gemm/bli_gemm_ker_var2.h @@ -50,6 +50,8 @@ void bli_gemm_ker_var2( obj_t* a, #define GENTPROT( ctype, ch, varname ) \ \ void PASTEMAC(ch,varname)( \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/herk/bli_herk_l_ker_var2.c b/frame/3/herk/bli_herk_l_ker_var2.c index 30e562cf2..8b1a06559 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.c +++ b/frame/3/herk/bli_herk_l_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffc, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -63,6 +65,9 @@ void bli_herk_l_ker_var2( obj_t* a, doff_t diagoffc = bli_obj_diag_offset( *c ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -115,6 +120,8 @@ void bli_herk_l_ker_var2( obj_t* a, // Invoke the function. f( diagoffc, + schema_a, + schema_b, m, n, k, @@ -133,6 +140,8 @@ void bli_herk_l_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffc, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -243,6 +252,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel strides of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_a( ps_a, aux ); \ diff --git a/frame/3/herk/bli_herk_l_ker_var2.h b/frame/3/herk/bli_herk_l_ker_var2.h index e6154024c..0d4f1ac57 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.h +++ b/frame/3/herk/bli_herk_l_ker_var2.h @@ -51,6 +51,8 @@ void bli_herk_l_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffc, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/herk/bli_herk_u_ker_var2.c b/frame/3/herk/bli_herk_u_ker_var2.c index f1eaf7ecf..367a41548 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.c +++ b/frame/3/herk/bli_herk_u_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffc, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -63,6 +65,9 @@ void bli_herk_u_ker_var2( obj_t* a, doff_t diagoffc = bli_obj_diag_offset( *c ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -115,6 +120,8 @@ void bli_herk_u_ker_var2( obj_t* a, // Invoke the function. f( diagoffc, + schema_a, + schema_b, m, n, k, @@ -133,6 +140,8 @@ void bli_herk_u_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffc, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -243,6 +252,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel strides of A and B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_a( ps_a, aux ); \ diff --git a/frame/3/herk/bli_herk_u_ker_var2.h b/frame/3/herk/bli_herk_u_ker_var2.h index 48e5a04cf..d24912727 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.h +++ b/frame/3/herk/bli_herk_u_ker_var2.h @@ -51,6 +51,8 @@ void bli_herk_u_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffc, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index dfe3a36e5..2cc93b4ae 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffa, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -63,6 +65,9 @@ void bli_trmm_ll_ker_var2( obj_t* a, doff_t diagoffa = bli_obj_diag_offset( *a ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -125,6 +130,8 @@ void bli_trmm_ll_ker_var2( obj_t* a, // Invoke the function. f( diagoffa, + schema_a, + schema_b, m, n, k, @@ -143,6 +150,8 @@ void bli_trmm_ll_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -263,6 +272,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel stride of B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_b( ps_b, aux ); \ diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.h b/frame/3/trmm/bli_trmm_ll_ker_var2.h index 8e5b3e066..b8ceffaaf 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.h +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.h @@ -51,6 +51,8 @@ void bli_trmm_ll_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.c b/frame/3/trmm/bli_trmm_lu_ker_var2.c index 1ccb29d6d..f8d6a4278 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.c +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffa, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -63,6 +65,9 @@ void bli_trmm_lu_ker_var2( obj_t* a, doff_t diagoffa = bli_obj_diag_offset( *a ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -125,6 +130,8 @@ void bli_trmm_lu_ker_var2( obj_t* a, // Invoke the function. f( diagoffa, + schema_a, + schema_b, m, n, k, @@ -143,6 +150,8 @@ void bli_trmm_lu_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -270,6 +279,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel stride of B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_b( ps_b, aux ); \ diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.h b/frame/3/trmm/bli_trmm_lu_ker_var2.h index 1a9ae3352..378ff32d7 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.h +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.h @@ -51,6 +51,8 @@ void bli_trmm_lu_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index 261b45ba9..1d1902be8 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffb, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -63,6 +65,9 @@ void bli_trmm_rl_ker_var2( obj_t* a, doff_t diagoffb = bli_obj_diag_offset( *b ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -125,6 +130,8 @@ void bli_trmm_rl_ker_var2( obj_t* a, // Invoke the function. f( diagoffb, + schema_a, + schema_b, m, n, k, @@ -143,6 +150,8 @@ void bli_trmm_rl_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffb, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -270,6 +279,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel stride of A to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_a( ps_a, aux ); \ diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.h b/frame/3/trmm/bli_trmm_rl_ker_var2.h index 278ae11c0..19e1461af 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.h +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.h @@ -51,6 +51,8 @@ void bli_trmm_rl_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffb, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 9bbc576b5..f0243a39b 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffb, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -63,6 +65,9 @@ void bli_trmm_ru_ker_var2( obj_t* a, doff_t diagoffb = bli_obj_diag_offset( *b ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -125,6 +130,8 @@ void bli_trmm_ru_ker_var2( obj_t* a, // Invoke the function. f( diagoffb, + schema_a, + schema_b, m, n, k, @@ -143,6 +150,8 @@ void bli_trmm_ru_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffb, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -271,6 +280,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel stride of A to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_a( ps_a, aux ); \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.h b/frame/3/trmm/bli_trmm_ru_ker_var2.h index b7cdc0944..f233e628d 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.h +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.h @@ -51,6 +51,8 @@ void bli_trmm_ru_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffb, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 812ec1ecc..82d4337e5 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffa, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -64,6 +66,9 @@ void bli_trsm_ll_ker_var2( obj_t* a, doff_t diagoffa = bli_obj_diag_offset( *a ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -132,6 +137,8 @@ void bli_trsm_ll_ker_var2( obj_t* a, // Invoke the function. f( diagoffa, + schema_a, + schema_b, m, n, k, @@ -151,6 +158,8 @@ void bli_trsm_ll_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -291,6 +300,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel stride of B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_b( ps_b, aux ); \ diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.h b/frame/3/trsm/bli_trsm_ll_ker_var2.h index 302cc3d20..7c2416870 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.h +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.h @@ -51,6 +51,8 @@ void bli_trsm_ll_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.c b/frame/3/trsm/bli_trsm_lu_ker_var2.c index c36d197ba..16a2dcc26 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.c +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffa, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -64,6 +66,9 @@ void bli_trsm_lu_ker_var2( obj_t* a, doff_t diagoffa = bli_obj_diag_offset( *a ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -132,6 +137,8 @@ void bli_trsm_lu_ker_var2( obj_t* a, // Invoke the function. f( diagoffa, + schema_a, + schema_b, m, n, k, @@ -151,6 +158,8 @@ void bli_trsm_lu_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -299,6 +308,10 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. */ \ + bli_auxinfo_set_schema_a( schema_a, aux ); \ + bli_auxinfo_set_schema_b( schema_b, aux ); \ \ /* Save the panel stride of B to the auxinfo_t object. 
*/ \ bli_auxinfo_set_ps_b( ps_b, aux ); \ diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.h b/frame/3/trsm/bli_trsm_lu_ker_var2.h index 6cc4bb874..4bbd4a4d2 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.h +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.h @@ -51,6 +51,8 @@ void bli_trsm_lu_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index 48fb37ab3..33f1f0131 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffb, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -64,6 +66,9 @@ void bli_trsm_rl_ker_var2( obj_t* a, doff_t diagoffb = bli_obj_diag_offset( *b ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -132,6 +137,8 @@ void bli_trsm_rl_ker_var2( obj_t* a, // Invoke the function. f( diagoffb, + schema_a, + schema_b, m, n, k, @@ -151,6 +158,8 @@ void bli_trsm_rl_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffb, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -312,6 +321,12 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. + NOTE: We swap the values for A and B since the triangular + "A" matrix is actually contained within B. */ \ + bli_auxinfo_set_schema_a( schema_b, aux ); \ + bli_auxinfo_set_schema_b( schema_a, aux ); \ \ /* Save the panel stride of A to the auxinfo_t object. 
NOTE: We swap the values for A and B since the triangular diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.h b/frame/3/trsm/bli_trsm_rl_ker_var2.h index 5dc49a6b9..652282248 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.h +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.h @@ -51,6 +51,8 @@ void bli_trsm_rl_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index 7e467d08c..056fa5424 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -38,6 +38,8 @@ typedef void (*FUNCPTR_T)( doff_t diagoffb, + pack_t schema_a, + pack_t schema_b, dim_t m, dim_t n, dim_t k, @@ -64,6 +66,9 @@ void bli_trsm_ru_ker_var2( obj_t* a, doff_t diagoffb = bli_obj_diag_offset( *b ); + pack_t schema_a = bli_obj_pack_status( *a ); + pack_t schema_b = bli_obj_pack_status( *b ); + dim_t m = bli_obj_length( *c ); dim_t n = bli_obj_width( *c ); dim_t k = bli_obj_width( *a ); @@ -132,6 +137,8 @@ void bli_trsm_ru_ker_var2( obj_t* a, // Invoke the function. f( diagoffb, + schema_a, + schema_b, m, n, k, @@ -151,6 +158,8 @@ void bli_trsm_ru_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffb, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ @@ -307,6 +316,12 @@ void PASTEMAC(ch,varname)( \ \ rstep_c = rs_c * MR; \ cstep_c = cs_c * NR; \ +\ + /* Save the pack schemas of A and B to the auxinfo_t object. + NOTE: We swap the values for A and B since the triangular + "A" matrix is actually contained within B. */ \ + bli_auxinfo_set_schema_a( schema_b, aux ); \ + bli_auxinfo_set_schema_b( schema_a, aux ); \ \ /* Save the panel stride of A to the auxinfo_t object. 
NOTE: We swap the values for A and B since the triangular diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.h b/frame/3/trsm/bli_trsm_ru_ker_var2.h index b6c049db6..c37cf31f2 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.h +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.h @@ -51,6 +51,8 @@ void bli_trsm_ru_ker_var2( obj_t* a, \ void PASTEMAC(ch,varname)( \ doff_t diagoffa, \ + pack_t schema_a, \ + pack_t schema_b, \ dim_t m, \ dim_t n, \ dim_t k, \ diff --git a/frame/include/bli_auxinfo_macro_defs.h b/frame/include/bli_auxinfo_macro_defs.h index 46ac6eaf6..fc3b83f49 100644 --- a/frame/include/bli_auxinfo_macro_defs.h +++ b/frame/include/bli_auxinfo_macro_defs.h @@ -38,15 +38,21 @@ // auxinfo_t field query -#define bli_auxinfo_next_a( auxinfo ) ( (auxinfo)->a_next ) -#define bli_auxinfo_next_b( auxinfo ) ( (auxinfo)->b_next ) +#define bli_auxinfo_schema_a( auxinfo ) ( (auxinfo)->schema_a ) +#define bli_auxinfo_schema_b( auxinfo ) ( (auxinfo)->schema_b ) -#define bli_auxinfo_ps_a( auxinfo ) ( (auxinfo)->ps_a ) -#define bli_auxinfo_ps_b( auxinfo ) ( (auxinfo)->ps_b ) +#define bli_auxinfo_next_a( auxinfo ) ( (auxinfo)->a_next ) +#define bli_auxinfo_next_b( auxinfo ) ( (auxinfo)->b_next ) + +#define bli_auxinfo_ps_a( auxinfo ) ( (auxinfo)->ps_a ) +#define bli_auxinfo_ps_b( auxinfo ) ( (auxinfo)->ps_b ) // auxinfo_t field modification +#define bli_auxinfo_set_schema_a( schema, auxinfo ) { (auxinfo).schema_a = schema; } +#define bli_auxinfo_set_schema_b( schema, auxinfo ) { (auxinfo).schema_b = schema; } + #define bli_auxinfo_set_next_a( a_p, auxinfo ) { (auxinfo).a_next = a_p; } #define bli_auxinfo_set_next_b( b_p, auxinfo ) { (auxinfo).b_next = b_p; } @@ -56,8 +62,8 @@ bli_auxinfo_set_next_b( b_p, auxinfo ); \ } -#define bli_auxinfo_set_ps_a( a_p, auxinfo ) { (auxinfo).ps_a = a_p; } -#define bli_auxinfo_set_ps_b( b_p, auxinfo ) { (auxinfo).ps_b = b_p; } +#define bli_auxinfo_set_ps_a( ps, auxinfo ) { (auxinfo).ps_a = ps; } +#define bli_auxinfo_set_ps_b( ps, auxinfo ) { 
(auxinfo).ps_b = ps; } #endif diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index 9a3b63903..3ee95b16a 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -167,25 +167,6 @@ typedef double f77_double; typedef scomplex f77_scomplex; typedef dcomplex f77_dcomplex; -// -- Auxiliary kernel info type -- - -// Note: This struct is used by macro-kernels to package together extra -// parameter values that may be of use to the micro-kernel without -// cluttering up the micro-kernel interface itself. - -typedef struct -{ - // Pointers to the micro-panels of A and B which will be used by the - // next call to the micro-kernel. - void* a_next; - void* b_next; - - // The panel strides of A and B. - inc_t ps_a; - inc_t ps_b; - -} auxinfo_t; - // @@ -522,6 +503,29 @@ typedef struct func_s bool_t prefers_contig_rows[BLIS_NUM_FP_TYPES]; } func_t; +// -- Auxiliary kernel info type -- + +// Note: This struct is used by macro-kernels to package together extra +// parameter values that may be of use to the micro-kernel without +// cluttering up the micro-kernel interface itself. + +typedef struct +{ + // The pack schemas of A and B. + pack_t schema_a; + pack_t schema_b; + + // Pointers to the micro-panels of A and B which will be used by the + // next call to the micro-kernel. + void* a_next; + void* b_next; + + // The panel strides of A and B. + inc_t ps_a; + inc_t ps_b; + +} auxinfo_t; + // diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index d60963636..0fadba172 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -1578,8 +1578,7 @@ void libblis_test_pobj_create( blksz_t* m, blksz_t* n, invdiag_t inv_diag, pack_ bli_obj_alias_to( *a, *p ); // Then initialize p appropriately for packing. - bli_packm_init_pack( FALSE, - inv_diag, + bli_packm_init_pack( inv_diag, pack_schema, BLIS_PACK_FWD_IF_UPPER, BLIS_PACK_FWD_IF_LOWER,