From eb13cb2c6b182df5e2a9b88c76f50e2cee25b9e0 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Mon, 3 Feb 2014 11:07:01 -0600 Subject: [PATCH] Removed redundant non-gemm blksz_t creation. Details: - Removed code that creates duplicate blksz_t objects for herk, trmm, and trsm. Instead, the gemm blksz_t objects are accessed via extern and used directly. This reduces the amount of code associated with each of the three _cntl_init() and _cntl_finalize() functions. --- frame/3/gemm/bli_gemm_cntl_exp.c | 4 - frame/3/her2k/attic/bli_her2k_cntl.c | 77 +++---------------- frame/3/herk/bli_herk_cntl.c | 92 ++++------------------ frame/3/trmm/bli_trmm_cntl.c | 109 ++++++--------------------- frame/3/trsm/bli_trsm_cntl.c | 107 ++++++-------------------- 5 files changed, 72 insertions(+), 317 deletions(-) diff --git a/frame/3/gemm/bli_gemm_cntl_exp.c b/frame/3/gemm/bli_gemm_cntl_exp.c index 633753b24..20a5beac2 100644 --- a/frame/3/gemm/bli_gemm_cntl_exp.c +++ b/frame/3/gemm/bli_gemm_cntl_exp.c @@ -47,8 +47,6 @@ extern func_t* gemm_ukrs; extern packm_t* gemm_packa_cntl; extern packm_t* gemm_packb_cntl; -extern packm_t* gemm_packc_cntl; -extern unpackm_t* gemm_unpackc_cntl; gemm_t* gemm_cntl5; @@ -82,10 +80,8 @@ void bli_gemm_cntl_init_exp() NULL, gemm_packa_cntl, NULL, - //gemm_packc_cntl, NULL, gemm_cntl_bp_ke5, - //gemm_unpackc_cntl ); NULL ); gemm_cntl_mm_pm diff --git a/frame/3/her2k/attic/bli_her2k_cntl.c b/frame/3/her2k/attic/bli_her2k_cntl.c index c4c14c821..fc2770511 100644 --- a/frame/3/her2k/attic/bli_her2k_cntl.c +++ b/frame/3/her2k/attic/bli_her2k_cntl.c @@ -35,20 +35,20 @@ #include "blis.h" extern scalm_t* scalm_cntl; -extern herk_t* herk_cntl_bp_ke; + +extern blksz_t* gemm_mc; +extern blksz_t* gemm_nc; +extern blksz_t* gemm_kc; +extern blksz_t* gemm_mr; +extern blksz_t* gemm_nr; +extern blksz_t* gemm_kr; + extern func_t* gemm_ukrs; -blksz_t* her2k_mc; -blksz_t* her2k_nc; -blksz_t* her2k_kc; -blksz_t* her2k_mr; -blksz_t* her2k_nr; -blksz_t* her2k_kr; +extern 
herk_t* herk_cntl_bp_ke; packm_t* her2k_packa_cntl; packm_t* her2k_packb_cntl; -packm_t* her2k_packc_cntl; -unpackm_t* her2k_unpackc_cntl; her2k_t* her2k_cntl_bp_ke; her2k_t* her2k_cntl_op_bp; @@ -60,37 +60,6 @@ her2k_t* her2k_cntl; void bli_her2k_cntl_init() { - // Create blocksize objects for each dimension. - her2k_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, - BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, - BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, - BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); - - her2k_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, - BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, - BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, - BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); - - her2k_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, - BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, - BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, - BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); - - her2k_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, - BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, - BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, - BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); - - her2k_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, - BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, - BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, - BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); - - her2k_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, - BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, - BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, - BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); - // Create control tree objects for packm operations. her2k_packa_cntl @@ -119,25 +88,6 @@ void bli_her2k_cntl_init() BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); - // Create control tree objects for packm/unpackm operations on C. - her2k_packc_cntl - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - her2k_mr, - her2k_nr, - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? 
- BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_GEN_USE ); - - her2k_unpackc_cntl - = - bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - NULL ); // no blocksize needed // Create control tree object for lowest-level block-panel kernel. her2k_cntl_bp_ke @@ -203,17 +153,8 @@ void bli_her2k_cntl_init() void bli_her2k_cntl_finalize() { - bli_blksz_obj_free( her2k_mc ); - bli_blksz_obj_free( her2k_nc ); - bli_blksz_obj_free( her2k_kc ); - bli_blksz_obj_free( her2k_mr ); - bli_blksz_obj_free( her2k_nr ); - bli_blksz_obj_free( her2k_kr ); - bli_cntl_obj_free( her2k_packa_cntl ); bli_cntl_obj_free( her2k_packb_cntl ); - bli_cntl_obj_free( her2k_packc_cntl ); - bli_cntl_obj_free( her2k_unpackc_cntl ); bli_cntl_obj_free( her2k_cntl_bp_ke ); bli_cntl_obj_free( her2k_cntl_op_bp ); diff --git a/frame/3/herk/bli_herk_cntl.c b/frame/3/herk/bli_herk_cntl.c index ed9cb49ef..3340003a3 100644 --- a/frame/3/herk/bli_herk_cntl.c +++ b/frame/3/herk/bli_herk_cntl.c @@ -35,19 +35,18 @@ #include "blis.h" extern scalm_t* scalm_cntl; -extern func_t* gemm_ukrs; -blksz_t* herk_mc; -blksz_t* herk_nc; -blksz_t* herk_kc; -blksz_t* herk_mr; -blksz_t* herk_nr; -blksz_t* herk_kr; +extern blksz_t* gemm_mc; +extern blksz_t* gemm_nc; +extern blksz_t* gemm_kc; +extern blksz_t* gemm_mr; +extern blksz_t* gemm_nr; +extern blksz_t* gemm_kr; + +extern func_t* gemm_ukrs; packm_t* herk_packa_cntl; packm_t* herk_packb_cntl; -packm_t* herk_packc_cntl; -unpackm_t* herk_unpackc_cntl; herk_t* herk_cntl_bp_ke; herk_t* herk_cntl_op_bp; @@ -59,45 +58,13 @@ herk_t* herk_cntl; void bli_herk_cntl_init() { - // Create blocksize objects for each dimension. 
- herk_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, - BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, - BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, - BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); - - herk_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, - BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, - BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, - BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); - - herk_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, - BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, - BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, - BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); - - herk_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, - BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, - BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, - BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); - - herk_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, - BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, - BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, - BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); - - herk_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, - BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, - BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, - BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); - - // Create control tree objects for packm operations. herk_packa_cntl = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - herk_mr, - herk_kr, + gemm_mr, + gemm_kr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -109,8 +76,8 @@ void bli_herk_cntl_init() = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - herk_kr, - herk_nr, + gemm_kr, + gemm_nr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -118,26 +85,6 @@ void bli_herk_cntl_init() BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); - // Create control tree objects for packm/unpackm operations on C. 
- herk_packc_cntl - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - herk_mr, - herk_nr, - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_GEN_USE ); - - herk_unpackc_cntl - = - bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - NULL ); // no blocksize needed - // Create control tree object for lowest-level block-panel kernel. herk_cntl_bp_ke @@ -155,7 +102,7 @@ void bli_herk_cntl_init() = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - herk_mc, + gemm_mc, NULL, NULL, herk_packa_cntl, @@ -170,7 +117,7 @@ void bli_herk_cntl_init() = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - herk_kc, + gemm_kc, NULL, NULL, NULL, @@ -185,7 +132,7 @@ void bli_herk_cntl_init() = bli_herk_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - herk_nc, + gemm_nc, NULL, NULL, NULL, @@ -200,17 +147,8 @@ void bli_herk_cntl_init() void bli_herk_cntl_finalize() { - bli_blksz_obj_free( herk_mc ); - bli_blksz_obj_free( herk_nc ); - bli_blksz_obj_free( herk_kc ); - bli_blksz_obj_free( herk_mr ); - bli_blksz_obj_free( herk_nr ); - bli_blksz_obj_free( herk_kr ); - bli_cntl_obj_free( herk_packa_cntl ); bli_cntl_obj_free( herk_packb_cntl ); - bli_cntl_obj_free( herk_packc_cntl ); - bli_cntl_obj_free( herk_unpackc_cntl ); bli_cntl_obj_free( herk_cntl_bp_ke ); bli_cntl_obj_free( herk_cntl_op_bp ); diff --git a/frame/3/trmm/bli_trmm_cntl.c b/frame/3/trmm/bli_trmm_cntl.c index 1dd3bb7d6..06dbc0b69 100644 --- a/frame/3/trmm/bli_trmm_cntl.c +++ b/frame/3/trmm/bli_trmm_cntl.c @@ -35,15 +35,17 @@ #include "blis.h" extern scalm_t* scalm_cntl; -extern gemm_t* gemm_cntl_bp_ke; + +extern blksz_t* gemm_mc; +extern blksz_t* gemm_nc; +extern blksz_t* gemm_kc; +extern blksz_t* gemm_mr; +extern blksz_t* gemm_nr; +extern blksz_t* gemm_kr; + extern func_t* gemm_ukrs; -blksz_t* trmm_mc; -blksz_t* trmm_nc; -blksz_t* 
trmm_kc; -blksz_t* trmm_mr; -blksz_t* trmm_nr; -blksz_t* trmm_kr; +extern gemm_t* gemm_cntl_bp_ke; packm_t* trmm_l_packa_cntl; packm_t* trmm_l_packb_cntl; @@ -51,9 +53,6 @@ packm_t* trmm_l_packb_cntl; packm_t* trmm_r_packa_cntl; packm_t* trmm_r_packb_cntl; -packm_t* trmm_packc_cntl; -unpackm_t* trmm_unpackc_cntl; - trmm_t* trmm_cntl_bp_ke; trmm_t* trmm_l_cntl_op_bp; @@ -70,38 +69,6 @@ trmm_t* trmm_r_cntl; void bli_trmm_cntl_init() { - // Create blocksize objects for each dimension. - trmm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, - BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, - BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, - BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); - - trmm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, - BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, - BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, - BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); - - trmm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, - BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, - BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, - BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); - - trmm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, - BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, - BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, - BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); - - trmm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, - BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, - BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, - BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); - - trmm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, - BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, - BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, - BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); - - // Create control tree objects for packm operations (left side). trmm_l_packa_cntl = @@ -109,8 +76,8 @@ void bli_trmm_cntl_init() BLIS_VARIANT3, // pack panels of A compactly // IMPORTANT: for consistency with trsm, "k" dim // multiple is set to mr. 
- trmm_mr, - trmm_mr, + gemm_mr, + gemm_mr, TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -124,8 +91,8 @@ void bli_trmm_cntl_init() BLIS_VARIANT2, // IMPORTANT: m dim multiple here must be mr // since "k" dim multiple is set to mr above. - trmm_mr, - trmm_nr, + gemm_mr, + gemm_nr, FALSE, // already dense FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -140,8 +107,8 @@ void bli_trmm_cntl_init() BLIS_VARIANT2, // IMPORTANT: for consistency with trsm, "k" dim // multiple is set to nr. - trmm_mr, - trmm_nr, + gemm_mr, + gemm_nr, FALSE, // already dense FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -155,8 +122,8 @@ void bli_trmm_cntl_init() BLIS_VARIANT3, // pack panels of B compactly // IMPORTANT: m dim multiple here must be nr // since "k" dim multiple is set to nr above. - trmm_nr, - trmm_nr, + gemm_nr, + gemm_nr, TRUE, // densify FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -164,26 +131,6 @@ void bli_trmm_cntl_init() BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); - // Create control tree objects for packm/unpackm operations on C. - trmm_packc_cntl - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - trmm_mr, - trmm_nr, - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_GEN_USE ); - - trmm_unpackc_cntl - = - bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - NULL ); // no blocksize needed - // Create control tree object for lowest-level block-panel kernel. 
trmm_cntl_bp_ke @@ -201,7 +148,7 @@ void bli_trmm_cntl_init() = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - trmm_mc, + gemm_mc, NULL, NULL, trmm_l_packa_cntl, @@ -217,7 +164,7 @@ void bli_trmm_cntl_init() = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - trmm_kc, + gemm_kc, NULL, NULL, NULL, @@ -233,7 +180,7 @@ void bli_trmm_cntl_init() = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - trmm_nc, + gemm_nc, NULL, NULL, NULL, @@ -249,7 +196,7 @@ void bli_trmm_cntl_init() = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - trmm_mc, + gemm_mc, NULL, NULL, trmm_r_packa_cntl, @@ -265,7 +212,7 @@ void bli_trmm_cntl_init() = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - trmm_kc, + gemm_kc, NULL, NULL, NULL, @@ -281,7 +228,7 @@ void bli_trmm_cntl_init() = bli_trmm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - trmm_nc, + gemm_nc, NULL, NULL, NULL, @@ -298,21 +245,13 @@ void bli_trmm_cntl_init() void bli_trmm_cntl_finalize() { - bli_blksz_obj_free( trmm_mc ); - bli_blksz_obj_free( trmm_nc ); - bli_blksz_obj_free( trmm_kc ); - bli_blksz_obj_free( trmm_mr ); - bli_blksz_obj_free( trmm_nr ); - bli_blksz_obj_free( trmm_kr ); - bli_cntl_obj_free( trmm_l_packa_cntl ); bli_cntl_obj_free( trmm_l_packb_cntl ); bli_cntl_obj_free( trmm_r_packa_cntl ); bli_cntl_obj_free( trmm_r_packb_cntl ); - bli_cntl_obj_free( trmm_packc_cntl ); - bli_cntl_obj_free( trmm_unpackc_cntl ); bli_cntl_obj_free( trmm_cntl_bp_ke ); + bli_cntl_obj_free( trmm_l_cntl_op_bp ); bli_cntl_obj_free( trmm_l_cntl_mm_op ); bli_cntl_obj_free( trmm_l_cntl_vl_mm ); diff --git a/frame/3/trsm/bli_trsm_cntl.c b/frame/3/trsm/bli_trsm_cntl.c index 4ecd5865c..c856a43f3 100644 --- a/frame/3/trsm/bli_trsm_cntl.c +++ b/frame/3/trsm/bli_trsm_cntl.c @@ -35,15 +35,17 @@ #include "blis.h" extern scalm_t* scalm_cntl; -extern gemm_t* gemm_cntl_bp_ke; + +extern blksz_t* gemm_mc; +extern blksz_t* gemm_nc; +extern blksz_t* gemm_kc; +extern blksz_t* gemm_mr; +extern blksz_t* gemm_nr; +extern 
blksz_t* gemm_kr; + extern func_t* gemm_ukrs; -blksz_t* trsm_mc; -blksz_t* trsm_nc; -blksz_t* trsm_kc; -blksz_t* trsm_mr; -blksz_t* trsm_nr; -blksz_t* trsm_kr; +extern gemm_t* gemm_cntl_bp_ke; func_t* gemmtrsm_l_ukrs; func_t* gemmtrsm_u_ukrs; @@ -54,9 +56,6 @@ packm_t* trsm_l_packb_cntl; packm_t* trsm_r_packa_cntl; packm_t* trsm_r_packb_cntl; -packm_t* trsm_packc_cntl; -unpackm_t* trsm_unpackc_cntl; - trsm_t* trsm_cntl_bp_ke; trsm_t* trsm_l_cntl_op_bp; @@ -73,36 +72,6 @@ trsm_t* trsm_r_cntl; void bli_trsm_cntl_init() { - // Create blocksize objects for each dimension. - trsm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S, - BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D, - BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C, - BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z ); - - trsm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S, - BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D, - BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C, - BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z ); - - trsm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S, - BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D, - BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C, - BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z ); - - trsm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S, - BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D, - BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C, - BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z ); - - trsm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S, - BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D, - BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C, - BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z ); - - trsm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S, - BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D, - BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C, - BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z ); gemmtrsm_l_ukrs = bli_func_obj_create( BLIS_SGEMMTRSM_L_UKERNEL, BLIS_DGEMMTRSM_L_UKERNEL, @@ -122,8 +91,8 @@ void bli_trsm_cntl_init() BLIS_VARIANT3, // pack panels of A compactly // IMPORTANT: n dim multiple must be mr to // support right and bottom-right edge cases - 
trsm_mr, - trsm_mr, + gemm_mr, + gemm_mr, TRUE, // densify TRUE, // invert diagonal TRUE, // reverse iteration if upper? @@ -137,8 +106,8 @@ void bli_trsm_cntl_init() BLIS_VARIANT2, // IMPORTANT: m dim multiple must be mr since // B_pack is updated (ie: serves as C) in trsm - trsm_mr, - trsm_nr, + gemm_mr, + gemm_nr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -151,8 +120,8 @@ void bli_trsm_cntl_init() = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - trsm_nr, - trsm_mr, + gemm_nr, + gemm_mr, FALSE, // already dense; densify not necessary FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? @@ -164,8 +133,8 @@ void bli_trsm_cntl_init() = bli_packm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, // pack panels of B compactly - trsm_mr, - trsm_mr, + gemm_mr, + gemm_mr, TRUE, // densify TRUE, // invert diagonal FALSE, // reverse iteration if upper? @@ -173,26 +142,6 @@ void bli_trsm_cntl_init() BLIS_PACKED_COL_PANELS, BLIS_BUFFER_FOR_B_PANEL ); - // Create control tree objects for packm/unpackm operations on C. - trsm_packc_cntl - = - bli_packm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - trsm_mr, - trsm_nr, - FALSE, // already dense; densify not necessary - FALSE, // do NOT invert diagonal - FALSE, // reverse iteration if upper? - FALSE, // reverse iteration if lower? - BLIS_PACKED_COLUMNS, - BLIS_BUFFER_FOR_GEN_USE ); - - trsm_unpackc_cntl - = - bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED, - BLIS_VARIANT1, - NULL ); // no blocksize needed - // Create control tree object for lowest-level block-panel kernel. 
trsm_cntl_bp_ke @@ -212,7 +161,7 @@ void bli_trsm_cntl_init() = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - trsm_mc, + gemm_mc, NULL, NULL, NULL, NULL, trsm_l_packa_cntl, @@ -228,7 +177,7 @@ void bli_trsm_cntl_init() = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - trsm_kc, + gemm_kc, NULL, NULL, NULL, NULL, NULL, @@ -244,7 +193,7 @@ void bli_trsm_cntl_init() = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - trsm_nc, + gemm_nc, NULL, NULL, NULL, NULL, NULL, @@ -260,7 +209,7 @@ void bli_trsm_cntl_init() = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT1, - trsm_mc, + gemm_mc, NULL, NULL, NULL, NULL, trsm_r_packa_cntl, @@ -276,7 +225,7 @@ void bli_trsm_cntl_init() = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT3, - trsm_kc, + gemm_kc, NULL, NULL, NULL, NULL, NULL, @@ -292,7 +241,7 @@ void bli_trsm_cntl_init() = bli_trsm_cntl_obj_create( BLIS_BLOCKED, BLIS_VARIANT2, - trsm_nc, + gemm_nc, NULL, NULL, NULL, NULL, NULL, @@ -309,13 +258,6 @@ void bli_trsm_cntl_init() void bli_trsm_cntl_finalize() { - bli_blksz_obj_free( trsm_mc ); - bli_blksz_obj_free( trsm_nc ); - bli_blksz_obj_free( trsm_kc ); - bli_blksz_obj_free( trsm_mr ); - bli_blksz_obj_free( trsm_nr ); - bli_blksz_obj_free( trsm_kr ); - bli_func_obj_free( gemmtrsm_l_ukrs ); bli_func_obj_free( gemmtrsm_u_ukrs ); @@ -323,10 +265,9 @@ void bli_trsm_cntl_finalize() bli_cntl_obj_free( trsm_l_packb_cntl ); bli_cntl_obj_free( trsm_r_packa_cntl ); bli_cntl_obj_free( trsm_r_packb_cntl ); - bli_cntl_obj_free( trsm_packc_cntl ); - bli_cntl_obj_free( trsm_unpackc_cntl ); bli_cntl_obj_free( trsm_cntl_bp_ke ); + bli_cntl_obj_free( trsm_l_cntl_op_bp ); bli_cntl_obj_free( trsm_l_cntl_mm_op ); bli_cntl_obj_free( trsm_l_cntl_vl_mm );