Removed redundant non-gemm blksz_t creation.

Details:
- Removed code that creates duplicate blksz_t objects for herk, trmm,
  and trsm. Instead, the gemm blksz_t objects are accessed via extern
  and used directly. This reduces the amount of code associated with
  each of the three _cntl_init() and _cntl_finalize() functions.
This commit is contained in:
Field G. Van Zee
2014-02-03 11:07:01 -06:00
parent 0a023a7d9e
commit eb13cb2c6b
5 changed files with 72 additions and 317 deletions

View File

@@ -47,8 +47,6 @@ extern func_t* gemm_ukrs;
extern packm_t* gemm_packa_cntl;
extern packm_t* gemm_packb_cntl;
extern packm_t* gemm_packc_cntl;
extern unpackm_t* gemm_unpackc_cntl;
gemm_t* gemm_cntl5;
@@ -82,10 +80,8 @@ void bli_gemm_cntl_init_exp()
NULL,
gemm_packa_cntl,
NULL,
//gemm_packc_cntl,
NULL,
gemm_cntl_bp_ke5,
//gemm_unpackc_cntl );
NULL );
gemm_cntl_mm_pm

View File

@@ -35,20 +35,20 @@
#include "blis.h"
extern scalm_t* scalm_cntl;
extern herk_t* herk_cntl_bp_ke;
extern blksz_t* gemm_mc;
extern blksz_t* gemm_nc;
extern blksz_t* gemm_kc;
extern blksz_t* gemm_mr;
extern blksz_t* gemm_nr;
extern blksz_t* gemm_kr;
extern func_t* gemm_ukrs;
blksz_t* her2k_mc;
blksz_t* her2k_nc;
blksz_t* her2k_kc;
blksz_t* her2k_mr;
blksz_t* her2k_nr;
blksz_t* her2k_kr;
extern herk_t* herk_cntl_bp_ke;
packm_t* her2k_packa_cntl;
packm_t* her2k_packb_cntl;
packm_t* her2k_packc_cntl;
unpackm_t* her2k_unpackc_cntl;
her2k_t* her2k_cntl_bp_ke;
her2k_t* her2k_cntl_op_bp;
@@ -60,37 +60,6 @@ her2k_t* her2k_cntl;
void bli_her2k_cntl_init()
{
// Create blocksize objects for each dimension.
her2k_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
her2k_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
her2k_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
her2k_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
her2k_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
her2k_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
// Create control tree objects for packm operations.
her2k_packa_cntl
@@ -119,25 +88,6 @@ void bli_her2k_cntl_init()
BLIS_PACKED_COL_PANELS,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm/unpackm operations on C.
her2k_packc_cntl
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
her2k_mr,
her2k_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_GEN_USE );
her2k_unpackc_cntl
=
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
NULL ); // no blocksize needed
// Create control tree object for lowest-level block-panel kernel.
her2k_cntl_bp_ke
@@ -203,17 +153,8 @@ void bli_her2k_cntl_init()
void bli_her2k_cntl_finalize()
{
bli_blksz_obj_free( her2k_mc );
bli_blksz_obj_free( her2k_nc );
bli_blksz_obj_free( her2k_kc );
bli_blksz_obj_free( her2k_mr );
bli_blksz_obj_free( her2k_nr );
bli_blksz_obj_free( her2k_kr );
bli_cntl_obj_free( her2k_packa_cntl );
bli_cntl_obj_free( her2k_packb_cntl );
bli_cntl_obj_free( her2k_packc_cntl );
bli_cntl_obj_free( her2k_unpackc_cntl );
bli_cntl_obj_free( her2k_cntl_bp_ke );
bli_cntl_obj_free( her2k_cntl_op_bp );

View File

@@ -35,19 +35,18 @@
#include "blis.h"
extern scalm_t* scalm_cntl;
extern func_t* gemm_ukrs;
blksz_t* herk_mc;
blksz_t* herk_nc;
blksz_t* herk_kc;
blksz_t* herk_mr;
blksz_t* herk_nr;
blksz_t* herk_kr;
extern blksz_t* gemm_mc;
extern blksz_t* gemm_nc;
extern blksz_t* gemm_kc;
extern blksz_t* gemm_mr;
extern blksz_t* gemm_nr;
extern blksz_t* gemm_kr;
extern func_t* gemm_ukrs;
packm_t* herk_packa_cntl;
packm_t* herk_packb_cntl;
packm_t* herk_packc_cntl;
unpackm_t* herk_unpackc_cntl;
herk_t* herk_cntl_bp_ke;
herk_t* herk_cntl_op_bp;
@@ -59,45 +58,13 @@ herk_t* herk_cntl;
void bli_herk_cntl_init()
{
// Create blocksize objects for each dimension.
herk_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
herk_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
herk_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
herk_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
herk_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
herk_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
// Create control tree objects for packm operations.
herk_packa_cntl
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
herk_mr,
herk_kr,
gemm_mr,
gemm_kr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -109,8 +76,8 @@ void bli_herk_cntl_init()
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
herk_kr,
herk_nr,
gemm_kr,
gemm_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -118,26 +85,6 @@ void bli_herk_cntl_init()
BLIS_PACKED_COL_PANELS,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm/unpackm operations on C.
herk_packc_cntl
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
herk_mr,
herk_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_GEN_USE );
herk_unpackc_cntl
=
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
NULL ); // no blocksize needed
// Create control tree object for lowest-level block-panel kernel.
herk_cntl_bp_ke
@@ -155,7 +102,7 @@ void bli_herk_cntl_init()
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
herk_mc,
gemm_mc,
NULL,
NULL,
herk_packa_cntl,
@@ -170,7 +117,7 @@ void bli_herk_cntl_init()
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
herk_kc,
gemm_kc,
NULL,
NULL,
NULL,
@@ -185,7 +132,7 @@ void bli_herk_cntl_init()
=
bli_herk_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
herk_nc,
gemm_nc,
NULL,
NULL,
NULL,
@@ -200,17 +147,8 @@ void bli_herk_cntl_init()
void bli_herk_cntl_finalize()
{
bli_blksz_obj_free( herk_mc );
bli_blksz_obj_free( herk_nc );
bli_blksz_obj_free( herk_kc );
bli_blksz_obj_free( herk_mr );
bli_blksz_obj_free( herk_nr );
bli_blksz_obj_free( herk_kr );
bli_cntl_obj_free( herk_packa_cntl );
bli_cntl_obj_free( herk_packb_cntl );
bli_cntl_obj_free( herk_packc_cntl );
bli_cntl_obj_free( herk_unpackc_cntl );
bli_cntl_obj_free( herk_cntl_bp_ke );
bli_cntl_obj_free( herk_cntl_op_bp );

View File

@@ -35,15 +35,17 @@
#include "blis.h"
extern scalm_t* scalm_cntl;
extern gemm_t* gemm_cntl_bp_ke;
extern blksz_t* gemm_mc;
extern blksz_t* gemm_nc;
extern blksz_t* gemm_kc;
extern blksz_t* gemm_mr;
extern blksz_t* gemm_nr;
extern blksz_t* gemm_kr;
extern func_t* gemm_ukrs;
blksz_t* trmm_mc;
blksz_t* trmm_nc;
blksz_t* trmm_kc;
blksz_t* trmm_mr;
blksz_t* trmm_nr;
blksz_t* trmm_kr;
extern gemm_t* gemm_cntl_bp_ke;
packm_t* trmm_l_packa_cntl;
packm_t* trmm_l_packb_cntl;
@@ -51,9 +53,6 @@ packm_t* trmm_l_packb_cntl;
packm_t* trmm_r_packa_cntl;
packm_t* trmm_r_packb_cntl;
packm_t* trmm_packc_cntl;
unpackm_t* trmm_unpackc_cntl;
trmm_t* trmm_cntl_bp_ke;
trmm_t* trmm_l_cntl_op_bp;
@@ -70,38 +69,6 @@ trmm_t* trmm_r_cntl;
void bli_trmm_cntl_init()
{
// Create blocksize objects for each dimension.
trmm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
trmm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
trmm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
trmm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
trmm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
trmm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
// Create control tree objects for packm operations (left side).
trmm_l_packa_cntl
=
@@ -109,8 +76,8 @@ void bli_trmm_cntl_init()
BLIS_VARIANT3, // pack panels of A compactly
// IMPORTANT: for consistency with trsm, "k" dim
// multiple is set to mr.
trmm_mr,
trmm_mr,
gemm_mr,
gemm_mr,
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -124,8 +91,8 @@ void bli_trmm_cntl_init()
BLIS_VARIANT2,
// IMPORTANT: m dim multiple here must be mr
// since "k" dim multiple is set to mr above.
trmm_mr,
trmm_nr,
gemm_mr,
gemm_nr,
FALSE, // already dense
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -140,8 +107,8 @@ void bli_trmm_cntl_init()
BLIS_VARIANT2,
// IMPORTANT: for consistency with trsm, "k" dim
// multiple is set to nr.
trmm_mr,
trmm_nr,
gemm_mr,
gemm_nr,
FALSE, // already dense
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -155,8 +122,8 @@ void bli_trmm_cntl_init()
BLIS_VARIANT3, // pack panels of B compactly
// IMPORTANT: m dim multiple here must be nr
// since "k" dim multiple is set to nr above.
trmm_nr,
trmm_nr,
gemm_nr,
gemm_nr,
TRUE, // densify
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -164,26 +131,6 @@ void bli_trmm_cntl_init()
BLIS_PACKED_COL_PANELS,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm/unpackm operations on C.
trmm_packc_cntl
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
trmm_mr,
trmm_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_GEN_USE );
trmm_unpackc_cntl
=
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
NULL ); // no blocksize needed
// Create control tree object for lowest-level block-panel kernel.
trmm_cntl_bp_ke
@@ -201,7 +148,7 @@ void bli_trmm_cntl_init()
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
trmm_mc,
gemm_mc,
NULL,
NULL,
trmm_l_packa_cntl,
@@ -217,7 +164,7 @@ void bli_trmm_cntl_init()
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
trmm_kc,
gemm_kc,
NULL,
NULL,
NULL,
@@ -233,7 +180,7 @@ void bli_trmm_cntl_init()
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
trmm_nc,
gemm_nc,
NULL,
NULL,
NULL,
@@ -249,7 +196,7 @@ void bli_trmm_cntl_init()
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
trmm_mc,
gemm_mc,
NULL,
NULL,
trmm_r_packa_cntl,
@@ -265,7 +212,7 @@ void bli_trmm_cntl_init()
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
trmm_kc,
gemm_kc,
NULL,
NULL,
NULL,
@@ -281,7 +228,7 @@ void bli_trmm_cntl_init()
=
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
trmm_nc,
gemm_nc,
NULL,
NULL,
NULL,
@@ -298,21 +245,13 @@ void bli_trmm_cntl_init()
void bli_trmm_cntl_finalize()
{
bli_blksz_obj_free( trmm_mc );
bli_blksz_obj_free( trmm_nc );
bli_blksz_obj_free( trmm_kc );
bli_blksz_obj_free( trmm_mr );
bli_blksz_obj_free( trmm_nr );
bli_blksz_obj_free( trmm_kr );
bli_cntl_obj_free( trmm_l_packa_cntl );
bli_cntl_obj_free( trmm_l_packb_cntl );
bli_cntl_obj_free( trmm_r_packa_cntl );
bli_cntl_obj_free( trmm_r_packb_cntl );
bli_cntl_obj_free( trmm_packc_cntl );
bli_cntl_obj_free( trmm_unpackc_cntl );
bli_cntl_obj_free( trmm_cntl_bp_ke );
bli_cntl_obj_free( trmm_l_cntl_op_bp );
bli_cntl_obj_free( trmm_l_cntl_mm_op );
bli_cntl_obj_free( trmm_l_cntl_vl_mm );

View File

@@ -35,15 +35,17 @@
#include "blis.h"
extern scalm_t* scalm_cntl;
extern gemm_t* gemm_cntl_bp_ke;
extern blksz_t* gemm_mc;
extern blksz_t* gemm_nc;
extern blksz_t* gemm_kc;
extern blksz_t* gemm_mr;
extern blksz_t* gemm_nr;
extern blksz_t* gemm_kr;
extern func_t* gemm_ukrs;
blksz_t* trsm_mc;
blksz_t* trsm_nc;
blksz_t* trsm_kc;
blksz_t* trsm_mr;
blksz_t* trsm_nr;
blksz_t* trsm_kr;
extern gemm_t* gemm_cntl_bp_ke;
func_t* gemmtrsm_l_ukrs;
func_t* gemmtrsm_u_ukrs;
@@ -54,9 +56,6 @@ packm_t* trsm_l_packb_cntl;
packm_t* trsm_r_packa_cntl;
packm_t* trsm_r_packb_cntl;
packm_t* trsm_packc_cntl;
unpackm_t* trsm_unpackc_cntl;
trsm_t* trsm_cntl_bp_ke;
trsm_t* trsm_l_cntl_op_bp;
@@ -73,36 +72,6 @@ trsm_t* trsm_r_cntl;
void bli_trsm_cntl_init()
{
// Create blocksize objects for each dimension.
trsm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
trsm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
trsm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
trsm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
trsm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
trsm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
gemmtrsm_l_ukrs = bli_func_obj_create( BLIS_SGEMMTRSM_L_UKERNEL,
BLIS_DGEMMTRSM_L_UKERNEL,
@@ -122,8 +91,8 @@ void bli_trsm_cntl_init()
BLIS_VARIANT3, // pack panels of A compactly
// IMPORTANT: n dim multiple must be mr to
// support right and bottom-right edge cases
trsm_mr,
trsm_mr,
gemm_mr,
gemm_mr,
TRUE, // densify
TRUE, // invert diagonal
TRUE, // reverse iteration if upper?
@@ -137,8 +106,8 @@ void bli_trsm_cntl_init()
BLIS_VARIANT2,
// IMPORTANT: m dim multiple must be mr since
// B_pack is updated (ie: serves as C) in trsm
trsm_mr,
trsm_nr,
gemm_mr,
gemm_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -151,8 +120,8 @@ void bli_trsm_cntl_init()
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
trsm_nr,
trsm_mr,
gemm_nr,
gemm_mr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
@@ -164,8 +133,8 @@ void bli_trsm_cntl_init()
=
bli_packm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3, // pack panels of B compactly
trsm_mr,
trsm_mr,
gemm_mr,
gemm_mr,
TRUE, // densify
TRUE, // invert diagonal
FALSE, // reverse iteration if upper?
@@ -173,26 +142,6 @@ void bli_trsm_cntl_init()
BLIS_PACKED_COL_PANELS,
BLIS_BUFFER_FOR_B_PANEL );
// Create control tree objects for packm/unpackm operations on C.
trsm_packc_cntl
=
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
trsm_mr,
trsm_nr,
FALSE, // already dense; densify not necessary
FALSE, // do NOT invert diagonal
FALSE, // reverse iteration if upper?
FALSE, // reverse iteration if lower?
BLIS_PACKED_COLUMNS,
BLIS_BUFFER_FOR_GEN_USE );
trsm_unpackc_cntl
=
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
BLIS_VARIANT1,
NULL ); // no blocksize needed
// Create control tree object for lowest-level block-panel kernel.
trsm_cntl_bp_ke
@@ -212,7 +161,7 @@ void bli_trsm_cntl_init()
=
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
trsm_mc,
gemm_mc,
NULL, NULL, NULL,
NULL,
trsm_l_packa_cntl,
@@ -228,7 +177,7 @@ void bli_trsm_cntl_init()
=
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
trsm_kc,
gemm_kc,
NULL, NULL, NULL,
NULL,
NULL,
@@ -244,7 +193,7 @@ void bli_trsm_cntl_init()
=
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
trsm_nc,
gemm_nc,
NULL, NULL, NULL,
NULL,
NULL,
@@ -260,7 +209,7 @@ void bli_trsm_cntl_init()
=
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
trsm_mc,
gemm_mc,
NULL, NULL, NULL,
NULL,
trsm_r_packa_cntl,
@@ -276,7 +225,7 @@ void bli_trsm_cntl_init()
=
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT3,
trsm_kc,
gemm_kc,
NULL, NULL, NULL,
NULL,
NULL,
@@ -292,7 +241,7 @@ void bli_trsm_cntl_init()
=
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
trsm_nc,
gemm_nc,
NULL, NULL, NULL,
NULL,
NULL,
@@ -309,13 +258,6 @@ void bli_trsm_cntl_init()
void bli_trsm_cntl_finalize()
{
bli_blksz_obj_free( trsm_mc );
bli_blksz_obj_free( trsm_nc );
bli_blksz_obj_free( trsm_kc );
bli_blksz_obj_free( trsm_mr );
bli_blksz_obj_free( trsm_nr );
bli_blksz_obj_free( trsm_kr );
bli_func_obj_free( gemmtrsm_l_ukrs );
bli_func_obj_free( gemmtrsm_u_ukrs );
@@ -323,10 +265,9 @@ void bli_trsm_cntl_finalize()
bli_cntl_obj_free( trsm_l_packb_cntl );
bli_cntl_obj_free( trsm_r_packa_cntl );
bli_cntl_obj_free( trsm_r_packb_cntl );
bli_cntl_obj_free( trsm_packc_cntl );
bli_cntl_obj_free( trsm_unpackc_cntl );
bli_cntl_obj_free( trsm_cntl_bp_ke );
bli_cntl_obj_free( trsm_l_cntl_op_bp );
bli_cntl_obj_free( trsm_l_cntl_mm_op );
bli_cntl_obj_free( trsm_l_cntl_vl_mm );