mirror of
https://github.com/amd/blis.git
synced 2026-05-11 09:39:59 +00:00
Removed redundant non-gemm blksz_t creation.
Details: - Removed code that creates duplicate blksz_t objects for herk, trmm, and trsm. Instead, the gemm blksz_t objects are accessed via extern and used directly. This reduces the amount of code associated with each of the three _cntl_init() and _cntl_finalize() functions.
This commit is contained in:
@@ -47,8 +47,6 @@ extern func_t* gemm_ukrs;
|
||||
|
||||
extern packm_t* gemm_packa_cntl;
|
||||
extern packm_t* gemm_packb_cntl;
|
||||
extern packm_t* gemm_packc_cntl;
|
||||
extern unpackm_t* gemm_unpackc_cntl;
|
||||
|
||||
gemm_t* gemm_cntl5;
|
||||
|
||||
@@ -82,10 +80,8 @@ void bli_gemm_cntl_init_exp()
|
||||
NULL,
|
||||
gemm_packa_cntl,
|
||||
NULL,
|
||||
//gemm_packc_cntl,
|
||||
NULL,
|
||||
gemm_cntl_bp_ke5,
|
||||
//gemm_unpackc_cntl );
|
||||
NULL );
|
||||
|
||||
gemm_cntl_mm_pm
|
||||
|
||||
@@ -35,20 +35,20 @@
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
extern herk_t* herk_cntl_bp_ke;
|
||||
|
||||
extern blksz_t* gemm_mc;
|
||||
extern blksz_t* gemm_nc;
|
||||
extern blksz_t* gemm_kc;
|
||||
extern blksz_t* gemm_mr;
|
||||
extern blksz_t* gemm_nr;
|
||||
extern blksz_t* gemm_kr;
|
||||
|
||||
extern func_t* gemm_ukrs;
|
||||
|
||||
blksz_t* her2k_mc;
|
||||
blksz_t* her2k_nc;
|
||||
blksz_t* her2k_kc;
|
||||
blksz_t* her2k_mr;
|
||||
blksz_t* her2k_nr;
|
||||
blksz_t* her2k_kr;
|
||||
extern herk_t* herk_cntl_bp_ke;
|
||||
|
||||
packm_t* her2k_packa_cntl;
|
||||
packm_t* her2k_packb_cntl;
|
||||
packm_t* her2k_packc_cntl;
|
||||
unpackm_t* her2k_unpackc_cntl;
|
||||
|
||||
her2k_t* her2k_cntl_bp_ke;
|
||||
her2k_t* her2k_cntl_op_bp;
|
||||
@@ -60,37 +60,6 @@ her2k_t* her2k_cntl;
|
||||
|
||||
void bli_her2k_cntl_init()
|
||||
{
|
||||
// Create blocksize objects for each dimension.
|
||||
her2k_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
|
||||
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
|
||||
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
|
||||
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
|
||||
|
||||
her2k_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
|
||||
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
|
||||
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
|
||||
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
|
||||
|
||||
her2k_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
|
||||
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
|
||||
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
|
||||
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
|
||||
|
||||
her2k_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
|
||||
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
|
||||
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
|
||||
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
|
||||
|
||||
her2k_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
|
||||
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
|
||||
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
|
||||
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
|
||||
|
||||
her2k_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
|
||||
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
|
||||
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
|
||||
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
|
||||
|
||||
|
||||
// Create control tree objects for packm operations.
|
||||
her2k_packa_cntl
|
||||
@@ -119,25 +88,6 @@ void bli_her2k_cntl_init()
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm/unpackm operations on C.
|
||||
her2k_packc_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
her2k_mr,
|
||||
her2k_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
her2k_unpackc_cntl
|
||||
=
|
||||
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
her2k_cntl_bp_ke
|
||||
@@ -203,17 +153,8 @@ void bli_her2k_cntl_init()
|
||||
|
||||
void bli_her2k_cntl_finalize()
|
||||
{
|
||||
bli_blksz_obj_free( her2k_mc );
|
||||
bli_blksz_obj_free( her2k_nc );
|
||||
bli_blksz_obj_free( her2k_kc );
|
||||
bli_blksz_obj_free( her2k_mr );
|
||||
bli_blksz_obj_free( her2k_nr );
|
||||
bli_blksz_obj_free( her2k_kr );
|
||||
|
||||
bli_cntl_obj_free( her2k_packa_cntl );
|
||||
bli_cntl_obj_free( her2k_packb_cntl );
|
||||
bli_cntl_obj_free( her2k_packc_cntl );
|
||||
bli_cntl_obj_free( her2k_unpackc_cntl );
|
||||
|
||||
bli_cntl_obj_free( her2k_cntl_bp_ke );
|
||||
bli_cntl_obj_free( her2k_cntl_op_bp );
|
||||
|
||||
@@ -35,19 +35,18 @@
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
extern func_t* gemm_ukrs;
|
||||
|
||||
blksz_t* herk_mc;
|
||||
blksz_t* herk_nc;
|
||||
blksz_t* herk_kc;
|
||||
blksz_t* herk_mr;
|
||||
blksz_t* herk_nr;
|
||||
blksz_t* herk_kr;
|
||||
extern blksz_t* gemm_mc;
|
||||
extern blksz_t* gemm_nc;
|
||||
extern blksz_t* gemm_kc;
|
||||
extern blksz_t* gemm_mr;
|
||||
extern blksz_t* gemm_nr;
|
||||
extern blksz_t* gemm_kr;
|
||||
|
||||
extern func_t* gemm_ukrs;
|
||||
|
||||
packm_t* herk_packa_cntl;
|
||||
packm_t* herk_packb_cntl;
|
||||
packm_t* herk_packc_cntl;
|
||||
unpackm_t* herk_unpackc_cntl;
|
||||
|
||||
herk_t* herk_cntl_bp_ke;
|
||||
herk_t* herk_cntl_op_bp;
|
||||
@@ -59,45 +58,13 @@ herk_t* herk_cntl;
|
||||
|
||||
void bli_herk_cntl_init()
|
||||
{
|
||||
// Create blocksize objects for each dimension.
|
||||
herk_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
|
||||
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
|
||||
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
|
||||
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
|
||||
|
||||
herk_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
|
||||
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
|
||||
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
|
||||
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
|
||||
|
||||
herk_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
|
||||
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
|
||||
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
|
||||
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
|
||||
|
||||
herk_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
|
||||
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
|
||||
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
|
||||
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
|
||||
|
||||
herk_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
|
||||
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
|
||||
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
|
||||
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
|
||||
|
||||
herk_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
|
||||
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
|
||||
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
|
||||
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
|
||||
|
||||
|
||||
// Create control tree objects for packm operations.
|
||||
herk_packa_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
herk_mr,
|
||||
herk_kr,
|
||||
gemm_mr,
|
||||
gemm_kr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -109,8 +76,8 @@ void bli_herk_cntl_init()
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
herk_kr,
|
||||
herk_nr,
|
||||
gemm_kr,
|
||||
gemm_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -118,26 +85,6 @@ void bli_herk_cntl_init()
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm/unpackm operations on C.
|
||||
herk_packc_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
herk_mr,
|
||||
herk_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
herk_unpackc_cntl
|
||||
=
|
||||
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
herk_cntl_bp_ke
|
||||
@@ -155,7 +102,7 @@ void bli_herk_cntl_init()
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
herk_mc,
|
||||
gemm_mc,
|
||||
NULL,
|
||||
NULL,
|
||||
herk_packa_cntl,
|
||||
@@ -170,7 +117,7 @@ void bli_herk_cntl_init()
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
herk_kc,
|
||||
gemm_kc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -185,7 +132,7 @@ void bli_herk_cntl_init()
|
||||
=
|
||||
bli_herk_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
herk_nc,
|
||||
gemm_nc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -200,17 +147,8 @@ void bli_herk_cntl_init()
|
||||
|
||||
void bli_herk_cntl_finalize()
|
||||
{
|
||||
bli_blksz_obj_free( herk_mc );
|
||||
bli_blksz_obj_free( herk_nc );
|
||||
bli_blksz_obj_free( herk_kc );
|
||||
bli_blksz_obj_free( herk_mr );
|
||||
bli_blksz_obj_free( herk_nr );
|
||||
bli_blksz_obj_free( herk_kr );
|
||||
|
||||
bli_cntl_obj_free( herk_packa_cntl );
|
||||
bli_cntl_obj_free( herk_packb_cntl );
|
||||
bli_cntl_obj_free( herk_packc_cntl );
|
||||
bli_cntl_obj_free( herk_unpackc_cntl );
|
||||
|
||||
bli_cntl_obj_free( herk_cntl_bp_ke );
|
||||
bli_cntl_obj_free( herk_cntl_op_bp );
|
||||
|
||||
@@ -35,15 +35,17 @@
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
extern gemm_t* gemm_cntl_bp_ke;
|
||||
|
||||
extern blksz_t* gemm_mc;
|
||||
extern blksz_t* gemm_nc;
|
||||
extern blksz_t* gemm_kc;
|
||||
extern blksz_t* gemm_mr;
|
||||
extern blksz_t* gemm_nr;
|
||||
extern blksz_t* gemm_kr;
|
||||
|
||||
extern func_t* gemm_ukrs;
|
||||
|
||||
blksz_t* trmm_mc;
|
||||
blksz_t* trmm_nc;
|
||||
blksz_t* trmm_kc;
|
||||
blksz_t* trmm_mr;
|
||||
blksz_t* trmm_nr;
|
||||
blksz_t* trmm_kr;
|
||||
extern gemm_t* gemm_cntl_bp_ke;
|
||||
|
||||
packm_t* trmm_l_packa_cntl;
|
||||
packm_t* trmm_l_packb_cntl;
|
||||
@@ -51,9 +53,6 @@ packm_t* trmm_l_packb_cntl;
|
||||
packm_t* trmm_r_packa_cntl;
|
||||
packm_t* trmm_r_packb_cntl;
|
||||
|
||||
packm_t* trmm_packc_cntl;
|
||||
unpackm_t* trmm_unpackc_cntl;
|
||||
|
||||
trmm_t* trmm_cntl_bp_ke;
|
||||
|
||||
trmm_t* trmm_l_cntl_op_bp;
|
||||
@@ -70,38 +69,6 @@ trmm_t* trmm_r_cntl;
|
||||
|
||||
void bli_trmm_cntl_init()
|
||||
{
|
||||
// Create blocksize objects for each dimension.
|
||||
trmm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
|
||||
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
|
||||
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
|
||||
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
|
||||
|
||||
trmm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
|
||||
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
|
||||
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
|
||||
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
|
||||
|
||||
trmm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
|
||||
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
|
||||
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
|
||||
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
|
||||
|
||||
trmm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
|
||||
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
|
||||
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
|
||||
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
|
||||
|
||||
trmm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
|
||||
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
|
||||
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
|
||||
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
|
||||
|
||||
trmm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
|
||||
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
|
||||
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
|
||||
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
|
||||
|
||||
|
||||
// Create control tree objects for packm operations (left side).
|
||||
trmm_l_packa_cntl
|
||||
=
|
||||
@@ -109,8 +76,8 @@ void bli_trmm_cntl_init()
|
||||
BLIS_VARIANT3, // pack panels of A compactly
|
||||
// IMPORTANT: for consistency with trsm, "k" dim
|
||||
// multiple is set to mr.
|
||||
trmm_mr,
|
||||
trmm_mr,
|
||||
gemm_mr,
|
||||
gemm_mr,
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -124,8 +91,8 @@ void bli_trmm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: m dim multiple here must be mr
|
||||
// since "k" dim multiple is set to mr above.
|
||||
trmm_mr,
|
||||
trmm_nr,
|
||||
gemm_mr,
|
||||
gemm_nr,
|
||||
FALSE, // already dense
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -140,8 +107,8 @@ void bli_trmm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: for consistency with trsm, "k" dim
|
||||
// multiple is set to nr.
|
||||
trmm_mr,
|
||||
trmm_nr,
|
||||
gemm_mr,
|
||||
gemm_nr,
|
||||
FALSE, // already dense
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -155,8 +122,8 @@ void bli_trmm_cntl_init()
|
||||
BLIS_VARIANT3, // pack panels of B compactly
|
||||
// IMPORTANT: m dim multiple here must be nr
|
||||
// since "k" dim multiple is set to nr above.
|
||||
trmm_nr,
|
||||
trmm_nr,
|
||||
gemm_nr,
|
||||
gemm_nr,
|
||||
TRUE, // densify
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -164,26 +131,6 @@ void bli_trmm_cntl_init()
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm/unpackm operations on C.
|
||||
trmm_packc_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
trmm_mr,
|
||||
trmm_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
trmm_unpackc_cntl
|
||||
=
|
||||
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
trmm_cntl_bp_ke
|
||||
@@ -201,7 +148,7 @@ void bli_trmm_cntl_init()
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
trmm_mc,
|
||||
gemm_mc,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm_l_packa_cntl,
|
||||
@@ -217,7 +164,7 @@ void bli_trmm_cntl_init()
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
trmm_kc,
|
||||
gemm_kc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -233,7 +180,7 @@ void bli_trmm_cntl_init()
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
trmm_nc,
|
||||
gemm_nc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -249,7 +196,7 @@ void bli_trmm_cntl_init()
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
trmm_mc,
|
||||
gemm_mc,
|
||||
NULL,
|
||||
NULL,
|
||||
trmm_r_packa_cntl,
|
||||
@@ -265,7 +212,7 @@ void bli_trmm_cntl_init()
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
trmm_kc,
|
||||
gemm_kc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -281,7 +228,7 @@ void bli_trmm_cntl_init()
|
||||
=
|
||||
bli_trmm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
trmm_nc,
|
||||
gemm_nc,
|
||||
NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -298,21 +245,13 @@ void bli_trmm_cntl_init()
|
||||
|
||||
void bli_trmm_cntl_finalize()
|
||||
{
|
||||
bli_blksz_obj_free( trmm_mc );
|
||||
bli_blksz_obj_free( trmm_nc );
|
||||
bli_blksz_obj_free( trmm_kc );
|
||||
bli_blksz_obj_free( trmm_mr );
|
||||
bli_blksz_obj_free( trmm_nr );
|
||||
bli_blksz_obj_free( trmm_kr );
|
||||
|
||||
bli_cntl_obj_free( trmm_l_packa_cntl );
|
||||
bli_cntl_obj_free( trmm_l_packb_cntl );
|
||||
bli_cntl_obj_free( trmm_r_packa_cntl );
|
||||
bli_cntl_obj_free( trmm_r_packb_cntl );
|
||||
bli_cntl_obj_free( trmm_packc_cntl );
|
||||
bli_cntl_obj_free( trmm_unpackc_cntl );
|
||||
|
||||
bli_cntl_obj_free( trmm_cntl_bp_ke );
|
||||
|
||||
bli_cntl_obj_free( trmm_l_cntl_op_bp );
|
||||
bli_cntl_obj_free( trmm_l_cntl_mm_op );
|
||||
bli_cntl_obj_free( trmm_l_cntl_vl_mm );
|
||||
|
||||
@@ -35,15 +35,17 @@
|
||||
#include "blis.h"
|
||||
|
||||
extern scalm_t* scalm_cntl;
|
||||
extern gemm_t* gemm_cntl_bp_ke;
|
||||
|
||||
extern blksz_t* gemm_mc;
|
||||
extern blksz_t* gemm_nc;
|
||||
extern blksz_t* gemm_kc;
|
||||
extern blksz_t* gemm_mr;
|
||||
extern blksz_t* gemm_nr;
|
||||
extern blksz_t* gemm_kr;
|
||||
|
||||
extern func_t* gemm_ukrs;
|
||||
|
||||
blksz_t* trsm_mc;
|
||||
blksz_t* trsm_nc;
|
||||
blksz_t* trsm_kc;
|
||||
blksz_t* trsm_mr;
|
||||
blksz_t* trsm_nr;
|
||||
blksz_t* trsm_kr;
|
||||
extern gemm_t* gemm_cntl_bp_ke;
|
||||
|
||||
func_t* gemmtrsm_l_ukrs;
|
||||
func_t* gemmtrsm_u_ukrs;
|
||||
@@ -54,9 +56,6 @@ packm_t* trsm_l_packb_cntl;
|
||||
packm_t* trsm_r_packa_cntl;
|
||||
packm_t* trsm_r_packb_cntl;
|
||||
|
||||
packm_t* trsm_packc_cntl;
|
||||
unpackm_t* trsm_unpackc_cntl;
|
||||
|
||||
trsm_t* trsm_cntl_bp_ke;
|
||||
|
||||
trsm_t* trsm_l_cntl_op_bp;
|
||||
@@ -73,36 +72,6 @@ trsm_t* trsm_r_cntl;
|
||||
|
||||
void bli_trsm_cntl_init()
|
||||
{
|
||||
// Create blocksize objects for each dimension.
|
||||
trsm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
|
||||
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
|
||||
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
|
||||
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
|
||||
|
||||
trsm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
|
||||
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
|
||||
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
|
||||
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
|
||||
|
||||
trsm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
|
||||
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
|
||||
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
|
||||
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
|
||||
|
||||
trsm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
|
||||
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
|
||||
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
|
||||
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
|
||||
|
||||
trsm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
|
||||
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
|
||||
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
|
||||
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
|
||||
|
||||
trsm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, BLIS_EXTEND_KR_S,
|
||||
BLIS_DEFAULT_KR_D, BLIS_EXTEND_KR_D,
|
||||
BLIS_DEFAULT_KR_C, BLIS_EXTEND_KR_C,
|
||||
BLIS_DEFAULT_KR_Z, BLIS_EXTEND_KR_Z );
|
||||
|
||||
gemmtrsm_l_ukrs = bli_func_obj_create( BLIS_SGEMMTRSM_L_UKERNEL,
|
||||
BLIS_DGEMMTRSM_L_UKERNEL,
|
||||
@@ -122,8 +91,8 @@ void bli_trsm_cntl_init()
|
||||
BLIS_VARIANT3, // pack panels of A compactly
|
||||
// IMPORTANT: n dim multiple must be mr to
|
||||
// support right and bottom-right edge cases
|
||||
trsm_mr,
|
||||
trsm_mr,
|
||||
gemm_mr,
|
||||
gemm_mr,
|
||||
TRUE, // densify
|
||||
TRUE, // invert diagonal
|
||||
TRUE, // reverse iteration if upper?
|
||||
@@ -137,8 +106,8 @@ void bli_trsm_cntl_init()
|
||||
BLIS_VARIANT2,
|
||||
// IMPORTANT: m dim multiple must be mr since
|
||||
// B_pack is updated (ie: serves as C) in trsm
|
||||
trsm_mr,
|
||||
trsm_nr,
|
||||
gemm_mr,
|
||||
gemm_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -151,8 +120,8 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
trsm_nr,
|
||||
trsm_mr,
|
||||
gemm_nr,
|
||||
gemm_mr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -164,8 +133,8 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3, // pack panels of B compactly
|
||||
trsm_mr,
|
||||
trsm_mr,
|
||||
gemm_mr,
|
||||
gemm_mr,
|
||||
TRUE, // densify
|
||||
TRUE, // invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
@@ -173,26 +142,6 @@ void bli_trsm_cntl_init()
|
||||
BLIS_PACKED_COL_PANELS,
|
||||
BLIS_BUFFER_FOR_B_PANEL );
|
||||
|
||||
// Create control tree objects for packm/unpackm operations on C.
|
||||
trsm_packc_cntl
|
||||
=
|
||||
bli_packm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
trsm_mr,
|
||||
trsm_nr,
|
||||
FALSE, // already dense; densify not necessary
|
||||
FALSE, // do NOT invert diagonal
|
||||
FALSE, // reverse iteration if upper?
|
||||
FALSE, // reverse iteration if lower?
|
||||
BLIS_PACKED_COLUMNS,
|
||||
BLIS_BUFFER_FOR_GEN_USE );
|
||||
|
||||
trsm_unpackc_cntl
|
||||
=
|
||||
bli_unpackm_cntl_obj_create( BLIS_UNBLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
NULL ); // no blocksize needed
|
||||
|
||||
|
||||
// Create control tree object for lowest-level block-panel kernel.
|
||||
trsm_cntl_bp_ke
|
||||
@@ -212,7 +161,7 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
trsm_mc,
|
||||
gemm_mc,
|
||||
NULL, NULL, NULL,
|
||||
NULL,
|
||||
trsm_l_packa_cntl,
|
||||
@@ -228,7 +177,7 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
trsm_kc,
|
||||
gemm_kc,
|
||||
NULL, NULL, NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -244,7 +193,7 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
trsm_nc,
|
||||
gemm_nc,
|
||||
NULL, NULL, NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -260,7 +209,7 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT1,
|
||||
trsm_mc,
|
||||
gemm_mc,
|
||||
NULL, NULL, NULL,
|
||||
NULL,
|
||||
trsm_r_packa_cntl,
|
||||
@@ -276,7 +225,7 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT3,
|
||||
trsm_kc,
|
||||
gemm_kc,
|
||||
NULL, NULL, NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -292,7 +241,7 @@ void bli_trsm_cntl_init()
|
||||
=
|
||||
bli_trsm_cntl_obj_create( BLIS_BLOCKED,
|
||||
BLIS_VARIANT2,
|
||||
trsm_nc,
|
||||
gemm_nc,
|
||||
NULL, NULL, NULL,
|
||||
NULL,
|
||||
NULL,
|
||||
@@ -309,13 +258,6 @@ void bli_trsm_cntl_init()
|
||||
|
||||
void bli_trsm_cntl_finalize()
|
||||
{
|
||||
bli_blksz_obj_free( trsm_mc );
|
||||
bli_blksz_obj_free( trsm_nc );
|
||||
bli_blksz_obj_free( trsm_kc );
|
||||
bli_blksz_obj_free( trsm_mr );
|
||||
bli_blksz_obj_free( trsm_nr );
|
||||
bli_blksz_obj_free( trsm_kr );
|
||||
|
||||
bli_func_obj_free( gemmtrsm_l_ukrs );
|
||||
bli_func_obj_free( gemmtrsm_u_ukrs );
|
||||
|
||||
@@ -323,10 +265,9 @@ void bli_trsm_cntl_finalize()
|
||||
bli_cntl_obj_free( trsm_l_packb_cntl );
|
||||
bli_cntl_obj_free( trsm_r_packa_cntl );
|
||||
bli_cntl_obj_free( trsm_r_packb_cntl );
|
||||
bli_cntl_obj_free( trsm_packc_cntl );
|
||||
bli_cntl_obj_free( trsm_unpackc_cntl );
|
||||
|
||||
bli_cntl_obj_free( trsm_cntl_bp_ke );
|
||||
|
||||
bli_cntl_obj_free( trsm_l_cntl_op_bp );
|
||||
bli_cntl_obj_free( trsm_l_cntl_mm_op );
|
||||
bli_cntl_obj_free( trsm_l_cntl_vl_mm );
|
||||
|
||||
Reference in New Issue
Block a user