This commit is contained in:
Tyler Smith
2014-04-23 12:30:19 -05:00
14 changed files with 162 additions and 2677 deletions

View File

@@ -39,8 +39,8 @@ extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackv_t* unpackv_cntl;
static blksz_t* gemv_mc;
static blksz_t* gemv_nc;
blksz_t* gemv_mc;
blksz_t* gemv_nc;
gemv_t* gemv_cntl_bs_ke_dot;
gemv_t* gemv_cntl_bs_ke_axpy;
@@ -54,32 +54,22 @@ gemv_t* gemv_cntl_cp_bs_axpy;
gemv_t* gemv_cntl_ge_dot;
gemv_t* gemv_cntl_ge_axpy;
// Cache blocksizes.
#define BLIS_GEMV_MC_S BLIS_DEFAULT_L2_MC_S
#define BLIS_GEMV_MC_D BLIS_DEFAULT_L2_MC_D
#define BLIS_GEMV_MC_C BLIS_DEFAULT_L2_MC_C
#define BLIS_GEMV_MC_Z BLIS_DEFAULT_L2_MC_Z
#define BLIS_GEMV_NC_S BLIS_DEFAULT_L2_NC_S
#define BLIS_GEMV_NC_D BLIS_DEFAULT_L2_NC_D
#define BLIS_GEMV_NC_C BLIS_DEFAULT_L2_NC_C
#define BLIS_GEMV_NC_Z BLIS_DEFAULT_L2_NC_Z
void bli_gemv_cntl_init()
{
// Create blocksize objects for each dimension.
gemv_mc = bli_blksz_obj_create( BLIS_GEMV_MC_S, 0,
BLIS_GEMV_MC_D, 0,
BLIS_GEMV_MC_C, 0,
BLIS_GEMV_MC_Z, 0 );
gemv_nc = bli_blksz_obj_create( BLIS_GEMV_NC_S, 0,
BLIS_GEMV_NC_D, 0,
BLIS_GEMV_NC_C, 0,
BLIS_GEMV_NC_Z, 0 );
gemv_mc
=
bli_blksz_obj_create( BLIS_DEFAULT_L2_MC_S, 0,
BLIS_DEFAULT_L2_MC_D, 0,
BLIS_DEFAULT_L2_MC_C, 0,
BLIS_DEFAULT_L2_MC_Z, 0 );
gemv_nc
=
bli_blksz_obj_create( BLIS_DEFAULT_L2_NC_S, 0,
BLIS_DEFAULT_L2_NC_D, 0,
BLIS_DEFAULT_L2_NC_C, 0,
BLIS_DEFAULT_L2_NC_Z, 0 );
// Create control trees for the lowest-level kernels. These trees induce

View File

@@ -38,8 +38,8 @@ extern packm_t* packm_cntl;
extern packv_t* packv_cntl;
extern unpackm_t* unpackm_cntl;
static blksz_t* ger_mc;
static blksz_t* ger_nc;
extern blksz_t* gemv_mc;
extern blksz_t* gemv_nc;
ger_t* ger_cntl_bs_ke_row;
ger_t* ger_cntl_bs_ke_col;
@@ -53,34 +53,9 @@ ger_t* ger_cntl_cp_bs_col;
ger_t* ger_cntl_ge_row;
ger_t* ger_cntl_ge_col;
// Cache blocksizes.
#define BLIS_GER_MC_S BLIS_DEFAULT_L2_MC_S
#define BLIS_GER_MC_D BLIS_DEFAULT_L2_MC_D
#define BLIS_GER_MC_C BLIS_DEFAULT_L2_MC_C
#define BLIS_GER_MC_Z BLIS_DEFAULT_L2_MC_Z
#define BLIS_GER_NC_S BLIS_DEFAULT_L2_NC_S
#define BLIS_GER_NC_D BLIS_DEFAULT_L2_NC_D
#define BLIS_GER_NC_C BLIS_DEFAULT_L2_NC_C
#define BLIS_GER_NC_Z BLIS_DEFAULT_L2_NC_Z
void bli_ger_cntl_init()
{
// Create blocksize objects.
ger_mc = bli_blksz_obj_create( BLIS_GER_MC_S, 0,
BLIS_GER_MC_D, 0,
BLIS_GER_MC_C, 0,
BLIS_GER_MC_Z, 0 );
ger_nc = bli_blksz_obj_create( BLIS_GER_NC_S, 0,
BLIS_GER_NC_D, 0,
BLIS_GER_NC_C, 0,
BLIS_GER_NC_Z, 0 );
// Create control trees for the lowest-level kernels. These trees induce
// operations on (presumably) relatively small block-subvector problems.
ger_cntl_bs_ke_row
@@ -103,7 +78,7 @@ void bli_ger_cntl_init()
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
gemv_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack A1 (if needed)
@@ -113,7 +88,7 @@ void bli_ger_cntl_init()
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
gemv_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack A1 (if needed)
@@ -127,7 +102,7 @@ void bli_ger_cntl_init()
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
ger_mc,
gemv_mc,
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var1
packm_cntl, // pack A1 (if needed)
@@ -137,7 +112,7 @@ void bli_ger_cntl_init()
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
ger_mc,
gemv_mc,
packv_cntl, // pack x1 (if needed)
NULL, // y is not partitioned in var1
packm_cntl, // pack A1 (if needed)
@@ -151,7 +126,7 @@ void bli_ger_cntl_init()
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
gemv_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
NULL, // do not pack A1
@@ -161,7 +136,7 @@ void bli_ger_cntl_init()
=
bli_ger_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
ger_nc,
gemv_nc,
NULL, // x is not partitioned in var2
packv_cntl, // pack y1 (if needed)
NULL, // do not pack A1
@@ -171,9 +146,6 @@ void bli_ger_cntl_init()
void bli_ger_cntl_finalize()
{
bli_cntl_obj_free( ger_mc );
bli_cntl_obj_free( ger_nc );
bli_cntl_obj_free( ger_cntl_bs_ke_row );
bli_cntl_obj_free( ger_cntl_bs_ke_col );

View File

@@ -44,31 +44,16 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
extern gemv_t* gemv_cntl_cp_bs_dot;
extern gemv_t* gemv_cntl_cp_bs_axpy;
static blksz_t* hemv_mc;
extern blksz_t* gemv_mc;
hemv_t* hemv_cntl_bs_ke_lrow_ucol;
hemv_t* hemv_cntl_bs_ke_lcol_urow;
hemv_t* hemv_cntl_ge_lrow_ucol;
hemv_t* hemv_cntl_ge_lcol_urow;
// Cache blocksizes.
#define BLIS_HEMV_MC_S BLIS_DEFAULT_L2_MC_S
#define BLIS_HEMV_MC_D BLIS_DEFAULT_L2_MC_D
#define BLIS_HEMV_MC_C BLIS_DEFAULT_L2_MC_C
#define BLIS_HEMV_MC_Z BLIS_DEFAULT_L2_MC_Z
void bli_hemv_cntl_init()
{
// Create blocksize objects.
hemv_mc = bli_blksz_obj_create( BLIS_HEMV_MC_S, 0,
BLIS_HEMV_MC_D, 0,
BLIS_HEMV_MC_C, 0,
BLIS_HEMV_MC_Z, 0 );
// Create control trees for the lowest-level kernels. These trees induce
// operations on (presumably) relatively small block-subvector problems.
hemv_cntl_bs_ke_lrow_ucol
@@ -93,7 +78,7 @@ void bli_hemv_cntl_init()
=
bli_hemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
hemv_mc,
gemv_mc,
scalv_cntl, // scale y up-front
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
@@ -108,7 +93,7 @@ void bli_hemv_cntl_init()
=
bli_hemv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
hemv_mc,
gemv_mc,
scalv_cntl, // scale y up-front
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)

View File

@@ -43,7 +43,7 @@ extern ger_t* ger_cntl_cp_bs_col;
extern ger_t* ger_cntl_bs_ke_row;
extern ger_t* ger_cntl_bs_ke_col;
static blksz_t* her_mc;
extern blksz_t* gemv_mc;
her_t* her_cntl_bs_ke_lrow_ucol;
her_t* her_cntl_bs_ke_lcol_urow;
@@ -51,24 +51,9 @@ her_t* her_cntl_bs_ke_lcol_urow;
her_t* her_cntl_ge_lrow_ucol;
her_t* her_cntl_ge_lcol_urow;
// Cache blocksizes.
#define BLIS_HER_MC_S BLIS_DEFAULT_L2_MC_S
#define BLIS_HER_MC_D BLIS_DEFAULT_L2_MC_D
#define BLIS_HER_MC_C BLIS_DEFAULT_L2_MC_C
#define BLIS_HER_MC_Z BLIS_DEFAULT_L2_MC_Z
void bli_her_cntl_init()
{
// Create blocksize objects.
her_mc = bli_blksz_obj_create( BLIS_HER_MC_S, 0,
BLIS_HER_MC_D, 0,
BLIS_HER_MC_C, 0,
BLIS_HER_MC_Z, 0 );
// Create control trees for the lowest-level kernels. These trees induce
// operations on (presumably) relatively small block-subvector problems.
her_cntl_bs_ke_lrow_ucol
@@ -92,7 +77,7 @@ void bli_her_cntl_init()
=
bli_her_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
her_mc,
gemv_mc,
packv_cntl, // pack x1 (if needed)
NULL, // do NOT pack C11
ger_cntl_rp_bs_row,
@@ -102,7 +87,7 @@ void bli_her_cntl_init()
=
bli_her_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT2,
her_mc,
gemv_mc,
packv_cntl, // pack x1 (if needed)
NULL, // do NOT pack C11
ger_cntl_cp_bs_col,

View File

@@ -41,7 +41,7 @@ extern unpackm_t* unpackm_cntl;
extern ger_t* ger_cntl_rp_bs_row;
extern ger_t* ger_cntl_cp_bs_col;
static blksz_t* her2_mc;
extern blksz_t* gemv_mc;
her2_t* her2_cntl_bs_ke_lrow_ucol;
her2_t* her2_cntl_bs_ke_lcol_urow;
@@ -49,24 +49,9 @@ her2_t* her2_cntl_bs_ke_lcol_urow;
her2_t* her2_cntl_ge_lrow_ucol;
her2_t* her2_cntl_ge_lcol_urow;
// Cache blocksizes.
#define BLIS_HER2_MC_S BLIS_DEFAULT_L2_MC_S
#define BLIS_HER2_MC_D BLIS_DEFAULT_L2_MC_D
#define BLIS_HER2_MC_C BLIS_DEFAULT_L2_MC_C
#define BLIS_HER2_MC_Z BLIS_DEFAULT_L2_MC_Z
void bli_her2_cntl_init()
{
// Create blocksize objects.
her2_mc = bli_blksz_obj_create( BLIS_HER2_MC_S, 0,
BLIS_HER2_MC_D, 0,
BLIS_HER2_MC_C, 0,
BLIS_HER2_MC_Z, 0 );
// Create control trees for the lowest-level kernels. These trees induce
// operations on (presumably) relatively small block-subvector problems.
her2_cntl_bs_ke_lrow_ucol
@@ -92,7 +77,7 @@ void bli_her2_cntl_init()
=
bli_her2_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1,
her2_mc,
gemv_mc,
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack C11 (if needed)
@@ -104,7 +89,7 @@ void bli_her2_cntl_init()
=
bli_her2_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT4,
her2_mc,
gemv_mc,
packv_cntl, // pack x1 (if needed)
packv_cntl, // pack y1 (if needed)
packm_cntl, // pack C11 (if needed)

View File

@@ -43,31 +43,16 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
extern gemv_t* gemv_cntl_cp_bs_dot;
extern gemv_t* gemv_cntl_cp_bs_axpy;
static blksz_t* trmv_mc;
extern blksz_t* gemv_mc;
trmv_t* trmv_cntl_bs_ke_nrow_tcol;
trmv_t* trmv_cntl_bs_ke_ncol_trow;
trmv_t* trmv_cntl_ge_nrow_tcol;
trmv_t* trmv_cntl_ge_ncol_trow;
// Cache blocksizes.
#define BLIS_TRMV_MC_S BLIS_DEFAULT_L2_MC_S
#define BLIS_TRMV_MC_D BLIS_DEFAULT_L2_MC_D
#define BLIS_TRMV_MC_C BLIS_DEFAULT_L2_MC_C
#define BLIS_TRMV_MC_Z BLIS_DEFAULT_L2_MC_Z
void bli_trmv_cntl_init()
{
// Create blocksize objects.
trmv_mc = bli_blksz_obj_create( BLIS_TRMV_MC_S, 0,
BLIS_TRMV_MC_D, 0,
BLIS_TRMV_MC_C, 0,
BLIS_TRMV_MC_Z, 0 );
// Create control trees for the lowest-level kernels. These trees induce
// operations on (presumably) relatively small block-subvector problems.
trmv_cntl_bs_ke_nrow_tcol
@@ -93,7 +78,7 @@ void bli_trmv_cntl_init()
=
bli_trmv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
trmv_mc,
gemv_mc,
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_dot, // gemv_rp needed by var1
@@ -104,7 +89,7 @@ void bli_trmv_cntl_init()
=
bli_trmv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
trmv_mc,
gemv_mc,
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
gemv_cntl_rp_bs_axpy, // gemv_rp needed by var1

View File

@@ -44,29 +44,16 @@ extern gemv_t* gemv_cntl_rp_bs_axpy;
extern gemv_t* gemv_cntl_cp_bs_dot;
extern gemv_t* gemv_cntl_cp_bs_axpy;
static blksz_t* trsv_mc;
extern blksz_t* gemv_mc;
trsv_t* trsv_cntl_bs_ke_nrow_tcol;
trsv_t* trsv_cntl_bs_ke_ncol_trow;
trsv_t* trsv_cntl_ge_nrow_tcol;
trsv_t* trsv_cntl_ge_ncol_trow;
// Cache blocksizes.
#define BLIS_TRSV_MC_S BLIS_DEFAULT_L2_MC_S
#define BLIS_TRSV_MC_D BLIS_DEFAULT_L2_MC_D
#define BLIS_TRSV_MC_C BLIS_DEFAULT_L2_MC_C
#define BLIS_TRSV_MC_Z BLIS_DEFAULT_L2_MC_Z
void bli_trsv_cntl_init()
{
// Create blocksize objects.
trsv_mc = bli_blksz_obj_create( BLIS_TRSV_MC_S, 0,
BLIS_TRSV_MC_D, 0,
BLIS_TRSV_MC_C, 0,
BLIS_TRSV_MC_Z, 0 );
// Create control trees for the lowest-level kernels. These trees induce
// operations on (presumably) relatively small block-subvector problems.
trsv_cntl_bs_ke_nrow_tcol
@@ -90,7 +77,7 @@ void bli_trsv_cntl_init()
=
bli_trsv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
trsv_mc,
gemv_mc,
scalv_cntl, // scale x up-front
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)
@@ -102,7 +89,7 @@ void bli_trsv_cntl_init()
=
bli_trsv_cntl_obj_create( BLIS_BLOCKED,
BLIS_VARIANT1, // use var1 to maximize x1 usage
trsv_mc,
gemv_mc,
scalv_cntl, // scale x up-front
packm_cntl, // pack A11 (if needed)
packv_cntl, // pack x1 (if needed)

View File

@@ -61,41 +61,48 @@ void bli_gemm3m_cntl_init()
// Create blocksize objects for each dimension.
gemm3m_mc
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_3M_MC_C, BLIS_EXTEND_3M_MC_C,
BLIS_DEFAULT_3M_MC_Z, BLIS_EXTEND_3M_MC_Z );
gemm3m_nc
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_3M_NC_C, BLIS_EXTEND_3M_NC_C,
BLIS_DEFAULT_3M_NC_Z, BLIS_EXTEND_3M_NC_Z );
gemm3m_kc
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_3M_KC_C, BLIS_EXTEND_3M_KC_C,
BLIS_DEFAULT_3M_KC_Z, BLIS_EXTEND_3M_KC_Z );
gemm3m_mr
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_3M_MR_C, BLIS_EXTEND_3M_MR_C,
BLIS_DEFAULT_3M_MR_Z, BLIS_EXTEND_3M_MR_Z );
gemm3m_nr
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_3M_NR_C, BLIS_EXTEND_3M_NR_C,
BLIS_DEFAULT_3M_NR_Z, BLIS_EXTEND_3M_NR_Z );
gemm3m_kr
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_3M_KR_C, BLIS_EXTEND_3M_KR_C,
BLIS_DEFAULT_3M_KR_Z, BLIS_EXTEND_3M_KR_Z );
// Attach the register blksz_t objects as sub-blocksizes to the cache
// blksz_t objects.
bli_blksz_obj_attach_to( gemm3m_mr, gemm3m_mc );
bli_blksz_obj_attach_to( gemm3m_nr, gemm3m_nc );
bli_blksz_obj_attach_to( gemm3m_kr, gemm3m_kc );
// Create function pointer object for each datatype-specific gemm
// micro-kernel.

View File

@@ -61,41 +61,48 @@ void bli_gemm4m_cntl_init()
// Create blocksize objects for each dimension.
gemm4m_mc
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_4M_MC_C, BLIS_EXTEND_4M_MC_C,
BLIS_DEFAULT_4M_MC_Z, BLIS_EXTEND_4M_MC_Z );
gemm4m_nc
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_4M_NC_C, BLIS_EXTEND_4M_NC_C,
BLIS_DEFAULT_4M_NC_Z, BLIS_EXTEND_4M_NC_Z );
gemm4m_kc
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_4M_KC_C, BLIS_EXTEND_4M_KC_C,
BLIS_DEFAULT_4M_KC_Z, BLIS_EXTEND_4M_KC_Z );
gemm4m_mr
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_4M_MR_C, BLIS_EXTEND_4M_MR_C,
BLIS_DEFAULT_4M_MR_Z, BLIS_EXTEND_4M_MR_Z );
gemm4m_nr
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_4M_NR_C, BLIS_EXTEND_4M_NR_C,
BLIS_DEFAULT_4M_NR_Z, BLIS_EXTEND_4M_NR_Z );
gemm4m_kr
=
bli_blksz_obj_create( 0, 0, 0, 0,
bli_blksz_obj_create( 0, 0,
0, 0,
BLIS_DEFAULT_4M_KR_C, BLIS_EXTEND_4M_KR_C,
BLIS_DEFAULT_4M_KR_Z, BLIS_EXTEND_4M_KR_Z );
// Attach the register blksz_t objects as sub-blocksizes to the cache
// blksz_t objects.
bli_blksz_obj_attach_to( gemm4m_mr, gemm4m_mc );
bli_blksz_obj_attach_to( gemm4m_nr, gemm4m_nc );
bli_blksz_obj_attach_to( gemm4m_kr, gemm4m_kc );
// Create function pointer object for each datatype-specific gemm
// micro-kernel.

View File

@@ -58,35 +58,50 @@ gemm_t* gemm_cntl;
void bli_gemm_cntl_init()
{
// Create blocksize objects for each dimension.
gemm_mc = bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
gemm_mc
=
bli_blksz_obj_create( BLIS_DEFAULT_MC_S, BLIS_EXTEND_MC_S,
BLIS_DEFAULT_MC_D, BLIS_EXTEND_MC_D,
BLIS_DEFAULT_MC_C, BLIS_EXTEND_MC_C,
BLIS_DEFAULT_MC_Z, BLIS_EXTEND_MC_Z );
gemm_nc
=
bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
gemm_kc
=
bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
gemm_mr
=
bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
gemm_nr
=
bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
gemm_kr
=
bli_blksz_obj_create( BLIS_DEFAULT_KR_S, 0,
BLIS_DEFAULT_KR_D, 0,
BLIS_DEFAULT_KR_C, 0,
BLIS_DEFAULT_KR_Z, 0 );
gemm_nc = bli_blksz_obj_create( BLIS_DEFAULT_NC_S, BLIS_EXTEND_NC_S,
BLIS_DEFAULT_NC_D, BLIS_EXTEND_NC_D,
BLIS_DEFAULT_NC_C, BLIS_EXTEND_NC_C,
BLIS_DEFAULT_NC_Z, BLIS_EXTEND_NC_Z );
gemm_kc = bli_blksz_obj_create( BLIS_DEFAULT_KC_S, BLIS_EXTEND_KC_S,
BLIS_DEFAULT_KC_D, BLIS_EXTEND_KC_D,
BLIS_DEFAULT_KC_C, BLIS_EXTEND_KC_C,
BLIS_DEFAULT_KC_Z, BLIS_EXTEND_KC_Z );
// Attach the register blksz_t objects as sub-blocksizes to the cache
// blksz_t objects.
bli_blksz_obj_attach_to( gemm_mr, gemm_mc );
bli_blksz_obj_attach_to( gemm_nr, gemm_nc );
bli_blksz_obj_attach_to( gemm_kr, gemm_kc );
gemm_mr = bli_blksz_obj_create( BLIS_DEFAULT_MR_S, BLIS_EXTEND_MR_S,
BLIS_DEFAULT_MR_D, BLIS_EXTEND_MR_D,
BLIS_DEFAULT_MR_C, BLIS_EXTEND_MR_C,
BLIS_DEFAULT_MR_Z, BLIS_EXTEND_MR_Z );
gemm_nr = bli_blksz_obj_create( BLIS_DEFAULT_NR_S, BLIS_EXTEND_NR_S,
BLIS_DEFAULT_NR_D, BLIS_EXTEND_NR_D,
BLIS_DEFAULT_NR_C, BLIS_EXTEND_NR_C,
BLIS_DEFAULT_NR_Z, BLIS_EXTEND_NR_Z );
gemm_kr = bli_blksz_obj_create( BLIS_DEFAULT_KR_S, 0,
BLIS_DEFAULT_KR_D, 0,
BLIS_DEFAULT_KR_C, 0,
BLIS_DEFAULT_KR_Z, 0 );
// Create function pointer object for each datatype-specific gemm
// micro-kernel.

View File

@@ -68,6 +68,16 @@ void bli_blksz_obj_init( blksz_t* b,
b->e[BLIS_BITVAL_DOUBLE_TYPE] = be_d;
b->e[BLIS_BITVAL_SCOMPLEX_TYPE] = be_c;
b->e[BLIS_BITVAL_DCOMPLEX_TYPE] = be_z;
// By default, set the sub-blocksize field to NULL.
b->sub = NULL;
}
// Attach the blocksize object br to bc as bc's sub-blocksize.
//
//   br  blocksize object to attach (the gemm control-tree setup passes the
//       register blocksize objects here).
//   bc  blocksize object that receives the attachment (the gemm control-tree
//       setup passes the cache blocksize objects here).
//
// Only the pointer is stored; br is neither copied nor modified, and any
// sub-blocksize previously recorded in bc is simply overwritten.
void bli_blksz_obj_attach_to( blksz_t* br, blksz_t* bc )
{
	// Link the sub-blocksize into its parent.
	bc->sub = br;
}
@@ -119,6 +129,12 @@ dim_t bli_blksz_total_for_obj( obj_t* obj,
}
// Query the sub-blocksize object attached to b.
//
// Returns the pointer stored in b->sub. bli_blksz_obj_init() sets that field
// to NULL by default, so the result is NULL when nothing has been attached.
blksz_t* bli_blksz_sub( blksz_t* b )
{
	blksz_t* attached = b->sub;

	return attached;
}
dim_t bli_determine_blocksize_f( dim_t i,
dim_t dim,
obj_t* obj,
@@ -133,7 +149,7 @@ dim_t bli_determine_blocksize_f( dim_t i,
// to bottom-right).
// Extract the execution datatype and use it to query the corresponding
// blocksize and blocksize extension values rom the blksz_t object.
// blocksize and blocksize extension values from the blksz_t object.
dt = bli_obj_execution_datatype( *obj );
b_alg = bli_blksz_for_type( dt, b );
b_ext = bli_blksz_ext_for_type( dt, b );
@@ -173,7 +189,7 @@ dim_t bli_determine_blocksize_b( dim_t i,
// to top-left).
// Extract the execution datatype and use it to query the corresponding
// blocksize and blocksize extension values rom the blksz_t object.
// blocksize and blocksize extension values from the blksz_t object.
dt = bli_obj_execution_datatype( *obj );
b_alg = bli_blksz_for_type( dt, b );
b_ext = bli_blksz_ext_for_type( dt, b );
@@ -215,3 +231,20 @@ dim_t bli_determine_blocksize_b( dim_t i,
return b_now;
}
// Determine the register blocksize associated with the blocksize object b
// for the execution datatype of obj.
//
//   obj  object whose execution datatype selects which blocksize value is
//        returned.
//   b    blocksize object whose attached sub-blocksize object is queried.
//
// Returns the value reported by bli_blksz_for_type() for the sub-blocksize
// object of b.
//
// NOTE(review): the sub-blocksize pointer is forwarded without a NULL check,
// and bli_blksz_obj_init() leaves it NULL unless bli_blksz_obj_attach_to()
// was called -- confirm callers only pass blocksize objects that have a
// sub-blocksize attached.
dim_t bli_determine_reg_blocksize( obj_t*   obj,
                                   blksz_t* b )
{
	// Execution datatype of the object; selects the per-type entry.
	const num_t exec_dt = bli_obj_execution_datatype( *obj );

	// Sub-blocksize object attached to b.
	blksz_t* reg_blksz = bli_blksz_sub( b );

	// Look up the blocksize for the execution datatype in the sub-blocksize
	// object.
	return bli_blksz_for_type( exec_dt, reg_blksz );
}

View File

@@ -44,6 +44,9 @@ void bli_blksz_obj_init( blksz_t* b,
dim_t b_c, dim_t be_c,
dim_t b_z, dim_t be_z );
void bli_blksz_obj_attach_to( blksz_t* br,
blksz_t* bc );
void bli_blksz_obj_free( blksz_t* b );
dim_t bli_blksz_for_type( num_t dt,
@@ -64,6 +67,8 @@ dim_t bli_blksz_ext_for_obj( obj_t* obj,
dim_t bli_blksz_total_for_obj( obj_t* obj,
blksz_t* b );
blksz_t* bli_blksz_sub( blksz_t* b );
dim_t bli_determine_blocksize_f( dim_t i,
dim_t dim,
obj_t* obj,
@@ -73,3 +78,5 @@ dim_t bli_determine_blocksize_b( dim_t i,
obj_t* obj,
blksz_t* b );
dim_t bli_determine_reg_blocksize( obj_t* obj,
blksz_t* b );

View File

@@ -416,10 +416,14 @@ typedef struct mem_s
typedef struct blksz_s
{
// Primary blocksize values.
dim_t v[BLIS_NUM_FP_TYPES];
dim_t v[BLIS_NUM_FP_TYPES];
// Blocksize Extensions.
dim_t e[BLIS_NUM_FP_TYPES];
dim_t e[BLIS_NUM_FP_TYPES];
// Sub-blocksize pointer.
struct blksz_s* sub;
} blksz_t;
// -- Function pointer object type --

File diff suppressed because it is too large Load Diff