Merge pull request #48 from figual/master

Updated and improved ARMv8 micro-kernels.
This commit is contained in:
Field G. Van Zee
2016-03-24 12:30:21 -05:00
2 changed files with 1701 additions and 434 deletions

View File

@@ -51,13 +51,13 @@
// (b) MR (for zero-padding purposes when MR and NR are "swapped")
//
-#define BLIS_DEFAULT_MC_S 336
-#define BLIS_DEFAULT_KC_S 336
-#define BLIS_DEFAULT_NC_S 4096
+#define BLIS_DEFAULT_MC_S 120 //1536 //336 //416 // 1280 //160 // 160 // 160 //2048 //336
+#define BLIS_DEFAULT_KC_S 640 //1536 //336 //704 //1280 //672 //528 // 856 //2048 //528
+#define BLIS_DEFAULT_NC_S 3072
-#define BLIS_DEFAULT_MC_D 160
-#define BLIS_DEFAULT_KC_D 304
-#define BLIS_DEFAULT_NC_D 4096
+#define BLIS_DEFAULT_MC_D 120 //1536 //160 //80 //176
+#define BLIS_DEFAULT_KC_D 240 //1536 //304 //336 //368
+#define BLIS_DEFAULT_NC_D 3072
#define BLIS_DEFAULT_MC_C 64
#define BLIS_DEFAULT_KC_C 128
@@ -69,11 +69,11 @@
// -- Register blocksizes --
-#define BLIS_DEFAULT_MR_S 4
-#define BLIS_DEFAULT_NR_S 4
+#define BLIS_DEFAULT_MR_S 8
+#define BLIS_DEFAULT_NR_S 12
-#define BLIS_DEFAULT_MR_D 4
-#define BLIS_DEFAULT_NR_D 4
+#define BLIS_DEFAULT_MR_D 6
+#define BLIS_DEFAULT_NR_D 8
#define BLIS_DEFAULT_MR_C 8
#define BLIS_DEFAULT_NR_C 4
@@ -132,6 +132,8 @@
//#define BLIS_PACKDIM_MR_Z (BLIS_DEFAULT_MR_Z + ...)
//#define BLIS_PACKDIM_NR_Z (BLIS_DEFAULT_NR_Z + ...)
// -- LEVEL-2 KERNEL CONSTANTS -------------------------------------------------
@@ -146,8 +148,8 @@
// -- gemm --
-#define BLIS_SGEMM_UKERNEL bli_sgemm_opt_4x4
-#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_4x4
+#define BLIS_SGEMM_UKERNEL bli_sgemm_opt_8x12
+#define BLIS_DGEMM_UKERNEL bli_dgemm_opt_6x8
// -- trsm-related --

File diff suppressed because it is too large Load Diff