Merge branch 'master' into compose

This commit is contained in:
Field G. Van Zee
2016-09-06 15:47:13 -05:00
3 changed files with 1066 additions and 26 deletions

View File

@@ -262,11 +262,17 @@ A fourth paper, submitted to ACM TOMS, also exists, which proposes an
```
@article{BLIS4,
author = {Tze Meng Low and Francisco D. Igual and Tyler M. Smith and Enrique S. Quintana-Ort\'{\i}},
title = {Analytical Models for the {BLIS} Framework},
author = {Tze Meng Low and Francisco D. Igual and Tyler M. Smith and
Enrique S. Quintana-Ort\'{\i}},
title = {Analytical Modeling Is Enough for High-Performance {BLIS}},
journal = {ACM Transactions on Mathematical Software},
year = 2016,
note = {Accepted},
volume = {43},
number = {2},
pages = {12:1--12:18},
month = aug,
year = {2016},
issue_date = {August 2016},
url = {http://doi.acm.org/10.1145/2925987},
}
```

View File

@@ -49,26 +49,27 @@
// (b) MR (for zero-padding purposes when MR and NR are "swapped")
//
#if 0
// sgemm micro-kernel
#if 0
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_24x4
#define BLIS_DEFAULT_MC_S 264
#define BLIS_DEFAULT_KC_S 128
#define BLIS_DEFAULT_NC_S 4080
#define BLIS_DEFAULT_MR_S 24
#define BLIS_DEFAULT_NR_S 4
#endif
#else
/*
#if 0
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_16x6
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 256
#define BLIS_DEFAULT_NC_S 4080
#define BLIS_DEFAULT_MR_S 16
#define BLIS_DEFAULT_NR_S 6
*/
#endif
#if 1
#define BLIS_SGEMM_UKERNEL bli_sgemm_asm_6x16
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 256
@@ -77,29 +78,29 @@
#define BLIS_DEFAULT_NR_S 16
#define BLIS_SGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif
#if 0
// dgemm micro-kernel
#if 0
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_12x4
#define BLIS_DEFAULT_MC_D 96
#define BLIS_DEFAULT_KC_D 192
#define BLIS_DEFAULT_NC_D 4080
#define BLIS_DEFAULT_MR_D 12
#define BLIS_DEFAULT_NR_D 4
#endif
#else
/*
#if 0
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_8x6
#define BLIS_DEFAULT_MC_D 72
#define BLIS_DEFAULT_KC_D 256
#define BLIS_DEFAULT_NC_D 4080
#define BLIS_DEFAULT_MR_D 8
#define BLIS_DEFAULT_NR_D 6
*/
#endif
#if 1
#define BLIS_DGEMM_UKERNEL bli_dgemm_asm_6x8
#define BLIS_DEFAULT_MC_D 72
#define BLIS_DEFAULT_KC_D 256
@@ -108,10 +109,33 @@
#define BLIS_DEFAULT_NR_D 8
#define BLIS_DGEMM_UKERNEL_PREFERS_CONTIG_ROWS
#endif
// cgemm micro-kernel configuration (this group is enabled via "#if 1").
#if 1
// Kernel symbol to use for cgemm; the "3x8" suffix of its name matches the
// MR x NR defaults defined below.
#define BLIS_CGEMM_UKERNEL bli_cgemm_asm_3x8
// Defined without a value, so only its presence can be tested (#ifdef).
// NOTE(review): the name suggests the kernel prefers row-contiguous
// storage -- confirm against the framework documentation.
#define BLIS_CGEMM_UKERNEL_PREFERS_CONTIG_ROWS
// Default MR and NR for the cgemm kernel.
#define BLIS_DEFAULT_MR_C 3
#define BLIS_DEFAULT_NR_C 8
// Default MC, KC and NC for the cgemm kernel.
// NOTE(review): presumably the blocking sizes paired with MR/NR above
// (MC = 48 * MR, NC = 510 * NR) -- confirm before changing any of them.
#define BLIS_DEFAULT_MC_C 144
#define BLIS_DEFAULT_KC_C 256
#define BLIS_DEFAULT_NC_C 4080
#endif
// zgemm micro-kernel configuration (this group is enabled via "#if 1").
#if 1
// Kernel symbol to use for zgemm; the "3x4" suffix of its name matches the
// MR x NR defaults defined below.
#define BLIS_ZGEMM_UKERNEL bli_zgemm_asm_3x4
// Defined without a value, so only its presence can be tested (#ifdef).
// NOTE(review): the name suggests the kernel prefers row-contiguous
// storage -- confirm against the framework documentation.
#define BLIS_ZGEMM_UKERNEL_PREFERS_CONTIG_ROWS
// Default MR and NR for the zgemm kernel.
#define BLIS_DEFAULT_MR_Z 3
#define BLIS_DEFAULT_NR_Z 4
// Default MC, KC and NC for the zgemm kernel.
// NOTE(review): presumably the blocking sizes paired with MR/NR above
// (MC = 24 * MR, NC = 1020 * NR) -- confirm before changing any of them.
#define BLIS_DEFAULT_MC_Z 72
#define BLIS_DEFAULT_KC_Z 256
#define BLIS_DEFAULT_NC_Z 4080
#endif

File diff suppressed because it is too large Load Diff