diff --git a/kernels/bgq/1f/bli_axpyf_opt_var1.c b/kernels/bgq/1f/bli_axpyf_opt_var1.c index e31cd6b06..2925dfe25 100644 --- a/kernels/bgq/1f/bli_axpyf_opt_var1.c +++ b/kernels/bgq/1f/bli_axpyf_opt_var1.c @@ -174,15 +174,15 @@ void bli_dddaxpyf_opt_var1( if ( bli_zero_dim2( m, b_n ) ) return; bool_t use_ref = FALSE; -// printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\n", b_n, PASTEMAC(d, axpyf_fuse_fac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32)); +// printf("%d\t%d\t%d\t%d\t%d\t%d\t%d\n", b_n, PASTEMAC(d, axpyf_fusefac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32)); // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. - if ( b_n < PASTEMAC(d,axpyf_fuse_fac) || inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( a, 32 ) || bli_is_unaligned_to( y, 32 ) ) + if ( b_n < PASTEMAC(d,axpyf_fusefac) || inca != 1 || incx != 1 || incy != 1 || bli_is_unaligned_to( a, 32 ) || bli_is_unaligned_to( y, 32 ) ) use_ref = TRUE; // Call the reference implementation if needed. if ( use_ref == TRUE ) { -// printf("%d\t%d\t%d\t%d\t%d\t%d\n", PASTEMAC(d, axpyf_fuse_fac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32)); +// printf("%d\t%d\t%d\t%d\t%d\t%d\n", PASTEMAC(d, axpyf_fusefac), inca, incx, incy, bli_is_unaligned_to(a, 32), bli_is_unaligned_to( y, 32)); // printf("DEFAULTING TO REFERENCE IMPLEMENTATION\n"); PASTEMAC3(d,d,d,axpyf_unb_var1)( conja, conjx, m, b_n, alpha_cast, a_cast, inca, lda, x_cast, incx, y_cast, incy ); return; diff --git a/kernels/bgq/1f/bli_axpyf_opt_var1.h b/kernels/bgq/1f/bli_axpyf_opt_var1.h index 721bf6ff7..79c32604c 100644 --- a/kernels/bgq/1f/bli_axpyf_opt_var1.h +++ b/kernels/bgq/1f/bli_axpyf_opt_var1.h @@ -26,39 +26,28 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* -void bli_axpyf_opt_var1( obj_t* alpha, - obj_t* x, - obj_t* y ); -*/ // -// Define fusing factors. +// Prototype axpyf kernel interfaces. // -#define bli_saxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S -#define bli_daxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D -#define bli_caxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C -#define bli_zaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z - - #undef GENTPROT3U12 #define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \ \ void PASTEMAC3(cha,chx,chy,varname)( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t n, \ - void* alpha, \ - void* a, inc_t inca, inc_t lda, \ - void* x, inc_t incx, \ - void* y, inc_t incy \ + conj_t conja, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype_ax* restrict alpha, \ + ctype_a* restrict a, inc_t inca, inc_t lda, \ + ctype_x* restrict x, inc_t incx, \ + ctype_y* restrict y, inc_t incy \ ); INSERT_GENTPROT3U12_BASIC( axpyf_opt_var1 ) diff --git a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c index ec0f5fec3..04218495c 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c +++ b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c @@ -208,7 +208,7 @@ void bli_dddaxpyf_opt_var1( // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. - if ( b_n < PASTEMAC(d,axpyf_fuse_fac) ) + if ( b_n < PASTEMAC(d,axpyf_fusefac) ) { use_ref = TRUE; } diff --git a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c.alt b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c.alt index fe09abc25..220ec1976 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c.alt +++ b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.c.alt @@ -201,7 +201,7 @@ void bli_dddaxpyf_opt_var1( if ( bli_zero_dim2( m, b_n ) ) return; - if ( b_n < PASTEMAC(d,axpyf_fuse_fac) ) + if ( b_n < PASTEMAC(d,axpyf_fusefac) ) { PASTEMAC3(d,d,d,axpyf_unb_var1)( conja, conjx, diff --git a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.h b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.h index 721bf6ff7..79c32604c 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.h +++ b/kernels/x86_64/core2-sse3/1f/bli_axpyf_opt_var1.h @@ -26,39 +26,28 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -/* -void bli_axpyf_opt_var1( obj_t* alpha, - obj_t* x, - obj_t* y ); -*/ // -// Define fusing factors. +// Prototype axpyf kernel interfaces. // -#define bli_saxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S -#define bli_daxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D -#define bli_caxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C -#define bli_zaxpyf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z - - #undef GENTPROT3U12 #define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \ \ void PASTEMAC3(cha,chx,chy,varname)( \ - conj_t conja, \ - conj_t conjx, \ - dim_t m, \ - dim_t n, \ - void* alpha, \ - void* a, inc_t inca, inc_t lda, \ - void* x, inc_t incx, \ - void* y, inc_t incy \ + conj_t conja, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype_ax* restrict alpha, \ + ctype_a* restrict a, inc_t inca, inc_t lda, \ + ctype_x* restrict x, inc_t incx, \ + ctype_y* restrict y, inc_t incy \ ); INSERT_GENTPROT3U12_BASIC( axpyf_opt_var1 ) diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c b/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c index 06b070277..621724ab2 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.c @@ -183,7 +183,7 @@ void bli_ddddotxaxpyf_opt_var1( // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. - if ( b_n < PASTEMAC(d,dotxaxpyf_fuse_fac) ) + if ( b_n < PASTEMAC(d,dotxaxpyf_fusefac) ) { use_ref = TRUE; } diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.h b/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.h index 27ca9c990..ae29e0ed2 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.h +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxaxpyf_opt_var1.h @@ -26,37 +26,30 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -// Define fusing factors. -#define bli_sdotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_S ) -#define bli_ddotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_D ) -#define bli_cdotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_C / 2 ) -#define bli_zdotxaxpyf_fuse_fac ( BLIS_DEFAULT_FUSING_FACTOR_Z / 2 ) - - #undef GENTPROT3U12 #define GENTPROT3U12( ctype_a, ctype_b, ctype_c, ctype_ab, cha, chb, chc, chab, varname ) \ \ void PASTEMAC3(cha,chb,chc,varname)( \ - conj_t conjat, \ - conj_t conja, \ - conj_t conjw, \ - conj_t conjx, \ - dim_t m, \ - dim_t n, \ - void* alpha, \ - void* a, inc_t inca, inc_t lda, \ - void* w, inc_t incw, \ - void* x, inc_t incx, \ - void* beta, \ - void* y, inc_t incy, \ - void* z, inc_t incz \ + conj_t conjat, \ + conj_t conja, \ + conj_t conjw, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype_ab* restrict alpha, \ + ctype_a* restrict a, inc_t inca, inc_t lda, \ + ctype_b* restrict w, inc_t incw, \ + ctype_b* restrict x, inc_t incx, \ + ctype_c* restrict beta, \ + ctype_c* restrict y, inc_t incy, \ + ctype_c* restrict z, inc_t incz \ ); INSERT_GENTPROT3U12_BASIC( dotxaxpyf_opt_var1 ) diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c index fd750a381..99f3e720c 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c @@ -158,7 +158,7 @@ void bli_ddddotxf_opt_var1( // If there is anything that would interfere with our use of aligned // vector loads/stores, call the reference implementation. - if ( b_n < PASTEMAC(d,dotxf_fuse_fac) ) + if ( b_n < PASTEMAC(d,dotxf_fusefac) ) { use_ref = TRUE; } diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt index b6cd23ec7..e5c0f517b 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.c.alt @@ -231,7 +231,7 @@ void bli_ddddotxf_opt_var1( return; } - if ( b_m < PASTEMAC(d,dotxf_fuse_fac) ) + if ( b_m < PASTEMAC(d,dotxf_fusefac) ) { PASTEMAC3(d,d,d,dotxf_unb_var1)( conjx, conjy, diff --git a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.h b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.h index c7c7143e0..b70d262c5 100644 --- a/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.h +++ b/kernels/x86_64/core2-sse3/1f/bli_dotxf_opt_var1.h @@ -26,7 +26,7 @@ SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY - THEORY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. @@ -34,27 +34,21 @@ // -// Define fusing factors for dotxf operation. +// Prototype dotxf kernel interfaces. // -#define bli_sdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_S -#define bli_ddotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_D -#define bli_cdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_C -#define bli_zdotxf_fuse_fac BLIS_DEFAULT_FUSING_FACTOR_Z - - #undef GENTPROT3U12 #define GENTPROT3U12( ctype_a, ctype_x, ctype_y, ctype_ax, cha, chx, chy, chax, varname ) \ \ -void PASTEMAC3(chx,chy,chr,varname)( \ - conj_t conjat, \ - conj_t conjx, \ - dim_t m, \ - dim_t b_n, \ - void* alpha, \ - void* a, inc_t inca, inc_t lda, \ - void* x, inc_t incx, \ - void* beta, \ - void* y, inc_t incy \ +void PASTEMAC3(cha,chx,chy,varname)( \ + conj_t conjat, \ + conj_t conjx, \ + dim_t m, \ + dim_t b_n, \ + ctype_ax* restrict alpha, \ + ctype_a* restrict a, inc_t inca, inc_t lda, \ + ctype_x* restrict x, inc_t incx, \ + ctype_y* restrict beta, \ + ctype_y* restrict y, inc_t incy \ ); INSERT_GENTPROT3U12_BASIC( dotxf_opt_var1 ) diff --git a/windows/build/bli_kernel.h b/windows/build/bli_kernel.h index 95fb3056f..6a270d8d5 100644 --- a/windows/build/bli_kernel.h +++ b/windows/build/bli_kernel.h @@ -54,21 +54,21 @@ // (b) NR (for triangular operations such as trmm and trsm). // -#define BLIS_DEFAULT_MC_S 256 -#define BLIS_DEFAULT_KC_S 256 -#define BLIS_DEFAULT_NC_S 8192 +#define BLIS_DEFAULT_MC_S 64 +#define BLIS_DEFAULT_KC_S 128 +#define BLIS_DEFAULT_NC_S 4096 -#define BLIS_DEFAULT_MC_D 128 -#define BLIS_DEFAULT_KC_D 256 +#define BLIS_DEFAULT_MC_D 64 +#define BLIS_DEFAULT_KC_D 128 #define BLIS_DEFAULT_NC_D 4096 -#define BLIS_DEFAULT_MC_C 128 -#define BLIS_DEFAULT_KC_C 256 +#define BLIS_DEFAULT_MC_C 64 +#define BLIS_DEFAULT_KC_C 128 #define BLIS_DEFAULT_NC_C 4096 #define BLIS_DEFAULT_MC_Z 64 -#define BLIS_DEFAULT_KC_Z 256 -#define BLIS_DEFAULT_NC_Z 2048 +#define BLIS_DEFAULT_KC_Z 128 +#define BLIS_DEFAULT_NC_Z 4096 // -- Cache blocksize extensions (for optimizing edge cases) -- @@ -100,16 +100,16 @@ // in the m and n dimensions should all be equal to the size expected by // the reference micro-kernel(s). -#define BLIS_DEFAULT_MR_S 8 +#define BLIS_DEFAULT_MR_S 4 #define BLIS_DEFAULT_NR_S 4 -#define BLIS_DEFAULT_MR_D 8 +#define BLIS_DEFAULT_MR_D 4 #define BLIS_DEFAULT_NR_D 4 -#define BLIS_DEFAULT_MR_C 8 +#define BLIS_DEFAULT_MR_C 4 #define BLIS_DEFAULT_NR_C 4 -#define BLIS_DEFAULT_MR_Z 8 +#define BLIS_DEFAULT_MR_Z 4 #define BLIS_DEFAULT_NR_Z 4 // NOTE: If the micro-kernel, which is typically unrolled to a factor @@ -220,10 +220,25 @@ // of level-1f operations. They are here only for use when these operations // are optimized. -#define BLIS_DEFAULT_FUSING_FACTOR_S 8 -#define BLIS_DEFAULT_FUSING_FACTOR_D 4 -#define BLIS_DEFAULT_FUSING_FACTOR_C 4 -#define BLIS_DEFAULT_FUSING_FACTOR_Z 2 +#define BLIS_DEFAULT_FUSE_FAC_S 8 +#define BLIS_DEFAULT_FUSE_FAC_D 4 +#define BLIS_DEFAULT_FUSE_FAC_C 4 +#define BLIS_DEFAULT_FUSE_FAC_Z 2 + +#define BLIS_AXPYF_FUSE_FAC_S BLIS_DEFAULT_FUSE_FAC_S +#define BLIS_AXPYF_FUSE_FAC_D BLIS_DEFAULT_FUSE_FAC_D +#define BLIS_AXPYF_FUSE_FAC_C BLIS_DEFAULT_FUSE_FAC_C +#define BLIS_AXPYF_FUSE_FAC_Z BLIS_DEFAULT_FUSE_FAC_Z + +#define BLIS_DOTXF_FUSE_FAC_S BLIS_DEFAULT_FUSE_FAC_S +#define BLIS_DOTXF_FUSE_FAC_D BLIS_DEFAULT_FUSE_FAC_D +#define BLIS_DOTXF_FUSE_FAC_C BLIS_DEFAULT_FUSE_FAC_C +#define BLIS_DOTXF_FUSE_FAC_Z BLIS_DEFAULT_FUSE_FAC_Z + +#define BLIS_DOTXAXPYF_FUSE_FAC_S BLIS_DEFAULT_FUSE_FAC_S +#define BLIS_DOTXAXPYF_FUSE_FAC_D BLIS_DEFAULT_FUSE_FAC_D +#define BLIS_DOTXAXPYF_FUSE_FAC_C BLIS_DEFAULT_FUSE_FAC_C +#define BLIS_DOTXAXPYF_FUSE_FAC_Z BLIS_DEFAULT_FUSE_FAC_Z