From f484c6cd4389dc7ae5b972849e12e98ad5bbf9a4 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 17 Mar 2017 12:07:27 -0500 Subject: [PATCH 1/3] Whitespace reformatting to armv8a kernels file. Details: - Updated formatting of function signature/header in kernels/armv8a/3/bli_gemm_opt_4x4.c. --- kernels/armv8a/3/bli_gemm_opt_4x4.c | 122 +++++++++++++++------------- 1 file changed, 66 insertions(+), 56 deletions(-) diff --git a/kernels/armv8a/3/bli_gemm_opt_4x4.c b/kernels/armv8a/3/bli_gemm_opt_4x4.c index 992750b93..479c2b624 100644 --- a/kernels/armv8a/3/bli_gemm_opt_4x4.c +++ b/kernels/armv8a/3/bli_gemm_opt_4x4.c @@ -50,16 +50,17 @@ * Tested on Juno board. Around 3.1 GFLOPS, 1 x A53 core @ 850 MHz. * Tested on Juno board. Around 12 GFLOPS, 4 x A53 cores @ 850 MHz. */ -void bli_sgemm_opt_8x12( - dim_t k, - float* restrict alpha, - float* restrict a, - float* restrict b, - float* restrict beta, - float* restrict c, inc_t rs_c, inc_t cs_c, - auxinfo_t* data, - cntx_t* restrict cntx - ) +void bli_sgemm_opt_8x12 + ( + dim_t k, + float* restrict alpha, + float* restrict a, + float* restrict b, + float* restrict beta, + float* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) { void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); @@ -1100,16 +1101,17 @@ __asm__ volatile * Tested on Juno board. Around 1.5 GFLOPS, 1 x A53 core @ 850 MHz. * Tested on Juno board. Around 5.5 GFLOPS, 4 x A53 cores @ 850 MHz. 
*/ -void bli_dgemm_opt_6x8( - dim_t k, - double* restrict alpha, - double* restrict a, - double* restrict b, - double* restrict beta, - double* restrict c, inc_t rs_c, inc_t cs_c, - auxinfo_t* data, - cntx_t* restrict cntx - ) +void bli_dgemm_opt_6x8 + ( + dim_t k, + double* restrict alpha, + double* restrict a, + double* restrict b, + double* restrict beta, + double* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) { void* a_next = bli_auxinfo_next_a( data ); void* b_next = bli_auxinfo_next_b( data ); @@ -2070,47 +2072,55 @@ __asm__ volatile } -void bli_cgemm_opt_4x4( - dim_t k, - scomplex* restrict alpha, - scomplex* restrict a, - scomplex* restrict b, - scomplex* restrict beta, - scomplex* restrict c, inc_t rs_c, inc_t cs_c, - auxinfo_t* data, - cntx_t* restrict cntx - ) +void bli_cgemm_opt_4x4 + ( + dim_t k, + scomplex* restrict alpha, + scomplex* restrict a, + scomplex* restrict b, + scomplex* restrict beta, + scomplex* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) { /* Just call the reference implementation. */ - BLIS_CGEMM_UKERNEL_REF( k, - alpha, - a, - b, - beta, - c, rs_c, cs_c, - data, - cntx ); + BLIS_CGEMM_UKERNEL_REF + ( + k, + alpha, + a, + b, + beta, + c, rs_c, cs_c, + data, + cntx + ); } -void bli_zgemm_opt_4x4( - dim_t k, - dcomplex* restrict alpha, - dcomplex* restrict a, - dcomplex* restrict b, - dcomplex* restrict beta, - dcomplex* restrict c, inc_t rs_c, inc_t cs_c, - auxinfo_t* data, - cntx_t* restrict cntx - ) +void bli_zgemm_opt_4x4 + ( + dim_t k, + dcomplex* restrict alpha, + dcomplex* restrict a, + dcomplex* restrict b, + dcomplex* restrict beta, + dcomplex* restrict c, inc_t rs_c, inc_t cs_c, + auxinfo_t* restrict data, + cntx_t* restrict cntx + ) { /* Just call the reference implementation. 
*/ - BLIS_ZGEMM_UKERNEL_REF( k, - alpha, - a, - b, - beta, - c, rs_c, cs_c, - data, - cntx ); + BLIS_ZGEMM_UKERNEL_REF + ( + k, + alpha, + a, + b, + beta, + c, rs_c, cs_c, + data, + cntx + ); } From 6e7de6ef84babb273dc5528a9b9d01f0febe394b Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Fri, 17 Mar 2017 12:10:24 -0500 Subject: [PATCH 2/3] Minor updates to test/3m4m. Details: - Updated initial problem size and increment in Makefile. - Updated code in test_gemm.c to correctly query kc from context. --- test/3m4m/Makefile | 4 ++-- test/3m4m/test_gemm.c | 26 +++++++++++--------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index 9e982032f..7e1fd33bb 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -209,9 +209,9 @@ STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification -PDEF_ST := -DP_BEGIN=80 \ +PDEF_ST := -DP_BEGIN=40 \ -DP_END=2000 \ - -DP_INC=80 + -DP_INC=40 PDEF_MT := -DP_BEGIN=80 \ -DP_END=4000 \ diff --git a/test/3m4m/test_gemm.c b/test/3m4m/test_gemm.c index c8e9ec5d5..c00ca4e25 100644 --- a/test/3m4m/test_gemm.c +++ b/test/3m4m/test_gemm.c @@ -78,24 +78,20 @@ int main( int argc, char** argv ) n_input = -1; k_input = -1; -#if 0 - num_t dt_real = bli_datatype_proj_to_real( DT ); - cntx_t cntx; +#if 1 - bli_gemm_cntx_init( &cntx ); + cntx_t cntx; - // Extract the kc blocksize for the requested datatype and its - // real analogue. - dim_t kc = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, &cntx ); - dim_t kc_real = bli_cntx_get_blksz_def_dt( dt_real, BLIS_KC, &cntx ); + // Initialize a context for the current induced method and datatype. + bli_gemm_cntx_init( &cntx ); + + // Set k to the kc blocksize for the current datatype. + k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, &cntx ); + +#elif 0 + + k_input = 256; - // Assign the k dimension depending on which implementation is - // being tested. 
Note that the BLIS_NAT case handles the real - // domain cases as well as native complex. - if ( IND == BLIS_NAT ) k_input = kc; - else if ( IND == BLIS_3M1 ) k_input = kc_real / 3; - else if ( IND == BLIS_4M1A ) k_input = kc_real / 2; - else k_input = kc_real; #endif // Choose the char corresponding to the requested datatype. From ca3a7924770d6cf203cce4ca9f5482e1d0d4e961 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 2 May 2017 12:09:39 -0500 Subject: [PATCH 3/3] README.md update. Details: - Updated bibtex entries for 4th BLIS paper, and added entries for 5th and 6th BLIS papers. --- README.md | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 9bfa84285..1d7b0ce34 100644 --- a/README.md +++ b/README.md @@ -259,7 +259,9 @@ We also have a third paper, submitted to IPDPS 2014, on achieving ``` A fourth paper, submitted to ACM TOMS, also exists, which proposes an -[analytical model](http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf) for determining blocksize parameters in BLIS: +[analytical model](http://dl.acm.org/citation.cfm?id=2925987) +([unofficial backup link](http://www.cs.utexas.edu/users/flame/pubs/TOMS-BLIS-Analytical.pdf)) +for determining blocksize parameters in BLIS: ``` @article{BLIS4, @@ -277,6 +279,32 @@ A fourth paper, submitted to ACM TOMS, also exists, which proposes an } ``` +A fifth paper, submitted to ACM TOMS, begins the study of so-called +[induced methods for complex matrix multiplication](http://www.cs.utexas.edu/users/flame/pubs/blis5_toms_rev2.pdf): + +``` +@article{BLIS5, + author = {Field G. 
{V}an~{Z}ee and Tyler Smith}, + title = {Implementing high-performance complex matrix multiplication via the 3m and 4m methods}, + journal = {ACM Transactions on Mathematical Software}, + year = {2017}, + note = {accepted} +} +``` + +A sixth paper, submitted to ACM TOMS, revisits the topic of the previous +article and derives a [superior induced method](http://www.cs.utexas.edu/users/flame/pubs/blis6_toms_rev0.pdf): + +``` +@article{BLIS6, + author = {Field G. {V}an~{Z}ee}, + title = {Implementing high-performance complex matrix multiplication via the 1m method}, + journal = {ACM Transactions on Mathematical Software}, + note = {submitted} +} +``` + + Funding -------