diff --git a/README.md b/README.md index 80b5e40f4..ab68af816 100644 --- a/README.md +++ b/README.md @@ -52,7 +52,7 @@ automatically optimize performance across virtually all level-2 and level-3 BLIS operations. In this way, the framework acts as a productivity multiplier. And since the optimized (non-portable) code is compartmentalized within these few kernels, instantiating a high-performance BLIS library on a new -architecture is a relatively straightforward endeavour. +architecture is a relatively straightforward endeavor. * **Generalized matrix storage.** The BLIS framework exports interfaces that allow one to specify both the row stride and column stride of a matrix. This @@ -84,7 +84,7 @@ infrastructure which encodes information about the logical thread topology and allows threads to query and communicate data amongst one another. BLIS also employs so-called "quadratic partitioning" when computing dimension sub-ranges for each thread, so that arbitrary diagonal offsets of structured matrices with -unreferend regions are taken into account to achieve proper load balance. +unreferenced regions are taken into account to achieve proper load balance. * **Ease of use.** The BLIS framework, and the library of routines it generates, are easy to use for end users, experts, and vendors alike. An diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index e83ca4bc1..66eb8dec4 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -231,9 +231,9 @@ PDEF_ST := -DP_BEGIN=40 \ -DP_END=2000 \ -DP_INC=40 -PDEF_MT := -DP_BEGIN=400 \ - -DP_END=8000 \ - -DP_INC=400 +PDEF_MT := -DP_BEGIN=80 \ + -DP_END=4000 \ + -DP_INC=80 diff --git a/test/3m4m/runme.sh b/test/3m4m/runme.sh index 445e40bd0..bb65a5db5 100755 --- a/test/3m4m/runme.sh +++ b/test/3m4m/runme.sh @@ -4,16 +4,24 @@ exec_root="test" out_root="output" +sys="blis" #sys="stampede" -sys="wahlberg" +#sys="wahlberg" # Bind threads to processors. #export OMP_PROC_BIND=true #export GOMP_CPU_AFFINITY="0 2 4 6 8 10 12 14 1 3 5 7 9 11 13 15" -export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15" +#export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7" +export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7" +#export GOMP_CPU_AFFINITY="0 2 4 6 1 3 5 7" +#export GOMP_CPU_AFFINITY="0 4 1 5 2 6 3 7" # Modify LD_LIBRARY_PATH. -if [ ${sys} = "stampede" ]; then +if [ ${sys} = "blis" ]; then + + export LD_LIBRARY_PATH="$LD_LIBRARY_PATH" + +elif [ ${sys} = "stampede" ]; then # A hack to use libiomp5 with gcc. export LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64" @@ -25,7 +33,15 @@ elif [ ${sys} = "wahlberg" ]; then fi # Threading scheme to use when multithreading -if [ ${sys} = "stampede" ]; then +if [ ${sys} = "blis" ]; then + + jc_nt=1 # 5th loop + ic_nt=4 # 3rd loop + jr_nt=1 # 2nd loop + ir_nt=1 # 1st loop + nt=4 + +elif [ ${sys} = "stampede" ]; then jc_nt=2 # 5th loop ic_nt=8 # 3rd loop @@ -43,8 +59,8 @@ elif [ ${sys} = "wahlberg" ]; then fi # Threadedness to test. -threads="st" # mt" -threads_r="st" # mt" +threads="st mt" # st mt" +threads_r="st mt" # mt" # Datatypes to test. dts="z c" @@ -56,7 +72,12 @@ test_ops="${l3_ops}" test_ops_r="${l3_ops}" # Complex domain implementations to test. -if [ ${sys} = "stampede" ]; then +if [ ${sys} = "blis" ]; then + + #test_impls="openblas mkl 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis" + test_impls="openblas 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis" + +elif [ ${sys} = "stampede" ]; then test_impls="openblas mkl asm_blis 3mhw_blis 3m3_blis 3m2_blis 3m1_blis 4mhw_blis 4m1b_blis 4m1a_blis" #test_impls="openblas mkl asm_blis" @@ -68,7 +89,8 @@ elif [ ${sys} = "wahlberg" ]; then fi # Real domain implementations to test. -test_impls_r="openblas acml asm_blis" +#test_impls_r="openblas mkl asm_blis" +test_impls_r="openblas asm_blis" # First perform real test cases. for th in ${threads_r}; do @@ -105,7 +127,6 @@ for th in ${threads_r}; do #export GOMP_CPU_AFFINITY="0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15" fi - # Construct the name of the test executable. exec_name="${exec_root}_${dt}${op}_${im}_${th}.x" diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index e6ae32648..55a98bd96 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -1444,7 +1444,7 @@ void libblis_test_op_driver( test_params_t* params, dt_char = params->datatype_char[dt]; // Build a commented column label string. - libblis_test_build_col_labels_string( op, label_str ); + libblis_test_build_col_labels_string( params, op, label_str ); // Output the column label string. libblis_test_fprintf( stdout, "%s\n", label_str ); @@ -1707,7 +1707,7 @@ void libblis_test_build_dims_string( test_op_t* op, // % dtoper_params_storage m n k gflops resid result -void libblis_test_build_col_labels_string( test_op_t* op, char* l_str ) +void libblis_test_build_col_labels_string( test_params_t* params, test_op_t* op, char* l_str ) { unsigned int n_spaces; char blank_str[64]; @@ -1727,7 +1727,9 @@ void libblis_test_build_col_labels_string( test_op_t* op, char* l_str ) "