diff --git a/frame/3/bli_l3_oapi.c b/frame/3/bli_l3_oapi.c index 701428a59..0340cf566 100644 --- a/frame/3/bli_l3_oapi.c +++ b/frame/3/bli_l3_oapi.c @@ -64,7 +64,7 @@ void PASTEMAC(opname,EX_SUF) \ where mixed datatype support will be implemented (if at all). */ \ if ( bli_obj_dt( c ) == bli_obj_dt( a ) && \ bli_obj_dt( c ) == bli_obj_dt( b ) && \ - bli_obj_dt( c ) == bli_obj_exec_dt( c ) && \ + bli_obj_dt( c ) == bli_obj_comp_dt( c ) && \ bli_obj_is_complex( c ) ) \ { \ /* Invoke the operation's "ind" function--its induced method front-end. diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index e91b100b2..b129ab020 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -201,9 +201,9 @@ STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification -PDEF_ST := -DP_BEGIN=96 \ +PDEF_ST := -DP_BEGIN=40 \ -DP_END=2000 \ - -DP_INC=96 + -DP_INC=40 PDEF_MT := -DP_BEGIN=192 \ -DP_END=3000 \ @@ -215,34 +215,57 @@ PDEF_MT := -DP_BEGIN=192 \ # --- Targets/rules ------------------------------------------------------------ # -all: all-st all-mt -blis: blis-st blis-mt -blis-nat: blis-nat-st blis-nat-mt -openblas: openblas-st openblas-mt -mkl: mkl-st mkl-mt +all: all-st all-mt +blis: blis-st blis-mt +openblas: openblas-st openblas-mt +mkl: mkl-st mkl-mt -all-st: blis-st openblas-st mkl-st -all-mt: blis-mt openblas-mt mkl-mt +all-st: blis-st openblas-st mkl-st +all-mt: blis-mt openblas-mt mkl-mt -blis-st: blis-gemm-st -blis-mt: blis-gemm-mt +blis-st: blis-nat-st +blis-mt: blis-nat-mt -openblas-st: openblas-gemm-st -openblas-mt: openblas-gemm-mt +blis-ind: blis-ind-st blis-ind-mt +blis-nat: blis-nat-st blis-nat-mt -mkl-st: mkl-gemm-st -mkl-mt: mkl-gemm-mt +blis-ind-st: \ + test_cgemm_3mhw_blis_st.x \ + test_zgemm_3mhw_blis_st.x \ + test_cgemm_3m1_blis_st.x \ + test_zgemm_3m1_blis_st.x \ + test_cgemm_4mhw_blis_st.x \ + test_zgemm_4mhw_blis_st.x \ + test_cgemm_4m1b_blis_st.x \ + test_zgemm_4m1b_blis_st.x \ + test_cgemm_4m1a_blis_st.x \ + 
test_zgemm_4m1a_blis_st.x \ + test_cgemm_1m_blis_st.x \ + test_zgemm_1m_blis_st.x -blis-gemm-st: blis-gemm-nat-st \ - blis-gemm-ind-st -blis-gemm-mt: blis-gemm-nat-mt \ - blis-gemm-ind-mt +blis-ind-mt: \ + test_cgemm_3mhw_blis_mt.x \ + test_zgemm_3mhw_blis_mt.x \ + test_cgemm_3m1_blis_mt.x \ + test_zgemm_3m1_blis_mt.x \ + test_cgemm_4mhw_blis_mt.x \ + test_zgemm_4mhw_blis_mt.x \ + test_cgemm_4m1b_blis_mt.x \ + test_zgemm_4m1b_blis_mt.x \ + test_cgemm_4m1a_blis_mt.x \ + test_zgemm_4m1a_blis_mt.x \ + test_cgemm_1m_blis_mt.x \ + test_zgemm_1m_blis_mt.x blis-nat-st: \ test_sgemm_asm_blis_st.x \ test_dgemm_asm_blis_st.x \ test_cgemm_asm_blis_st.x \ test_zgemm_asm_blis_st.x \ + test_shemm_asm_blis_st.x \ + test_dhemm_asm_blis_st.x \ + test_chemm_asm_blis_st.x \ + test_zhemm_asm_blis_st.x \ test_sherk_asm_blis_st.x \ test_dherk_asm_blis_st.x \ test_cherk_asm_blis_st.x \ @@ -261,6 +284,10 @@ blis-nat-mt: \ test_dgemm_asm_blis_mt.x \ test_cgemm_asm_blis_mt.x \ test_zgemm_asm_blis_mt.x \ + test_shemm_asm_blis_mt.x \ + test_dhemm_asm_blis_mt.x \ + test_chemm_asm_blis_mt.x \ + test_zhemm_asm_blis_mt.x \ test_sherk_asm_blis_mt.x \ test_dherk_asm_blis_mt.x \ test_cherk_asm_blis_mt.x \ @@ -274,69 +301,94 @@ blis-nat-mt: \ test_ctrsm_asm_blis_mt.x \ test_ztrsm_asm_blis_mt.x -blis-gemm-nat-st: \ - test_sgemm_asm_blis_st.x \ - test_dgemm_asm_blis_st.x \ - test_cgemm_asm_blis_st.x \ - test_zgemm_asm_blis_st.x - -blis-gemm-nat-mt: \ - test_sgemm_asm_blis_mt.x \ - test_dgemm_asm_blis_mt.x \ - test_cgemm_asm_blis_mt.x \ - test_zgemm_asm_blis_mt.x - -blis-gemm-ind-st: \ - test_cgemm_3mhw_blis_st.x \ - test_zgemm_3mhw_blis_st.x \ - test_cgemm_3m1_blis_st.x \ - test_zgemm_3m1_blis_st.x \ - test_cgemm_4mhw_blis_st.x \ - test_zgemm_4mhw_blis_st.x \ - test_cgemm_4m1b_blis_st.x \ - test_zgemm_4m1b_blis_st.x \ - test_cgemm_4m1a_blis_st.x \ - test_zgemm_4m1a_blis_st.x \ - test_cgemm_1m_blis_st.x \ - test_zgemm_1m_blis_st.x - -blis-gemm-ind-mt: \ - test_cgemm_3mhw_blis_mt.x \ - 
test_zgemm_3mhw_blis_mt.x \ - test_cgemm_3m1_blis_mt.x \ - test_zgemm_3m1_blis_mt.x \ - test_cgemm_4mhw_blis_mt.x \ - test_zgemm_4mhw_blis_mt.x \ - test_cgemm_4m1b_blis_mt.x \ - test_zgemm_4m1b_blis_mt.x \ - test_cgemm_4m1a_blis_mt.x \ - test_zgemm_4m1a_blis_mt.x \ - test_cgemm_1m_blis_mt.x \ - test_zgemm_1m_blis_mt.x - -openblas-gemm-st: \ +openblas-st: \ test_sgemm_openblas_st.x \ test_dgemm_openblas_st.x \ test_cgemm_openblas_st.x \ - test_zgemm_openblas_st.x + test_zgemm_openblas_st.x \ + test_shemm_openblas_st.x \ + test_dhemm_openblas_st.x \ + test_chemm_openblas_st.x \ + test_zhemm_openblas_st.x \ + test_sherk_openblas_st.x \ + test_dherk_openblas_st.x \ + test_cherk_openblas_st.x \ + test_zherk_openblas_st.x \ + test_strmm_openblas_st.x \ + test_dtrmm_openblas_st.x \ + test_ctrmm_openblas_st.x \ + test_ztrmm_openblas_st.x \ + test_strsm_openblas_st.x \ + test_dtrsm_openblas_st.x \ + test_ctrsm_openblas_st.x \ + test_ztrsm_openblas_st.x -openblas-gemm-mt: \ +openblas-mt: \ test_sgemm_openblas_mt.x \ test_dgemm_openblas_mt.x \ test_cgemm_openblas_mt.x \ - test_zgemm_openblas_mt.x + test_zgemm_openblas_mt.x \ + test_shemm_openblas_mt.x \ + test_dhemm_openblas_mt.x \ + test_chemm_openblas_mt.x \ + test_zhemm_openblas_mt.x \ + test_sherk_openblas_mt.x \ + test_dherk_openblas_mt.x \ + test_cherk_openblas_mt.x \ + test_zherk_openblas_mt.x \ + test_strmm_openblas_mt.x \ + test_dtrmm_openblas_mt.x \ + test_ctrmm_openblas_mt.x \ + test_ztrmm_openblas_mt.x \ + test_strsm_openblas_mt.x \ + test_dtrsm_openblas_mt.x \ + test_ctrsm_openblas_mt.x \ + test_ztrsm_openblas_mt.x -mkl-gemm-st: \ +mkl-st: \ test_sgemm_mkl_st.x \ test_dgemm_mkl_st.x \ test_cgemm_mkl_st.x \ - test_zgemm_mkl_st.x + test_zgemm_mkl_st.x \ + test_shemm_mkl_st.x \ + test_dhemm_mkl_st.x \ + test_chemm_mkl_st.x \ + test_zhemm_mkl_st.x \ + test_sherk_mkl_st.x \ + test_dherk_mkl_st.x \ + test_cherk_mkl_st.x \ + test_zherk_mkl_st.x \ + test_strmm_mkl_st.x \ + test_dtrmm_mkl_st.x \ + test_ctrmm_mkl_st.x \ + 
test_ztrmm_mkl_st.x \ + test_strsm_mkl_st.x \ + test_dtrsm_mkl_st.x \ + test_ctrsm_mkl_st.x \ + test_ztrsm_mkl_st.x -mkl-gemm-mt: \ +mkl-mt: \ test_sgemm_mkl_mt.x \ test_dgemm_mkl_mt.x \ test_cgemm_mkl_mt.x \ - test_zgemm_mkl_mt.x + test_zgemm_mkl_mt.x \ + test_shemm_mkl_mt.x \ + test_dhemm_mkl_mt.x \ + test_chemm_mkl_mt.x \ + test_zhemm_mkl_mt.x \ + test_sherk_mkl_mt.x \ + test_dherk_mkl_mt.x \ + test_cherk_mkl_mt.x \ + test_zherk_mkl_mt.x \ + test_strmm_mkl_mt.x \ + test_dtrmm_mkl_mt.x \ + test_ctrmm_mkl_mt.x \ + test_ztrmm_mkl_mt.x \ + test_strsm_mkl_mt.x \ + test_dtrsm_mkl_mt.x \ + test_ctrsm_mkl_mt.x \ + test_ztrsm_mkl_mt.x + diff --git a/test/3m4m/test_hemm.c b/test/3m4m/test_hemm.c new file mode 100644 index 000000000..cf640b4f6 --- /dev/null +++ b/test/3m4m/test_hemm.c @@ -0,0 +1,331 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include <unistd.h> +#include "blis.h" + +//#define PRINT + +int main( int argc, char** argv ) +{ + obj_t a, b, c; + obj_t c_save; + obj_t alpha, beta; + dim_t m, n; + dim_t p; + dim_t p_begin, p_end, p_inc; + int m_input, n_input; + ind_t ind; + num_t dt; + char dt_ch; + int r, n_repeats; + side_t side; + uplo_t uploa; + f77_char f77_side; + f77_char f77_uploa; + + double dtime; + double dtime_save; + double gflops; + + //bli_init(); + + //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); + + n_repeats = 3; + + dt = DT; + + ind = IND; + + p_begin = P_BEGIN; + p_end = P_END; + p_inc = P_INC; + + m_input = -1; + n_input = -1; + + + // Suppress compiler warnings about unused variable 'ind'. + ( void )ind; + +#if 0 + + cntx_t* cntx; + + ind_t ind_mod = ind; + + // A hack to use 3m1 as 1mpb (with 1m as 1mbp). + if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; + + // Initialize a context for the current induced method and datatype. + cntx = bli_gks_query_ind_cntx( ind_mod, dt ); + + // Set k to the kc blocksize for the current datatype. + k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); + +#elif 1 + + //k_input = 256; + +#endif + + // Choose the char corresponding to the requested datatype. 
+ if ( bli_is_float( dt ) ) dt_ch = 's'; + else if ( bli_is_double( dt ) ) dt_ch = 'd'; + else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; + else dt_ch = 'z'; + + side = BLIS_LEFT; + uploa = BLIS_LOWER; + + bli_param_map_blis_to_netlib_side( side, &f77_side ); + bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); + + // Begin with initializing the last entry to zero so that + // matlab allocates space for the entire array once up-front. + for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; +#ifdef BLIS + printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); +#else + printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); +#endif + printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin + 1)/p_inc + 1, + ( unsigned long )0, + ( unsigned long )0, 0.0 ); + + + for ( p = p_begin; p <= p_end; p += p_inc ) + { + + if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); + else m = ( dim_t ) m_input; + if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); + else n = ( dim_t ) n_input; + + bli_obj_create( dt, 1, 1, 0, 0, &alpha ); + bli_obj_create( dt, 1, 1, 0, 0, &beta ); + + if ( bli_is_left( side ) ) + bli_obj_create( dt, m, m, 0, 0, &a ); + else + bli_obj_create( dt, n, n, 0, 0, &a ); + bli_obj_create( dt, m, n, 0, 0, &b ); + bli_obj_create( dt, m, n, 0, 0, &c ); + bli_obj_create( dt, m, n, 0, 0, &c_save ); + + bli_randm( &a ); + bli_randm( &b ); + bli_randm( &c ); + + bli_obj_set_struc( BLIS_HERMITIAN, &a ); + bli_obj_set_uplo( uploa, &a ); + + // Make A densely Hermitian, and zero the unstored triangle to + // ensure the implementation reads only from the stored region. 
+ bli_mkherm( &a ); + bli_mktrim( &a ); + + bli_setsc( (2.0/1.0), 0.0, &alpha ); + bli_setsc( (1.0/1.0), 0.0, &beta ); + + + bli_copym( &c, &c_save ); + +#ifdef BLIS + bli_ind_disable_all_dt( dt ); + bli_ind_enable_dt( ind, dt ); +#endif + + dtime_save = DBL_MAX; + + for ( r = 0; r < n_repeats; ++r ) + { + bli_copym( &c_save, &c ); + + dtime = bli_clock(); + + +#ifdef PRINT + bli_printm( "a", &a, "%4.1f", "" ); + bli_printm( "b", &b, "%4.1f", "" ); + bli_printm( "c", &c, "%4.1f", "" ); +#endif + +#ifdef BLIS + + bli_hemm( side, + &alpha, + &a, + &b, + &beta, + &c ); + +#else + + if ( bli_is_float( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + float* alphap = bli_obj_buffer( &alpha ); + float* ap = bli_obj_buffer( &a ); + float* bp = bli_obj_buffer( &b ); + float* betap = bli_obj_buffer( &beta ); + float* cp = bli_obj_buffer( &c ); + + ssymm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_double( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + double* alphap = bli_obj_buffer( &alpha ); + double* ap = bli_obj_buffer( &a ); + double* bp = bli_obj_buffer( &b ); + double* betap = bli_obj_buffer( &beta ); + double* cp = bli_obj_buffer( &c ); + + dsymm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_scomplex( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + scomplex* alphap = bli_obj_buffer( &alpha ); + scomplex* ap = bli_obj_buffer( &a ); + 
scomplex* bp = bli_obj_buffer( &b ); + scomplex* betap = bli_obj_buffer( &beta ); + scomplex* cp = bli_obj_buffer( &c ); + + chemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_dcomplex( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + dcomplex* alphap = bli_obj_buffer( &alpha ); + dcomplex* ap = bli_obj_buffer( &a ); + dcomplex* bp = bli_obj_buffer( &b ); + dcomplex* betap = bli_obj_buffer( &beta ); + dcomplex* cp = bli_obj_buffer( &c ); + + zhemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } +#endif + +#ifdef PRINT + bli_printm( "c after", &c, "%4.1f", "" ); + exit(1); +#endif + + + dtime_save = bli_clock_min_diff( dtime_save, dtime ); + } + + if ( bli_is_left( side ) ) + gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); + else + gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); + + if ( bli_is_complex( dt ) ) gflops *= 4.0; + +#ifdef BLIS + printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); +#else + printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); +#endif + printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin + 1)/p_inc + 1, + ( unsigned long )m, + ( unsigned long )n, gflops ); + + bli_obj_free( &alpha ); + bli_obj_free( &beta ); + + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + bli_obj_free( &c_save ); + } + + //bli_finalize(); + + return 0; +} + diff --git a/test/3m4m/test_herk.c b/test/3m4m/test_herk.c index 66a057a59..6616ffe9f 100644 --- a/test/3m4m/test_herk.c +++ b/test/3m4m/test_herk.c @@ -242,7 +242,7 @@ int main( int argc, char** argv ) f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); - scomplex* betap = bli_obj_buffer( &beta ); 
+ float* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cherk_( &f77_uploc, @@ -262,7 +262,7 @@ int main( int argc, char** argv ) f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); - dcomplex* betap = bli_obj_buffer( &beta ); + double* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zherk_( &f77_uploc, diff --git a/test/mixeddt/Makefile b/test/mixeddt/Makefile index cb9c3484e..80f05fc21 100644 --- a/test/mixeddt/Makefile +++ b/test/mixeddt/Makefile @@ -100,38 +100,6 @@ endif # BLAS library path(s). This is where the BLAS libraries reside. HOME_LIB_PATH := $(HOME)/flame/lib -#MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64 -#MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64 -MKL_LIB_PATH := ${MKLROOT}/lib/intel64 -#ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64 - -# OpenBLAS -OPENBLAS_LIB := $(HOME_LIB_PATH)/libopenblas.a -OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a - -# ATLAS -ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \ - $(HOME_LIB_PATH)/libatlas.a - -# MKL -MKL_LIB := -L$(MKL_LIB_PATH) \ - -lmkl_intel_lp64 \ - -lmkl_core \ - -lmkl_sequential \ - -lpthread -lm -ldl -#MKLP_LIB := -L$(MKL_LIB_PATH) \ -# -lmkl_intel_thread \ -# -lmkl_core \ -# -lmkl_intel_ilp64 \ -# -L$(ICC_LIB_PATH) \ -# -liomp5 -MKLP_LIB := -L$(MKL_LIB_PATH) \ - -lmkl_intel_lp64 \ - -lmkl_core \ - -lmkl_gnu_thread \ - -lpthread -lm -ldl -fopenmp - #-L$(ICC_LIB_PATH) \ - #-lgomp @@ -162,42 +130,18 @@ CFLAGS += -I$(TEST_SRC_PATH) LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L) -# Datatypes for A, B, and C. 
-#DTA_S := -DDTA=BLIS_FLOAT -#DTA_D := -DDTA=BLIS_DOUBLE -#DTA_C := -DDTA=BLIS_SCOMPLEX -#DTA_Z := -DDTA=BLIS_DCOMPLEX -# -#DTB_S := -DDTB=BLIS_FLOAT -#DTB_D := -DDTB=BLIS_DOUBLE -#DTB_C := -DDTB=BLIS_SCOMPLEX -#DTB_Z := -DDTB=BLIS_DCOMPLEX -# -#DTC_S := -DDTC=BLIS_FLOAT -#DTC_D := -DDTC=BLIS_DOUBLE -#DTC_C := -DDTC=BLIS_SCOMPLEX -#DTC_Z := -DDTC=BLIS_DCOMPLEX -# -#DTX_S := -DDTC=BLIS_FLOAT -#DTX_D := -DDTC=BLIS_DOUBLE - # Which library? BLI_DEF := -DBLIS BLA_DEF := -DBLAS -# Implementation string -STR_BLI := -DSTR=\"asm_blis\" -STR_OBL := -DSTR=\"openblas\" -STR_MKL := -DSTR=\"mkl\" - # Single or multithreaded string STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification -PDEF_ST := -DP_BEGIN=96 \ - -DP_END=1200 \ - -DP_INC=96 +PDEF_ST := -DP_BEGIN=40 \ + -DP_END=2000 \ + -DP_INC=40 PDEF_MT := -DP_BEGIN=80 \ -DP_END=4000 \ @@ -232,9 +176,9 @@ get-cdef-x = $(strip $(subst s,-DDTX=BLIS_FLOAT, \ get-cdefs = $(call get-cdef-c,$(1)) $(call get-cdef-a,$(1)) $(call get-cdef-b,$(1)) $(call get-cdef-x,$(1)) # Define a function to return the appropriate -DSTR= and -D[BLIS|BLAS] flags. -get-idefs = $(strip $(subst asm_blis,-DSTR=\"$(1)\" -DBLIS, \ - $(subst openblas,-DSTR=\"$(1)\" -DBLAS, \ - $(subst mkl,-DSTR=\"$(1)\" -DBLAS,$(1))))) +get-idefs = $(strip $(subst intern,-DSTR=\"$(1)\" -DBLIS, \ + $(subst ad-hoc,-DSTR=\"$(1)\" -DBLAS, \ + $(subst mkl,-DSTR=\"$(1)\" -DBLAS,$(1))))) # Enumerate all possible datatype combinations. DT_CODES := $(foreach dt0,$(dts),$(foreach dt1,$(dts),$(foreach dt2,$(dts),$(foreach pr,$(prs),$(dt0)_$(dt1)_$(dt2)_$(pr))))) @@ -243,21 +187,15 @@ DT_CODES := $(foreach dt0,$(dts),$(foreach dt1,$(dts),$(foreach dt2,$(dts),$(for DT_COMBOS := $(foreach code,$(DT_CODES),$(call get-cstr,$(code))) # Build a list of BLIS, OpenBLAS, and MKL executables. 
-BLIS_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_asm_blis_st.o) -BLIS_BINS_ST := $(patsubst %.o,%.x,$(BLIS_OBJS_ST)) -OPENBLAS_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_openblas_st.o) -OPENBLAS_BINS_ST := $(patsubst %.o,%.x,$(OPENBLAS_OBJS_ST)) +INTERN_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_intern_st.o) +INTERN_BINS_ST := $(patsubst %.o,%.x,$(INTERN_OBJS_ST)) +AD_HOC_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_ad-hoc_st.o) +AD_HOC_BINS_ST := $(patsubst %.o,%.x,$(AD_HOC_OBJS_ST)) -BLIS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_asm_blis_mt.o) -BLIS_BINS_MT := $(patsubst %.o,%.x,$(BLIS_OBJS_MT)) -OPENBLAS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_openblas_mt.o) -OPENBLAS_BINS_MT := $(patsubst %.o,%.x,$(OPENBLAS_OBJS_MT)) - -#MKL_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_mkl_st.o) - -#BLIS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_asm_blis_mt.o) -#OPENBLAS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_openblas_mt.o) -#MKL_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_mkl_mt.o) +INTERN_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_intern_mt.o) +INTERN_BINS_MT := $(patsubst %.o,%.x,$(INTERN_OBJS_MT)) +AD_HOC_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_ad-hoc_mt.o) +AD_HOC_BINS_MT := $(patsubst %.o,%.x,$(AD_HOC_OBJS_MT)) @@ -265,15 +203,16 @@ OPENBLAS_BINS_MT := $(patsubst %.o,%.x,$(OPENBLAS_OBJS_MT)) # --- Targets/rules ------------------------------------------------------------ # -all: st +all: st -st: blis-st openblas-st -mt: blis-mt openblas-mt +st: intern-st ad-hoc-st +mt: intern-mt ad-hoc-mt + +intern-st: $(INTERN_BINS_ST) +ad-hoc-st: $(AD_HOC_BINS_ST) +intern-mt: $(INTERN_BINS_MT) +ad-hoc-mt: $(AD_HOC_BINS_MT) -blis-st: $(BLIS_BINS_ST) -openblas-st: $(OPENBLAS_BINS_ST) -blis-mt: $(BLIS_BINS_MT) -openblas-mt: $(OPENBLAS_BINS_MT) #blis: test_ssssgemm_asm_blis_st.x \ # 
test_sssdgemm_asm_blis_st.x \ # test_ssdsgemm_asm_blis_st.x \ @@ -281,13 +220,6 @@ openblas-mt: $(OPENBLAS_BINS_MT) # test_dsssgemm_asm_blis_st.x \ # test_dddsgemm_asm_blis_st.x \ # test_ddddgemm_asm_blis_st.x -#openblas: test_ssssgemm_openblas_st.x \ -# test_sssdgemm_openblas_st.x \ -# test_ssdsgemm_openblas_st.x \ -# test_sdssgemm_openblas_st.x \ -# test_dsssgemm_openblas_st.x \ -# test_dddsgemm_openblas_st.x \ -# test_ddddgemm_openblas_st.x # --Object file rules -- @@ -316,7 +248,7 @@ endef # Define the implementations for which we will instantiate compilation rules. -IMPLS := asm_blis openblas +IMPLS := intern ad-hoc # Instantiate the rule function make-st-rule() and make-mt-rule for each # implementation in IMPLS and each of the datatype "codes" in DT_CODES. @@ -334,56 +266,44 @@ $(foreach code,$(DT_CODES),$(eval $(call make-mt-rule,$(code),$(impl))))) # compatibility layer. This prevents BLIS from inadvertently getting called # for the BLAS routines we are trying to test with. -test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK) +test_%_ad-hoc_st.x: test_%_ad-hoc_st.o $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) - $(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ - $(RM_F) $< -else - @@echo "Linking $@ to '$(notdir $(OPENBLAS_LIB)) $(LIBBLIS_LINK)'" - @$(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ - @$(RM_F) $< -endif - -test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK) -ifeq ($(ENABLE_VERBOSE),yes) - $(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ - $(RM_F) $< -else - @@echo "Linking $@ to '$(notdir $(OPENBLAS_LIB)) $(LIBBLIS_LINK)'" - @$(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ - @$(RM_F) $< -endif - -#test_%_mkl_st.x: test_%_mkl_st.o $(LIBBLIS_LINK) -#ifeq ($(ENABLE_VERBOSE),yes) -# $(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ -# $(RM_F) $< -#else -# @@echo "Linking $@ to '$(notdir $(MKL_LIB)) $(LIBBLIS_LINK)'" -# @$(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) 
-o $@ -# @$(RM_F) $< -#endif - -#test_%_mkl_mt.x: test_%_mkl_mt.o $(LIBBLIS_LINK) -# $(LINKER) $< $(MKLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@ - -test_%_blis_st.x: test_%_blis_st.o $(LIBBLIS_LINK) -ifeq ($(ENABLE_VERBOSE),yes) - $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ $(RM_F) $< else @@echo "Linking $@ to '$(LIBBLIS_LINK)'" - @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ @$(RM_F) $< endif -test_%_blis_mt.x: test_%_blis_mt.o $(LIBBLIS_LINK) +test_%_ad-hoc_mt.x: test_%_ad-hoc_mt.o $(LIBBLIS_LINK) ifeq ($(ENABLE_VERBOSE),yes) - $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ $(RM_F) $< else @@echo "Linking $@ to '$(LIBBLIS_LINK)'" - @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + @$(RM_F) $< +endif + + +test_%_intern_st.x: test_%_intern_st.o $(LIBBLIS_LINK) +ifeq ($(ENABLE_VERBOSE),yes) + $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + $(RM_F) $< +else + @@echo "Linking $@ to '$(LIBBLIS_LINK)'" + @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + @$(RM_F) $< +endif + +test_%_intern_mt.x: test_%_intern_mt.o $(LIBBLIS_LINK) +ifeq ($(ENABLE_VERBOSE),yes) + $(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ + $(RM_F) $< +else + @@echo "Linking $@ to '$(LIBBLIS_LINK)'" + @$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@ @$(RM_F) $< endif diff --git a/test/mixeddt/matlab/gemm_md.pdf b/test/mixeddt/matlab/gemm_md.pdf deleted file mode 100644 index e665aef46..000000000 Binary files a/test/mixeddt/matlab/gemm_md.pdf and /dev/null differ diff --git a/test/mixeddt/matlab/gen_prec_combos.m b/test/mixeddt/matlab/gen_prec_combos.m new file mode 100644 index 000000000..44b56f0c9 --- /dev/null +++ b/test/mixeddt/matlab/gen_prec_combos.m @@ -0,0 +1,101 @@ +function r_val = gen_prec_combos( mdcase ) + +dt_chars = [ 's' 'd' 'c' 'z' ]; +pr_chars = [ 's' 'd' ]; +dm_chars = [ 'r' 'c' ]; + +dmc = mdcase( 
1 ); +dma = mdcase( 2 ); +dmb = mdcase( 3 ); + +if 0 +pr_combos( 1, : ) = 'ssss'; +pr_combos( 2, : ) = 'ssds'; +pr_combos( 3, : ) = 'sdss'; +pr_combos( 4, : ) = 'sdds'; +pr_combos( 5, : ) = 'dsss'; +pr_combos( 6, : ) = 'dsds'; +pr_combos( 7, : ) = 'ddss'; +pr_combos( 8, : ) = 'ddds'; +pr_combos( 9, : ) = 'dddd'; +pr_combos( 10, : ) = 'ddsd'; +pr_combos( 11, : ) = 'dsdd'; +pr_combos( 12, : ) = 'dssd'; +pr_combos( 13, : ) = 'sddd'; +pr_combos( 14, : ) = 'sdsd'; +pr_combos( 15, : ) = 'ssdd'; +pr_combos( 16, : ) = 'sssd'; +end + +pr_combos( 1, : ) = 'ssss'; +pr_combos( 2, : ) = 'ssds'; +pr_combos( 3, : ) = 'dddd'; +pr_combos( 4, : ) = 'ddsd'; + +pr_combos( 5, : ) = 'sdss'; +pr_combos( 6, : ) = 'sdds'; +pr_combos( 7, : ) = 'dsdd'; +pr_combos( 8, : ) = 'dssd'; + +pr_combos( 9, : ) = 'dsss'; +pr_combos( 10, : ) = 'dsds'; +pr_combos( 11, : ) = 'sddd'; +pr_combos( 12, : ) = 'sdsd'; + +pr_combos( 13, : ) = 'ddss'; +pr_combos( 14, : ) = 'ddds'; +pr_combos( 15, : ) = 'ssdd'; +pr_combos( 16, : ) = 'sssd'; + +for i = 1:16 + + pr_combo = pr_combos( i, : ); + + %str = sprintf( '%s', pr_combo ); disp(str); + + prc = pr_combo( 1 ); + pra = pr_combo( 2 ); + prb = pr_combo( 3 ); + pr = pr_combo( 4 ); + + dtc = prec_dom_to_dt( prc, dmc ); + dta = prec_dom_to_dt( pra, dma ); + dtb = prec_dom_to_dt( prb, dmb ); + + dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); + +end + + +%if 0 +%i = 1; +%pr = 's'; +%for prc = pr_chars +% for pra = pr_chars +% for prb = pr_chars +% dtc = prec_dom_to_dt( prc, dmc ); +% dta = prec_dom_to_dt( pra, dma ); +% dtb = prec_dom_to_dt( prb, dmb ); +% dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); +% i = i + 1; +% end +% end +%end +% +%pr = 'd'; +%for prc = flip( pr_chars ) +% for pra = flip( pr_chars ) +% for prb = flip( pr_chars ) +% dtc = prec_dom_to_dt( prc, dmc ); +% dta = prec_dom_to_dt( pra, dma ); +% dtb = prec_dom_to_dt( prb, dmb ); +% dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr ); +% i = i + 1; +% end +% end 
+%end +%end + +r_val = dt_combos; + +end diff --git a/test/mixeddt/matlab/output/.gitkeep b/test/mixeddt/matlab/output/.gitkeep new file mode 100644 index 000000000..e69de29bb diff --git a/test/mixeddt/matlab/plot_dom_all.m b/test/mixeddt/matlab/plot_dom_all.m new file mode 100644 index 000000000..f2fe7df4e --- /dev/null +++ b/test/mixeddt/matlab/plot_dom_all.m @@ -0,0 +1,23 @@ +function r_val = plot_dom_all( is_mt ) + +cases( 1, : ) = [ 'rrr' ]; +cases( 2, : ) = [ 'rrc' ]; +cases( 3, : ) = [ 'rcr' ]; +cases( 4, : ) = [ 'rcc' ]; +cases( 5, : ) = [ 'crr' ]; +cases( 6, : ) = [ 'crc' ]; +cases( 7, : ) = [ 'ccr' ]; +cases( 8, : ) = [ 'ccc' ]; + +n_cases = size(cases,1); + +for i = 1:n_cases + + thecase = cases( i, : ); + + plot_dom_case( thecase, is_mt ); +end + +r_val = 0; + +end diff --git a/test/mixeddt/matlab/plot_dom_case.m b/test/mixeddt/matlab/plot_dom_case.m new file mode 100644 index 000000000..e0147d049 --- /dev/null +++ b/test/mixeddt/matlab/plot_dom_case.m @@ -0,0 +1,140 @@ +function r_val = plot_dom_case( mdcase, is_mt ) + +if is_mt == 1 + thr_str = 'mt'; +else + thr_str = 'st'; +end + +if 1 +dt_combos = gen_prec_combos( mdcase ); +else +dt_combos( 1, : ) = [ 'ssss' ]; +dt_combos( 2, : ) = [ 'sssd' ]; +dt_combos( 3, : ) = [ 'ssds' ]; +dt_combos( 4, : ) = [ 'sdss' ]; +dt_combos( 5, : ) = [ 'dsss' ]; +dt_combos( 6, : ) = [ 'ddds' ]; +dt_combos( 7, : ) = [ 'dddd' ]; +end + +n_combos = size(dt_combos,1); + +filetemp_blis = '../output_%s_%sgemm_asm_blis.m'; +filetemp_open = '../output_%s_%sgemm_openblas.m'; + +% Construct filenames for the "reference" (single real) data, then load +% the data files, and finally save the results to different variable names. 
+file_blis_sref = sprintf( filetemp_blis, thr_str, 'ssss' ); +file_open_sref = sprintf( filetemp_open, thr_str, 'ssss' ); +run( file_blis_sref ) +run( file_open_sref ) +data_gemm_asm_blis_sref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_sref( :, : ) = data_gemm_openblas( :, : ); + +% Construct filenames for the "reference" (double real) data, then load +% the data files, and finally save the results to different variable names. +file_blis_dref = sprintf( filetemp_blis, thr_str, 'dddd' ); +file_open_dref = sprintf( filetemp_open, thr_str, 'dddd' ); +run( file_blis_dref ) +run( file_open_dref ) +data_gemm_asm_blis_dref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_dref( :, : ) = data_gemm_openblas( :, : ); + +% Construct filenames for the "reference" (single complex) data, then load +% the data files, and finally save the results to different variable names. +file_blis_cref = sprintf( filetemp_blis, thr_str, 'cccs' ); +file_open_cref = sprintf( filetemp_open, thr_str, 'cccs' ); +run( file_blis_cref ) +run( file_open_cref ) +data_gemm_asm_blis_cref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_cref( :, : ) = data_gemm_openblas( :, : ); + +% Construct filenames for the "reference" (double complex) data, then load +% the data files, and finally save the results to different variable names. 
+file_blis_zref = sprintf( filetemp_blis, thr_str, 'zzzd' ); +file_open_zref = sprintf( filetemp_open, thr_str, 'zzzd' ); +run( file_blis_zref ) +run( file_open_zref ) +data_gemm_asm_blis_zref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_zref( :, : ) = data_gemm_openblas( :, : ); + +fig = figure; +orient( fig, 'portrait' ); +%set(gcf,'Position',[0 0 2000 900]); +set(gcf,'PaperUnits', 'inches'); +%set(gcf,'PaperSize', [16 12.4]); +%set(gcf,'PaperPosition', [0 0 16 12.4]); +set(gcf,'PaperSize', [14 11.0]); +set(gcf,'PaperPosition', [0 0 14 11.0]); +%set(gcf,'PaperPositionMode','auto'); +set(gcf,'PaperPositionMode','manual'); +set(gcf,'PaperOrientation','portrait'); + +for dti = 1:n_combos +%for dti = 1:1 + + % Grab the current datatype combination. + combo = dt_combos( dti, : ); + + str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str); + + if combo(4) == 's' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : ); + refch = 's'; + else %if combo(4) == 'd' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : ); + refch = 'd'; + end + + if ( combo(1) == 'c' || combo(1) == 'z' ) && ... + ( combo(2) == 'c' || combo(2) == 'z' ) && ... + ( combo(3) == 'c' || combo(3) == 'z' ) + if combo(4) == 's' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : ); + refch = 'c'; + else %if combo(4) == 'd' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : ); + refch = 'z'; + end + end + + % Construct filenames for the data files from templates. + file_blis = sprintf( filetemp_blis, thr_str, combo ); + file_open = sprintf( filetemp_open, thr_str, combo ); + + % Load the data files. 
+ %str = sprintf( ' Loading %s', file_blis ); disp(str); + run( file_blis ) + %str = sprintf( ' Loading %s', file_open ); disp(str); + run( file_open ) + + % Plot the result. + plot_gemm_perf( combo, ... + data_gemm_asm_blis, ... + data_gemm_asm_blis_ref, ... + data_gemm_openblas, ... + data_gemm_openblas_ref, ... + is_mt, refch, 4, 4, dti ); + +end + + +%if 0 +%set(gcf,'Position',[0 0 2000 900]); +%set(gcf,'PaperUnits', 'inches'); +%set(gcf,'PaperSize', [48 22]); +%set(gcf,'PaperPosition', [0 0 48 22]); +%%set(gcf,'PaperPositionMode','auto'); +%set(gcf,'PaperPositionMode','manual'); +%set(gcf,'PaperOrientation','landscape'); +%end + +outfile = sprintf( 'output/gemm_%s', mdcase ); + +print(gcf, outfile,'-bestfit','-dpdf'); +%print(gcf, 'gemm_md','-fillpage','-dpdf'); diff --git a/test/mixeddt/matlab/plot_all_md.m b/test/mixeddt/matlab/plot_dt_all.m similarity index 93% rename from test/mixeddt/matlab/plot_all_md.m rename to test/mixeddt/matlab/plot_dt_all.m index 9302bdb0a..865d578bb 100644 --- a/test/mixeddt/matlab/plot_all_md.m +++ b/test/mixeddt/matlab/plot_dt_all.m @@ -1,4 +1,4 @@ -function r_val = plot_all_md( is_mt ) +function r_val = plot_dt_all( is_mt ) if is_mt == 1 thr_str = 'mt'; @@ -73,8 +73,8 @@ set(gcf,'Position',[0 0 2000 900]); set(gcf,'PaperUnits', 'inches'); set(gcf,'PaperSize', [64 33]); set(gcf,'PaperPosition', [0 0 64 33]); -%set(gcf,'PaperPositionMode','auto'); -set(gcf,'PaperPositionMode','manual'); +%set(gcf,'PaperPositionMode','auto'); +set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','landscape'); for dti = 1:n_combos @@ -88,9 +88,11 @@ for dti = 1:n_combos if combo(4) == 's' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : ); - elseif combo(4) == 'd' + refch = 's'; + else %if combo(4) == 'd' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : ); + refch = 'd'; end if 
( combo(1) == 'c' || combo(1) == 'z' ) && ... @@ -99,9 +101,11 @@ for dti = 1:n_combos if combo(4) == 's' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : ); - elseif combo(4) == 'd' + refch = 'c'; + else %if combo(4) == 'd' data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : ); data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : ); + refch = 'z'; end end @@ -121,7 +125,7 @@ for dti = 1:n_combos data_gemm_asm_blis_ref, ... data_gemm_openblas, ... data_gemm_openblas_ref, ... - is_mt, dti ); + is_mt, refch, 8, 16, dti ); end @@ -135,5 +139,5 @@ set(gcf,'PaperPosition', [0 0 48 22]); set(gcf,'PaperPositionMode','manual'); set(gcf,'PaperOrientation','landscape'); end -print(gcf, 'gemm_md','-bestfit','-dpdf'); +print(gcf, 'output/gemm_md','-bestfit','-dpdf'); %print(gcf, 'gemm_md','-fillpage','-dpdf'); diff --git a/test/mixeddt/matlab/plot_dt_select.m b/test/mixeddt/matlab/plot_dt_select.m new file mode 100644 index 000000000..38c8d3cd1 --- /dev/null +++ b/test/mixeddt/matlab/plot_dt_select.m @@ -0,0 +1,148 @@ +function r_val = plot_dt_select( dom, is_mt ) + +if is_mt == 1 + thr_str = 'mt'; +else + thr_str = 'st'; +end + +if dom == 'r' + + dt_combos( 1, : ) = [ 'dsss' ]; + dt_combos( 2, : ) = [ 'sddd' ]; + dt_combos( 3, : ) = [ 'sdds' ]; + dt_combos( 4, : ) = [ 'dssd' ]; + dt_combos( 5, : ) = [ 'ddds' ]; + dt_combos( 6, : ) = [ 'sssd' ]; + +else + + dt_combos( 1, : ) = [ 'csss' ]; + dt_combos( 2, : ) = [ 'zddd' ]; + dt_combos( 3, : ) = [ 'ccss' ]; + dt_combos( 4, : ) = [ 'zzdd' ]; + dt_combos( 5, : ) = [ 'cscs' ]; + dt_combos( 6, : ) = [ 'zdzd' ]; +end + +n_combos = size(dt_combos,1); + +filetemp_blis = '../output_%s_%sgemm_asm_blis.m'; +filetemp_open = '../output_%s_%sgemm_openblas.m'; + +% Construct filenames for the "reference" (single real) data, then load +% the data files, and finally save the results to different variable names. 
+file_blis_sref = sprintf( filetemp_blis, thr_str, 'ssss' ); +file_open_sref = sprintf( filetemp_open, thr_str, 'ssss' ); +run( file_blis_sref ) +run( file_open_sref ) +data_gemm_asm_blis_sref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_sref( :, : ) = data_gemm_openblas( :, : ); + +% Construct filenames for the "reference" (double real) data, then load +% the data files, and finally save the results to different variable names. +file_blis_dref = sprintf( filetemp_blis, thr_str, 'dddd' ); +file_open_dref = sprintf( filetemp_open, thr_str, 'dddd' ); +run( file_blis_dref ) +run( file_open_dref ) +data_gemm_asm_blis_dref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_dref( :, : ) = data_gemm_openblas( :, : ); + +% Construct filenames for the "reference" (single complex) data, then load +% the data files, and finally save the results to different variable names. +file_blis_cref = sprintf( filetemp_blis, thr_str, 'cccs' ); +file_open_cref = sprintf( filetemp_open, thr_str, 'cccs' ); +run( file_blis_cref ) +run( file_open_cref ) +data_gemm_asm_blis_cref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_cref( :, : ) = data_gemm_openblas( :, : ); + +% Construct filenames for the "reference" (double complex) data, then load +% the data files, and finally save the results to different variable names. 
+file_blis_zref = sprintf( filetemp_blis, thr_str, 'zzzd' ); +file_open_zref = sprintf( filetemp_open, thr_str, 'zzzd' ); +run( file_blis_zref ) +run( file_open_zref ) +data_gemm_asm_blis_zref( :, : ) = data_gemm_asm_blis( :, : ); +data_gemm_openblas_zref( :, : ) = data_gemm_openblas( :, : ); + +%fig = figure; +fig = figure('Position', [100, 100, 1024, 1300]); +orient( fig, 'portrait' ); +%set(gcf,'Position',[0 0 2000 900]); +set(gcf,'PaperUnits', 'inches'); +%set(gcf,'PaperSize', [16 12.4]); +%set(gcf,'PaperPosition', [0 0 16 12.4]); +set(gcf,'PaperSize', [9 11.0]); +set(gcf,'PaperPosition', [0 0 9 11.0]); +%set(gcf,'PaperPositionMode','auto'); +set(gcf,'PaperPositionMode','manual'); +set(gcf,'PaperOrientation','portrait'); + +for dti = 1:n_combos +%for dti = 1:1 + + % Grab the current datatype combination. + combo = dt_combos( dti, : ); + + str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str); + + if combo(4) == 's' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : ); + refch = 's'; + else %if combo(4) == 'd' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : ); + refch = 'd'; + end + + if ( combo(1) == 'c' || combo(1) == 'z' ) && ... + ( combo(2) == 'c' || combo(2) == 'z' ) && ... + ( combo(3) == 'c' || combo(3) == 'z' ) + if combo(4) == 's' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : ); + refch = 'c'; + else %if combo(4) == 'd' + data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : ); + data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : ); + refch = 'z'; + end + end + + % Construct filenames for the data files from templates. + file_blis = sprintf( filetemp_blis, thr_str, combo ); + file_open = sprintf( filetemp_open, thr_str, combo ); + + % Load the data files. 
+ %str = sprintf( ' Loading %s', file_blis ); disp(str); + run( file_blis ) + %str = sprintf( ' Loading %s', file_open ); disp(str); + run( file_open ) + + % Plot the result. + plot_gemm_perf( combo, ... + data_gemm_asm_blis, ... + data_gemm_asm_blis_ref, ... + data_gemm_openblas, ... + data_gemm_openblas_ref, ... + is_mt, refch, 3, 2, dti ); + +end + + +%if 0 +%set(gcf,'Position',[0 0 2000 900]); +%set(gcf,'PaperUnits', 'inches'); +%set(gcf,'PaperSize', [48 22]); +%set(gcf,'PaperPosition', [0 0 48 22]); +%%set(gcf,'PaperPositionMode','auto'); +%set(gcf,'PaperPositionMode','manual'); +%set(gcf,'PaperOrientation','landscape'); +%end + +outfile = sprintf( 'output/gemm_select_%c', dom ); + +print(gcf, outfile,'-bestfit','-dpdf'); +%print(gcf, 'gemm_md','-fillpage','-dpdf'); diff --git a/test/mixeddt/matlab/plot_gemm_perf.m b/test/mixeddt/matlab/plot_gemm_perf.m index 7fc9b0752..f30cc7da5 100644 --- a/test/mixeddt/matlab/plot_gemm_perf.m +++ b/test/mixeddt/matlab/plot_gemm_perf.m @@ -4,10 +4,12 @@ function r_val = plot_gemm_perf( dt_str, ... data_open, ... data_open_ref, ... is_mt, ... + refch, ... + rows, cols, ... 
theid ) if 1 -ax1 = subplot( 8, 16, theid ); +ax1 = subplot( rows, cols, theid ); hold( ax1, 'on' ); end @@ -16,7 +18,7 @@ color_open_ref = 'k'; lines_open_ref = ':'; markr_open_ref = 'o'; color_mkl_ref = 'r'; lines_mkl_ref = ':'; markr_mkl_ref = '.'; color_blis = 'b'; lines_blis = '-'; markr_blis = ''; -color_open = 'k'; lines_open = '-'; markr_open = 'o'; +color_open = 'k'; lines_open = '-.'; markr_open = 'o'; color_mkl = 'r'; lines_mkl = '-'; markr_mkl = '.'; if dt_str(4) == 's' @@ -50,12 +52,24 @@ filename_png = sprintf( filename_png, dt_str ); %dt0_str = [ dt_str(4), dt_str(4), dt_str(4), dt_str(4) ]; dt0_str = dt_str(4); -blis_sref_legend = sprintf( 'BLIS [sc]gemm' ); -blis_dref_legend = sprintf( 'BLIS [dz]gemm' ); -blis_legend = sprintf( 'BLIS mixed' ); -open_sref_legend = sprintf( 'OBLA [sc]gemm' ); -open_dref_legend = sprintf( 'OBLA [dz]gemm' ); -open_legend = sprintf( 'OBLA mixed' ); +if refch == 's' +blis_ref_legend = sprintf( 'Ref (sgemm)' ); +%blis_ref_legend = sprintf( 'Reference' ); +elseif refch == 'd' +blis_ref_legend = sprintf( 'Ref (dgemm)' ); +%blis_ref_legend = sprintf( 'Reference' ); +elseif refch == 'c' +blis_ref_legend = sprintf( 'Ref (cgemm)' ); +elseif refch == 'z' +blis_ref_legend = sprintf( 'Ref (zgemm)' ); +end + +blis_sref_legend = sprintf( 'Ref [sc]gemm' ); +blis_dref_legend = sprintf( 'Ref [dz]gemm' ); +blis_legend = sprintf( 'Internal' ); +open_sref_legend = sprintf( 'Ad-hoc [sc]gemm' ); +open_dref_legend = sprintf( 'Ad-hoc [dz]gemm' ); +open_legend = sprintf( 'Ad-hoc' ); y_scale = 1.00; @@ -72,12 +86,12 @@ y_end = max_perf_core * y_scale; flopscol = 4; msize = 5; if 1 -fontsize = 12; +fontsize = 13; else fontsize = 16; end -linesize = 0.7; -legend_loc = 'SouthEast'; +linesize = 0.5; +legend_loc = 'southeast'; % -------------------------------------------------------------------- @@ -95,9 +109,9 @@ blis_ref = line( x_axis( :, 1 ), data_blis_ref( :, flopscol ) / nth, ... 
blis_md = line( x_axis( :, 1 ), data_blis( :, flopscol ) / nth, ... 'Color',color_blis, 'LineStyle',lines_blis, ... 'LineWidth',linesize ); -open_ref = line( x_axis( :, 1 ), data_open_ref( :, flopscol ) / nth, ... - 'Color',color_open_ref, 'LineStyle',lines_open_ref, ... - 'LineWidth',linesize ); +%open_ref = line( x_axis( :, 1 ), data_open_ref( :, flopscol ) / nth, ... +% 'Color',color_open_ref, 'LineStyle',lines_open_ref, ... +% 'LineWidth',linesize ); open_md = line( x_axis( :, 1 ), data_open( :, flopscol ) / nth, ... 'Color',color_open, 'LineStyle',lines_open, ... 'LineWidth',linesize ); @@ -108,42 +122,117 @@ open_md = line( x_axis( :, 1 ), data_open( :, flopscol ) / nth, ... xlim( ax1, [x_begin x_end] ); ylim( ax1, [y_begin y_end] ); -if theid == 1 -leg = legend( ... -[ ... - blis_ref ... - blis_md ... - open_ref ... - open_md ... -], ... -blis_sref_legend, ... -blis_legend, ... -open_sref_legend, ... -open_legend, ... -'Location', 'best' ); -%'Location', legend_loc ); -set( leg,'Box','off' ); -set( leg,'Color','none' ); -set( leg,'FontSize',fontsize-2 ); -set( leg,'Units','inches' ); -elseif theid == 9 -leg = legend( ... -[ ... - blis_ref ... - blis_md ... - open_ref ... - open_md ... -], ... -blis_dref_legend, ... -blis_legend, ... -open_dref_legend, ... -open_legend, ... -'Location', 'best' ); -%'Location', legend_loc ); -set( leg,'Box','off' ); -set( leg,'Color','none' ); -set( leg,'FontSize',fontsize-2 ); -set( leg,'Units','inches' ); +if rows == 8 && cols == 16 + + if theid == 1 + leg = legend( ... + [ ... + blis_ref ... + blis_md ... + open_md ... + ], ... + blis_sref_legend, ... + blis_legend, ... + open_legend, ... + 'Location', 'best' ); + %'Location', legend_loc ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'FontSize',fontsize-2 ); + set( leg,'Units','inches' ); + elseif theid == 9 + leg = legend( ... + [ ... + blis_ref ... + blis_md ... + open_md ... + ], ... + blis_dref_legend, ... + blis_legend, ... + open_legend, ... 
+ 'Location', 'best' ); + %'Location', legend_loc ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'FontSize',fontsize-2 ); + set( leg,'Units','inches' ); + end + +elseif rows == 4 && cols == 4 + + if theid == 1 + leg = legend( ... + [ ... + blis_ref ... + blis_md ... + open_md ... + ], ... + blis_ref_legend, ... + blis_legend, ... + open_legend, ... + 'Location', legend_loc ); + %'Location', 'best' ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'FontSize',fontsize-2 ); + set( leg,'Units','inches' ); + set( leg,'Position',[1.03 3.46 0.7 0.3 ] ); + elseif theid == 3 + leg = legend( ... + [ ... + blis_ref ... + blis_md ... + open_md ... + ], ... + blis_ref_legend, ... + blis_legend, ... + open_legend, ... + 'Location', legend_loc ); + %'Location', 'best' ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'FontSize',fontsize-2 ); + set( leg,'Units','inches' ); + set( leg,'Position',[3.51 3.46 0.7 0.3 ] ); + end + +elseif rows == 3 && cols == 2 + + if theid == 1 + leg = legend( ... + [ ... + blis_ref ... + blis_md ... + open_md ... + ], ... + blis_ref_legend, ... + blis_legend, ... + open_legend, ... + 'Location', legend_loc ); + %'Location', 'best' ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'FontSize',fontsize-2 ); + set( leg,'Units','inches' ); + %set( leg,'Position',[1.03 3.46 0.7 0.3 ] ); + elseif theid == 2 + leg = legend( ... + [ ... + blis_ref ... + blis_md ... + open_md ... + ], ... + blis_ref_legend, ... + blis_legend, ... + open_legend, ... + 'Location', legend_loc ); + %'Location', 'best' ); + set( leg,'Box','off' ); + set( leg,'Color','none' ); + set( leg,'FontSize',fontsize-2 ); + set( leg,'Units','inches' ); + %set( leg,'Position',[3.51 3.46 0.7 0.3 ] ); + end end @@ -159,14 +248,14 @@ tpos = get( titl, 'Position' ); % default is to align across whole figure, not b tpos(1) = tpos(1) + 40; set( titl, 'Position', tpos ); % here we nudge it back to centered with box. 
-if theid > 112 +if theid > (rows-1)*cols xlab = xlabel( ax1,xaxisname ); %tpos = get( xlab, 'Position' ) %tpos(2) = tpos(2) + 10; %set( xlab, 'Position', tpos ); end -if mod(theid-1,16) == 0 +if mod(theid-1,cols) == 0 ylab = ylabel( ax1,yaxisname ); end diff --git a/test/mixeddt/matlab/prec_dom_to_dt.m b/test/mixeddt/matlab/prec_dom_to_dt.m new file mode 100644 index 000000000..86e61413b --- /dev/null +++ b/test/mixeddt/matlab/prec_dom_to_dt.m @@ -0,0 +1,17 @@ +function r_val = prec_dom_to_dt( pc, dc ) + +if dc == 'r' + if pc == 's' + r_val = 's'; + else + r_val = 'd'; + end +else + if pc == 's' + r_val = 'c'; + else + r_val = 'z'; + end +end + +end diff --git a/test/mixeddt/matlab/testrand.pdf b/test/mixeddt/matlab/testrand.pdf deleted file mode 100644 index b97c17528..000000000 Binary files a/test/mixeddt/matlab/testrand.pdf and /dev/null differ diff --git a/test/mixeddt/runme.sh b/test/mixeddt/runme.sh index 2e9967f2b..81283e5fb 100755 --- a/test/mixeddt/runme.sh +++ b/test/mixeddt/runme.sh @@ -67,15 +67,15 @@ fi # Complex domain implementations to test. if [ ${sys} = "blis" ]; then - test_impls="openblas asm_blis" + test_impls="ad-hoc intern" elif [ ${sys} = "stampede2" ]; then - test_impls="openblas asm_blis mkl" + test_impls="" elif [ ${sys} = "lonestar5" ]; then - test_impls="openblas mkl asm_blis" + test_impls="" fi # Datatypes to test. @@ -102,16 +102,13 @@ for dtc in ${dt_chars}; do done # Threadedness to test. -threads="mt" -#threads="st" - -test_impls="openblas" +#threads="mt" +threads="st" +# Overrides, in case something goes wrong for a subset of tests. +#test_impls="ad-hoc" #dt_combos="ssss sssd ssds sdss dsss ddds dddd" -#dt_combos="csss csds cdss cdds zsss zsds zdss zdds cssd csdd cdsd cddd zssd zsdd zdsd zddd" -#dt_combos="cssd csdd cdsd cddd zsss zsds zdss zdds" -#dt_combos="cdsd cddd zsss zsds zdss zdds" -#test_impls="asm_blis" + # Now perform complex test cases. 
for th in ${threads}; do @@ -134,11 +131,11 @@ for th in ${threads}; do # Unset GOMP_CPU_AFFINITY for OpenBLAS, as it causes the library # to execute sequentially. - if [ ${im} = "openblas" ]; then - unset GOMP_CPU_AFFINITY - else - export GOMP_CPU_AFFINITY="0 1 2 3" - fi + #if [ ${im} = "openblas" ]; then + # unset GOMP_CPU_AFFINITY + #else + # export GOMP_CPU_AFFINITY="0 1 2 3" + #fi else export BLIS_JC_NT=1 diff --git a/test/mixeddt/test_gemm.c b/test/mixeddt/test_gemm.c index 7be31960d..14d0cfed6 100644 --- a/test/mixeddt/test_gemm.c +++ b/test/mixeddt/test_gemm.c @@ -259,7 +259,8 @@ void blas_gemm_md( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c ) if ( bli_obj_dt( a ) == bli_obj_dt( b ) && bli_obj_dt( b ) == bli_obj_dt( c ) && - bli_obj_dt( c ) == ( num_t )comp_prec ) + //bli_obj_dt( c ) == ( num_t )comp_prec ) + bli_obj_prec( c ) == comp_prec ) { blas_gemm( transa, transb, bli_obj_dt( c ), alpha, a, b, beta, c ); return;