Merge branch 'master' into amd

This commit is contained in:
Field G. Van Zee
2018-11-14 12:36:35 -06:00
17 changed files with 1099 additions and 276 deletions

View File

@@ -64,7 +64,7 @@ void PASTEMAC(opname,EX_SUF) \
where mixed datatype support will be implemented (if at all). */ \
if ( bli_obj_dt( c ) == bli_obj_dt( a ) && \
bli_obj_dt( c ) == bli_obj_dt( b ) && \
bli_obj_dt( c ) == bli_obj_exec_dt( c ) && \
bli_obj_dt( c ) == bli_obj_comp_dt( c ) && \
bli_obj_is_complex( c ) ) \
{ \
/* Invoke the operation's "ind" function--its induced method front-end.

View File

@@ -201,9 +201,9 @@ STR_ST := -DTHR_STR=\"st\"
STR_MT := -DTHR_STR=\"mt\"
# Problem size specification
PDEF_ST := -DP_BEGIN=96 \
PDEF_ST := -DP_BEGIN=40 \
-DP_END=2000 \
-DP_INC=96
-DP_INC=40
PDEF_MT := -DP_BEGIN=192 \
-DP_END=3000 \
@@ -215,34 +215,57 @@ PDEF_MT := -DP_BEGIN=192 \
# --- Targets/rules ------------------------------------------------------------
#
all: all-st all-mt
blis: blis-st blis-mt
blis-nat: blis-nat-st blis-nat-mt
openblas: openblas-st openblas-mt
mkl: mkl-st mkl-mt
all: all-st all-mt
blis: blis-st blis-mt
openblas: openblas-st openblas-mt
mkl: mkl-st mkl-mt
all-st: blis-st openblas-st mkl-st
all-mt: blis-mt openblas-mt mkl-mt
all-st: blis-st openblas-st mkl-st
all-mt: blis-mt openblas-mt mkl-mt
blis-st: blis-gemm-st
blis-mt: blis-gemm-mt
blis-st: blis-nat-st
blis-mt: blis-nat-mt
openblas-st: openblas-gemm-st
openblas-mt: openblas-gemm-mt
blis-ind: blis-ind-st blis-ind-mt
blis-nat: blis-nat-st blis-nat-mt
mkl-st: mkl-gemm-st
mkl-mt: mkl-gemm-mt
blis-ind-st: \
test_cgemm_3mhw_blis_st.x \
test_zgemm_3mhw_blis_st.x \
test_cgemm_3m1_blis_st.x \
test_zgemm_3m1_blis_st.x \
test_cgemm_4mhw_blis_st.x \
test_zgemm_4mhw_blis_st.x \
test_cgemm_4m1b_blis_st.x \
test_zgemm_4m1b_blis_st.x \
test_cgemm_4m1a_blis_st.x \
test_zgemm_4m1a_blis_st.x \
test_cgemm_1m_blis_st.x \
test_zgemm_1m_blis_st.x
blis-gemm-st: blis-gemm-nat-st \
blis-gemm-ind-st
blis-gemm-mt: blis-gemm-nat-mt \
blis-gemm-ind-mt
blis-ind-mt: \
test_cgemm_3mhw_blis_mt.x \
test_zgemm_3mhw_blis_mt.x \
test_cgemm_3m1_blis_mt.x \
test_zgemm_3m1_blis_mt.x \
test_cgemm_4mhw_blis_mt.x \
test_zgemm_4mhw_blis_mt.x \
test_cgemm_4m1b_blis_mt.x \
test_zgemm_4m1b_blis_mt.x \
test_cgemm_4m1a_blis_mt.x \
test_zgemm_4m1a_blis_mt.x \
test_cgemm_1m_blis_mt.x \
test_zgemm_1m_blis_mt.x
blis-nat-st: \
test_sgemm_asm_blis_st.x \
test_dgemm_asm_blis_st.x \
test_cgemm_asm_blis_st.x \
test_zgemm_asm_blis_st.x \
test_shemm_asm_blis_st.x \
test_dhemm_asm_blis_st.x \
test_chemm_asm_blis_st.x \
test_zhemm_asm_blis_st.x \
test_sherk_asm_blis_st.x \
test_dherk_asm_blis_st.x \
test_cherk_asm_blis_st.x \
@@ -261,6 +284,10 @@ blis-nat-mt: \
test_dgemm_asm_blis_mt.x \
test_cgemm_asm_blis_mt.x \
test_zgemm_asm_blis_mt.x \
test_shemm_asm_blis_mt.x \
test_dhemm_asm_blis_mt.x \
test_chemm_asm_blis_mt.x \
test_zhemm_asm_blis_mt.x \
test_sherk_asm_blis_mt.x \
test_dherk_asm_blis_mt.x \
test_cherk_asm_blis_mt.x \
@@ -274,69 +301,94 @@ blis-nat-mt: \
test_ctrsm_asm_blis_mt.x \
test_ztrsm_asm_blis_mt.x
blis-gemm-nat-st: \
test_sgemm_asm_blis_st.x \
test_dgemm_asm_blis_st.x \
test_cgemm_asm_blis_st.x \
test_zgemm_asm_blis_st.x
blis-gemm-nat-mt: \
test_sgemm_asm_blis_mt.x \
test_dgemm_asm_blis_mt.x \
test_cgemm_asm_blis_mt.x \
test_zgemm_asm_blis_mt.x
blis-gemm-ind-st: \
test_cgemm_3mhw_blis_st.x \
test_zgemm_3mhw_blis_st.x \
test_cgemm_3m1_blis_st.x \
test_zgemm_3m1_blis_st.x \
test_cgemm_4mhw_blis_st.x \
test_zgemm_4mhw_blis_st.x \
test_cgemm_4m1b_blis_st.x \
test_zgemm_4m1b_blis_st.x \
test_cgemm_4m1a_blis_st.x \
test_zgemm_4m1a_blis_st.x \
test_cgemm_1m_blis_st.x \
test_zgemm_1m_blis_st.x
blis-gemm-ind-mt: \
test_cgemm_3mhw_blis_mt.x \
test_zgemm_3mhw_blis_mt.x \
test_cgemm_3m1_blis_mt.x \
test_zgemm_3m1_blis_mt.x \
test_cgemm_4mhw_blis_mt.x \
test_zgemm_4mhw_blis_mt.x \
test_cgemm_4m1b_blis_mt.x \
test_zgemm_4m1b_blis_mt.x \
test_cgemm_4m1a_blis_mt.x \
test_zgemm_4m1a_blis_mt.x \
test_cgemm_1m_blis_mt.x \
test_zgemm_1m_blis_mt.x
openblas-gemm-st: \
openblas-st: \
test_sgemm_openblas_st.x \
test_dgemm_openblas_st.x \
test_cgemm_openblas_st.x \
test_zgemm_openblas_st.x
test_zgemm_openblas_st.x \
test_shemm_openblas_st.x \
test_dhemm_openblas_st.x \
test_chemm_openblas_st.x \
test_zhemm_openblas_st.x \
test_sherk_openblas_st.x \
test_dherk_openblas_st.x \
test_cherk_openblas_st.x \
test_zherk_openblas_st.x \
test_strmm_openblas_st.x \
test_dtrmm_openblas_st.x \
test_ctrmm_openblas_st.x \
test_ztrmm_openblas_st.x \
test_strsm_openblas_st.x \
test_dtrsm_openblas_st.x \
test_ctrsm_openblas_st.x \
test_ztrsm_openblas_st.x
openblas-gemm-mt: \
openblas-mt: \
test_sgemm_openblas_mt.x \
test_dgemm_openblas_mt.x \
test_cgemm_openblas_mt.x \
test_zgemm_openblas_mt.x
test_zgemm_openblas_mt.x \
test_shemm_openblas_mt.x \
test_dhemm_openblas_mt.x \
test_chemm_openblas_mt.x \
test_zhemm_openblas_mt.x \
test_sherk_openblas_mt.x \
test_dherk_openblas_mt.x \
test_cherk_openblas_mt.x \
test_zherk_openblas_mt.x \
test_strmm_openblas_mt.x \
test_dtrmm_openblas_mt.x \
test_ctrmm_openblas_mt.x \
test_ztrmm_openblas_mt.x \
test_strsm_openblas_mt.x \
test_dtrsm_openblas_mt.x \
test_ctrsm_openblas_mt.x \
test_ztrsm_openblas_mt.x
mkl-gemm-st: \
mkl-st: \
test_sgemm_mkl_st.x \
test_dgemm_mkl_st.x \
test_cgemm_mkl_st.x \
test_zgemm_mkl_st.x
test_zgemm_mkl_st.x \
test_shemm_mkl_st.x \
test_dhemm_mkl_st.x \
test_chemm_mkl_st.x \
test_zhemm_mkl_st.x \
test_sherk_mkl_st.x \
test_dherk_mkl_st.x \
test_cherk_mkl_st.x \
test_zherk_mkl_st.x \
test_strmm_mkl_st.x \
test_dtrmm_mkl_st.x \
test_ctrmm_mkl_st.x \
test_ztrmm_mkl_st.x \
test_strsm_mkl_st.x \
test_dtrsm_mkl_st.x \
test_ctrsm_mkl_st.x \
test_ztrsm_mkl_st.x
mkl-gemm-mt: \
mkl-mt: \
test_sgemm_mkl_mt.x \
test_dgemm_mkl_mt.x \
test_cgemm_mkl_mt.x \
test_zgemm_mkl_mt.x
test_zgemm_mkl_mt.x \
test_shemm_mkl_mt.x \
test_dhemm_mkl_mt.x \
test_chemm_mkl_mt.x \
test_zhemm_mkl_mt.x \
test_sherk_mkl_mt.x \
test_dherk_mkl_mt.x \
test_cherk_mkl_mt.x \
test_zherk_mkl_mt.x \
test_strmm_mkl_mt.x \
test_dtrmm_mkl_mt.x \
test_ctrmm_mkl_mt.x \
test_ztrmm_mkl_mt.x \
test_strsm_mkl_mt.x \
test_dtrsm_mkl_mt.x \
test_ctrsm_mkl_mt.x \
test_ztrsm_mkl_mt.x

331
test/3m4m/test_hemm.c Normal file
View File

@@ -0,0 +1,331 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas at Austin
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
//#define PRINT
// Performance driver for hemm.
//
// For each problem size p in [P_BEGIN, P_END] (step P_INC) the driver creates
// random operands, runs the operation n_repeats times, and prints one line of
// matlab-syntax output holding m, n and the resulting GFLOPS figure.
//
// Compile-time configuration (all supplied via -D flags by the Makefile):
//   DT                     - datatype of all operands (num_t).
//   IND                    - induced method to enable (BLIS builds only).
//   P_BEGIN, P_END, P_INC  - problem size range.
//   STR, THR_STR           - strings spliced into the output variable name.
//   BLIS                   - if defined, time bli_hemm(); otherwise time the
//                            Fortran-77 BLAS routine matching DT.
//   PRINT                  - if defined, dump operands and exit after the
//                            first call.
//
// Returns 0 on completion.
int main( int argc, char** argv )
{
    obj_t    a, b, c;
    obj_t    c_save;
    obj_t    alpha, beta;
    dim_t    m, n;
    dim_t    p;
    dim_t    p_begin, p_end, p_inc;
    int      m_input, n_input;
    ind_t    ind;
    num_t    dt;
    char     dt_ch;
    int      r, n_repeats;
    side_t   side;
    uplo_t   uploa;
    f77_char f77_side;
    f77_char f77_uploa;

    double   dtime;
    double   dtime_save;
    double   gflops;

    // The driver takes no command-line arguments; everything is configured
    // at compile time.
    ( void )argc;
    ( void )argv;

    //bli_init();

    //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

    n_repeats = 3;

    dt      = DT;
    ind     = IND;

    p_begin = P_BEGIN;
    p_end   = P_END;
    p_inc   = P_INC;

    // A negative value -k means "use p / k" for that dimension; a
    // non-negative value fixes the dimension (see the size loop below).
    m_input = -1;
    n_input = -1;

    // Suppress compiler warnings about unused variable 'ind' (it is only
    // read in BLIS builds).
    ( void )ind;

    // Choose the char corresponding to the requested datatype.
    if      ( bli_is_float( dt ) )    dt_ch = 's';
    else if ( bli_is_double( dt ) )   dt_ch = 'd';
    else if ( bli_is_scomplex( dt ) ) dt_ch = 'c';
    else                              dt_ch = 'z';

    side  = BLIS_LEFT;
    uploa = BLIS_LOWER;

    // Translate the BLIS side/uplo values into the character arguments that
    // the Fortran BLAS interface takes.
    bli_param_map_blis_to_netlib_side( side, &f77_side );
    bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa );

    // Begin with initializing the last entry to zero so that
    // matlab allocates space for the entire array once up-front.
    // The empty loop advances p to the last size the main loop will visit.
    for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ;
#ifdef BLIS
    printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR );
#else
    printf( "data_%s_%chemm_%s",      THR_STR, dt_ch, STR );
#endif
    // The cast covers the whole row-index expression so that the argument
    // is an unsigned long, as %lu requires, whatever the width of dim_t.
    printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
            ( unsigned long )( ( p - p_begin + 1 ) / p_inc + 1 ),
            ( unsigned long )0,
            ( unsigned long )0, 0.0 );

    for ( p = p_begin; p <= p_end; p += p_inc )
    {
        if ( m_input < 0 ) m = p / ( dim_t )abs(m_input);
        else               m =     ( dim_t )    m_input;
        if ( n_input < 0 ) n = p / ( dim_t )abs(n_input);
        else               n =     ( dim_t )    n_input;

        bli_obj_create( dt, 1, 1, 0, 0, &alpha );
        bli_obj_create( dt, 1, 1, 0, 0, &beta );

        // A is square; its order matches the dimension of C that it
        // multiplies (m for a left-side product, n for a right-side one).
        if ( bli_is_left( side ) )
            bli_obj_create( dt, m, m, 0, 0, &a );
        else
            bli_obj_create( dt, n, n, 0, 0, &a );
        bli_obj_create( dt, m, n, 0, 0, &b );
        bli_obj_create( dt, m, n, 0, 0, &c );
        bli_obj_create( dt, m, n, 0, 0, &c_save );

        bli_randm( &a );
        bli_randm( &b );
        bli_randm( &c );

        bli_obj_set_struc( BLIS_HERMITIAN, &a );
        bli_obj_set_uplo( uploa, &a );

        // Make A densely Hermitian, and zero the unstored triangle to
        // ensure the implementation reads only from the stored region.
        bli_mkherm( &a );
        bli_mktrim( &a );

        bli_setsc(  (2.0/1.0), 0.0, &alpha );
        bli_setsc(  (1.0/1.0), 0.0, &beta );

        // Keep a pristine copy of C so every repeat starts from the same
        // input.
        bli_copym( &c, &c_save );

#ifdef BLIS
        // Enable only the requested induced method for this datatype.
        bli_ind_disable_all_dt( dt );
        bli_ind_enable_dt( ind, dt );
#endif

        dtime_save = DBL_MAX;

        for ( r = 0; r < n_repeats; ++r )
        {
            bli_copym( &c_save, &c );

            dtime = bli_clock();

#ifdef PRINT
            bli_printm( "a", &a, "%4.1f", "" );
            bli_printm( "b", &b, "%4.1f", "" );
            bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS

            bli_hemm( side,
                      &alpha,
                      &a,
                      &b,
                      &beta,
                      &c );

#else

            // Dispatch to the BLAS routine for the datatype: ?symm for the
            // real types, ?hemm for the complex types.
            if ( bli_is_float( dt ) )
            {
                f77_int  mm     = bli_obj_length( &c );
                f77_int  nn     = bli_obj_width( &c );
                f77_int  lda    = bli_obj_col_stride( &a );
                f77_int  ldb    = bli_obj_col_stride( &b );
                f77_int  ldc    = bli_obj_col_stride( &c );
                float*   alphap = bli_obj_buffer( &alpha );
                float*   ap     = bli_obj_buffer( &a );
                float*   bp     = bli_obj_buffer( &b );
                float*   betap  = bli_obj_buffer( &beta );
                float*   cp     = bli_obj_buffer( &c );

                ssymm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_double( dt ) )
            {
                f77_int  mm     = bli_obj_length( &c );
                f77_int  nn     = bli_obj_width( &c );
                f77_int  lda    = bli_obj_col_stride( &a );
                f77_int  ldb    = bli_obj_col_stride( &b );
                f77_int  ldc    = bli_obj_col_stride( &c );
                double*  alphap = bli_obj_buffer( &alpha );
                double*  ap     = bli_obj_buffer( &a );
                double*  bp     = bli_obj_buffer( &b );
                double*  betap  = bli_obj_buffer( &beta );
                double*  cp     = bli_obj_buffer( &c );

                dsymm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_scomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   nn     = bli_obj_width( &c );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldb    = bli_obj_col_stride( &b );
                f77_int   ldc    = bli_obj_col_stride( &c );
                scomplex* alphap = bli_obj_buffer( &alpha );
                scomplex* ap     = bli_obj_buffer( &a );
                scomplex* bp     = bli_obj_buffer( &b );
                scomplex* betap  = bli_obj_buffer( &beta );
                scomplex* cp     = bli_obj_buffer( &c );

                chemm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
            else if ( bli_is_dcomplex( dt ) )
            {
                f77_int   mm     = bli_obj_length( &c );
                f77_int   nn     = bli_obj_width( &c );
                f77_int   lda    = bli_obj_col_stride( &a );
                f77_int   ldb    = bli_obj_col_stride( &b );
                f77_int   ldc    = bli_obj_col_stride( &c );
                dcomplex* alphap = bli_obj_buffer( &alpha );
                dcomplex* ap     = bli_obj_buffer( &a );
                dcomplex* bp     = bli_obj_buffer( &b );
                dcomplex* betap  = bli_obj_buffer( &beta );
                dcomplex* cp     = bli_obj_buffer( &c );

                zhemm_( &f77_side,
                        &f77_uploa,
                        &mm,
                        &nn,
                        alphap,
                        ap, &lda,
                        bp, &ldb,
                        betap,
                        cp, &ldc );
            }
#endif

#ifdef PRINT
            bli_printm( "c after", &c, "%4.1f", "" );
            exit(1);
#endif

            // Fold this repeat's elapsed time into dtime_save (the helper's
            // name suggests it keeps the smallest time seen so far).
            dtime_save = bli_clock_min_diff( dtime_save, dtime );
        }

        // Flop count: 2*m*m*n for a left-side product, 2*m*n*n for a
        // right-side one, scaled by 4 for the complex datatypes.
        if ( bli_is_left( side ) )
            gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 );
        else
            gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 );

        if ( bli_is_complex( dt ) ) gflops *= 4.0;

#ifdef BLIS
        printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR );
#else
        printf( "data_%s_%chemm_%s",      THR_STR, dt_ch, STR );
#endif
        printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n",
                ( unsigned long )( ( p - p_begin + 1 ) / p_inc + 1 ),
                ( unsigned long )m,
                ( unsigned long )n, gflops );

        bli_obj_free( &alpha );
        bli_obj_free( &beta );

        bli_obj_free( &a );
        bli_obj_free( &b );
        bli_obj_free( &c );
        bli_obj_free( &c_save );
    }

    //bli_finalize();

    return 0;
}

View File

@@ -242,7 +242,7 @@ int main( int argc, char** argv )
f77_int ldc = bli_obj_col_stride( &c );
float* alphap = bli_obj_buffer( &alpha );
scomplex* ap = bli_obj_buffer( &a );
scomplex* betap = bli_obj_buffer( &beta );
float* betap = bli_obj_buffer( &beta );
scomplex* cp = bli_obj_buffer( &c );
cherk_( &f77_uploc,
@@ -262,7 +262,7 @@ int main( int argc, char** argv )
f77_int ldc = bli_obj_col_stride( &c );
double* alphap = bli_obj_buffer( &alpha );
dcomplex* ap = bli_obj_buffer( &a );
dcomplex* betap = bli_obj_buffer( &beta );
double* betap = bli_obj_buffer( &beta );
dcomplex* cp = bli_obj_buffer( &c );
zherk_( &f77_uploc,

View File

@@ -100,38 +100,6 @@ endif
# BLAS library path(s). This is where the BLAS libraries reside.
HOME_LIB_PATH := $(HOME)/flame/lib
#MKL_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/mkl/lib/intel64
#MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64
MKL_LIB_PATH := ${MKLROOT}/lib/intel64
#ICC_LIB_PATH := /opt/apps/intel/13/composer_xe_2013.2.146/compiler/lib/intel64
# OpenBLAS
OPENBLAS_LIB := $(HOME_LIB_PATH)/libopenblas.a
OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a
# ATLAS
ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \
$(HOME_LIB_PATH)/libatlas.a
# MKL
MKL_LIB := -L$(MKL_LIB_PATH) \
-lmkl_intel_lp64 \
-lmkl_core \
-lmkl_sequential \
-lpthread -lm -ldl
#MKLP_LIB := -L$(MKL_LIB_PATH) \
# -lmkl_intel_thread \
# -lmkl_core \
# -lmkl_intel_ilp64 \
# -L$(ICC_LIB_PATH) \
# -liomp5
MKLP_LIB := -L$(MKL_LIB_PATH) \
-lmkl_intel_lp64 \
-lmkl_core \
-lmkl_gnu_thread \
-lpthread -lm -ldl -fopenmp
#-L$(ICC_LIB_PATH) \
#-lgomp
@@ -162,42 +130,18 @@ CFLAGS += -I$(TEST_SRC_PATH)
LIBBLIS_LINK := $(LIB_PATH)/$(LIBBLIS_L)
# Datatypes for A, B, and C.
#DTA_S := -DDTA=BLIS_FLOAT
#DTA_D := -DDTA=BLIS_DOUBLE
#DTA_C := -DDTA=BLIS_SCOMPLEX
#DTA_Z := -DDTA=BLIS_DCOMPLEX
#
#DTB_S := -DDTB=BLIS_FLOAT
#DTB_D := -DDTB=BLIS_DOUBLE
#DTB_C := -DDTB=BLIS_SCOMPLEX
#DTB_Z := -DDTB=BLIS_DCOMPLEX
#
#DTC_S := -DDTC=BLIS_FLOAT
#DTC_D := -DDTC=BLIS_DOUBLE
#DTC_C := -DDTC=BLIS_SCOMPLEX
#DTC_Z := -DDTC=BLIS_DCOMPLEX
#
#DTX_S := -DDTC=BLIS_FLOAT
#DTX_D := -DDTC=BLIS_DOUBLE
# Which library?
BLI_DEF := -DBLIS
BLA_DEF := -DBLAS
# Implementation string
STR_BLI := -DSTR=\"asm_blis\"
STR_OBL := -DSTR=\"openblas\"
STR_MKL := -DSTR=\"mkl\"
# Single or multithreaded string
STR_ST := -DTHR_STR=\"st\"
STR_MT := -DTHR_STR=\"mt\"
# Problem size specification
PDEF_ST := -DP_BEGIN=96 \
-DP_END=1200 \
-DP_INC=96
PDEF_ST := -DP_BEGIN=40 \
-DP_END=2000 \
-DP_INC=40
PDEF_MT := -DP_BEGIN=80 \
-DP_END=4000 \
@@ -232,9 +176,9 @@ get-cdef-x = $(strip $(subst s,-DDTX=BLIS_FLOAT, \
get-cdefs = $(call get-cdef-c,$(1)) $(call get-cdef-a,$(1)) $(call get-cdef-b,$(1)) $(call get-cdef-x,$(1))
# Define a function to return the appropriate -DSTR= and -D[BLIS|BLAS] flags.
get-idefs = $(strip $(subst asm_blis,-DSTR=\"$(1)\" -DBLIS, \
$(subst openblas,-DSTR=\"$(1)\" -DBLAS, \
$(subst mkl,-DSTR=\"$(1)\" -DBLAS,$(1)))))
get-idefs = $(strip $(subst intern,-DSTR=\"$(1)\" -DBLIS, \
$(subst ad-hoc,-DSTR=\"$(1)\" -DBLAS, \
$(subst mkl,-DSTR=\"$(1)\" -DBLAS,$(1)))))
# Enumerate all possible datatype combinations.
DT_CODES := $(foreach dt0,$(dts),$(foreach dt1,$(dts),$(foreach dt2,$(dts),$(foreach pr,$(prs),$(dt0)_$(dt1)_$(dt2)_$(pr)))))
@@ -243,21 +187,15 @@ DT_CODES := $(foreach dt0,$(dts),$(foreach dt1,$(dts),$(foreach dt2,$(dts),$(for
DT_COMBOS := $(foreach code,$(DT_CODES),$(call get-cstr,$(code)))
# Build a list of BLIS, OpenBLAS, and MKL executables.
BLIS_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_asm_blis_st.o)
BLIS_BINS_ST := $(patsubst %.o,%.x,$(BLIS_OBJS_ST))
OPENBLAS_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_openblas_st.o)
OPENBLAS_BINS_ST := $(patsubst %.o,%.x,$(OPENBLAS_OBJS_ST))
INTERN_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_intern_st.o)
INTERN_BINS_ST := $(patsubst %.o,%.x,$(INTERN_OBJS_ST))
AD_HOC_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_ad-hoc_st.o)
AD_HOC_BINS_ST := $(patsubst %.o,%.x,$(AD_HOC_OBJS_ST))
BLIS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_asm_blis_mt.o)
BLIS_BINS_MT := $(patsubst %.o,%.x,$(BLIS_OBJS_MT))
OPENBLAS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_openblas_mt.o)
OPENBLAS_BINS_MT := $(patsubst %.o,%.x,$(OPENBLAS_OBJS_MT))
#MKL_OBJS_ST := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_mkl_st.o)
#BLIS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_asm_blis_mt.o)
#OPENBLAS_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_openblas_mt.o)
#MKL_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_mkl_mt.o)
INTERN_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_intern_mt.o)
INTERN_BINS_MT := $(patsubst %.o,%.x,$(INTERN_OBJS_MT))
AD_HOC_OBJS_MT := $(foreach combo,$(DT_COMBOS),test_$(combo)gemm_ad-hoc_mt.o)
AD_HOC_BINS_MT := $(patsubst %.o,%.x,$(AD_HOC_OBJS_MT))
@@ -265,15 +203,16 @@ OPENBLAS_BINS_MT := $(patsubst %.o,%.x,$(OPENBLAS_OBJS_MT))
# --- Targets/rules ------------------------------------------------------------
#
all: st
all: st
st: blis-st openblas-st
mt: blis-mt openblas-mt
st: intern-st ad-hoc-st
mt: intern-mt ad-hoc-mt
intern-st: $(INTERN_BINS_ST)
ad-hoc-st: $(AD_HOC_BINS_ST)
intern-mt: $(INTERN_BINS_MT)
ad-hoc-mt: $(AD_HOC_BINS_MT)
blis-st: $(BLIS_BINS_ST)
openblas-st: $(OPENBLAS_BINS_ST)
blis-mt: $(BLIS_BINS_MT)
openblas-mt: $(OPENBLAS_BINS_MT)
#blis: test_ssssgemm_asm_blis_st.x \
# test_sssdgemm_asm_blis_st.x \
# test_ssdsgemm_asm_blis_st.x \
@@ -281,13 +220,6 @@ openblas-mt: $(OPENBLAS_BINS_MT)
# test_dsssgemm_asm_blis_st.x \
# test_dddsgemm_asm_blis_st.x \
# test_ddddgemm_asm_blis_st.x
#openblas: test_ssssgemm_openblas_st.x \
# test_sssdgemm_openblas_st.x \
# test_ssdsgemm_openblas_st.x \
# test_sdssgemm_openblas_st.x \
# test_dsssgemm_openblas_st.x \
# test_dddsgemm_openblas_st.x \
# test_ddddgemm_openblas_st.x
# --Object file rules --
@@ -316,7 +248,7 @@ endef
# Define the implementations for which we will instantiate compilation rules.
IMPLS := asm_blis openblas
IMPLS := intern ad-hoc
# Instantiate the rule function make-st-rule() and make-mt-rule for each
# implementation in IMPLS and each of the datatype "codes" in DT_CODES.
@@ -334,56 +266,44 @@ $(foreach code,$(DT_CODES),$(eval $(call make-mt-rule,$(code),$(impl)))))
# compatibility layer. This prevents BLIS from inadvertently getting called
# for the BLAS routines we are trying to test with.
test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK)
test_%_ad-hoc_st.x: test_%_ad-hoc_st.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(RM_F) $<
else
@@echo "Linking $@ to '$(notdir $(OPENBLAS_LIB)) $(LIBBLIS_LINK)'"
@$(LINKER) $< $(OPENBLAS_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(RM_F) $<
endif
test_%_openblas_mt.x: test_%_openblas_mt.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(RM_F) $<
else
@@echo "Linking $@ to '$(notdir $(OPENBLAS_LIB)) $(LIBBLIS_LINK)'"
@$(LINKER) $< $(OPENBLASP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(RM_F) $<
endif
#test_%_mkl_st.x: test_%_mkl_st.o $(LIBBLIS_LINK)
#ifeq ($(ENABLE_VERBOSE),yes)
# $(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
# $(RM_F) $<
#else
# @@echo "Linking $@ to '$(notdir $(MKL_LIB)) $(LIBBLIS_LINK)'"
# @$(LINKER) $< $(MKL_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
# @$(RM_F) $<
#endif
#test_%_mkl_mt.x: test_%_mkl_mt.o $(LIBBLIS_LINK)
# $(LINKER) $< $(MKLP_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@
test_%_blis_st.x: test_%_blis_st.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(RM_F) $<
else
@@echo "Linking $@ to '$(LIBBLIS_LINK)'"
@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(RM_F) $<
endif
test_%_blis_mt.x: test_%_blis_mt.o $(LIBBLIS_LINK)
test_%_ad-hoc_mt.x: test_%_ad-hoc_mt.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(RM_F) $<
else
@@echo "Linking $@ to '$(LIBBLIS_LINK)'"
@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(RM_F) $<
endif
test_%_intern_st.x: test_%_intern_st.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(RM_F) $<
else
@@echo "Linking $@ to '$(LIBBLIS_LINK)'"
@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(RM_F) $<
endif
test_%_intern_mt.x: test_%_intern_mt.o $(LIBBLIS_LINK)
ifeq ($(ENABLE_VERBOSE),yes)
$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
$(RM_F) $<
else
@@echo "Linking $@ to '$(LIBBLIS_LINK)'"
@$(LINKER) $< $(LIBBLIS_LINK) $(LDFLAGS) -o $@
@$(RM_F) $<
endif

Binary file not shown.

View File

@@ -0,0 +1,101 @@
function r_val = gen_prec_combos( mdcase )
% GEN_PREC_COMBOS  Build the datatype-combination strings for one domain case.
%
%   r_val = gen_prec_combos( mdcase )
%
%   mdcase  String whose characters 1, 2 and 3 are handed to
%           prec_dom_to_dt() as the domain of C, A and B respectively.
%   r_val   Char matrix with one row per entry of the precision table below.
%           Row i is sprintf( '%c%c%c%c', dtc, dta, dtb, pr ), where dtc, dta
%           and dtb are the prec_dom_to_dt() results for C, A and B and pr is
%           the fourth character of the table entry, copied through unchanged.

% Domain characters of C, A and B.
dmc = mdcase( 1 );
dma = mdcase( 2 );
dmb = mdcase( 3 );

% Precision table. Each entry holds the precision characters of C, A and B
% followed by a fourth character that is appended to the result as-is.
% The order is fixed by hand and determines the row order of r_val.
pr_combos = [ 'ssss'; 'ssds'; 'dddd'; 'ddsd'; ...
              'sdss'; 'sdds'; 'dsdd'; 'dssd'; ...
              'dsss'; 'dsds'; 'sddd'; 'sdsd'; ...
              'ddss'; 'ddds'; 'ssdd'; 'sssd' ];

for i = 1:size( pr_combos, 1 )

	pr_combo = pr_combos( i, : );

	prc = pr_combo( 1 );
	pra = pr_combo( 2 );
	prb = pr_combo( 3 );
	pr  = pr_combo( 4 );

	% Combine each operand's precision with its domain to get its datatype
	% character.
	dtc = prec_dom_to_dt( prc, dmc );
	dta = prec_dom_to_dt( pra, dma );
	dtb = prec_dom_to_dt( prb, dmb );

	dt_combos( i, : ) = sprintf( '%c%c%c%c', dtc, dta, dtb, pr );
end

r_val = dt_combos;

end

View File

View File

@@ -0,0 +1,23 @@
function r_val = plot_dom_all( is_mt )
% PLOT_DOM_ALL  Run plot_dom_case() for every three-character domain case.
%
%   r_val = plot_dom_all( is_mt )
%
%   is_mt   Forwarded unchanged to plot_dom_case().
%   r_val   Always 0.

% All eight strings of length three over the characters 'r' and 'c',
% one per row.
dom_cases = [ 'rrr'; 'rrc'; 'rcr'; 'rcc'; ...
              'crr'; 'crc'; 'ccr'; 'ccc' ];

for idx = 1:size( dom_cases, 1 )
	plot_dom_case( dom_cases( idx, : ), is_mt );
end

r_val = 0;

end

View File

@@ -0,0 +1,140 @@
function r_val = plot_dom_case( mdcase, is_mt )
% PLOT_DOM_CASE  Plot gemm performance for every precision combination of
% one domain case and print the figure to a PDF.
%
%   r_val = plot_dom_case( mdcase, is_mt )
%
%   mdcase  Domain-case string; passed to gen_prec_combos() and used in the
%           name of the output file ('output/gemm_<mdcase>').
%   is_mt   1 selects the 'mt' data files; any other value selects 'st'.
%           Also forwarded to plot_gemm_perf().
%   r_val   Always 0.
%
%   Data is loaded by run()-ing scripts named '../output_<thr>_<combo>gemm_*.m'.
%   Each script is expected to define data_gemm_asm_blis or data_gemm_openblas,
%   which this function reads immediately afterwards.
%   NOTE(review): the 'output' directory presumably must already exist for
%   the final print() to succeed -- confirm.

if is_mt == 1
	thr_str = 'mt';
else
	thr_str = 'st';
end

dt_combos = gen_prec_combos( mdcase );

n_combos = size(dt_combos,1);

filetemp_blis = '../output_%s_%sgemm_asm_blis.m';
filetemp_open = '../output_%s_%sgemm_openblas.m';

% Construct filenames for the "reference" (single real) data, then load
% the data files, and finally save the results to different variable names.
file_blis_sref = sprintf( filetemp_blis, thr_str, 'ssss' );
file_open_sref = sprintf( filetemp_open, thr_str, 'ssss' );
run( file_blis_sref )
run( file_open_sref )
data_gemm_asm_blis_sref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_sref( :, : ) = data_gemm_openblas( :, : );

% Construct filenames for the "reference" (double real) data, then load
% the data files, and finally save the results to different variable names.
file_blis_dref = sprintf( filetemp_blis, thr_str, 'dddd' );
file_open_dref = sprintf( filetemp_open, thr_str, 'dddd' );
run( file_blis_dref )
run( file_open_dref )
data_gemm_asm_blis_dref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_dref( :, : ) = data_gemm_openblas( :, : );

% Construct filenames for the "reference" (single complex) data, then load
% the data files, and finally save the results to different variable names.
file_blis_cref = sprintf( filetemp_blis, thr_str, 'cccs' );
file_open_cref = sprintf( filetemp_open, thr_str, 'cccs' );
run( file_blis_cref )
run( file_open_cref )
data_gemm_asm_blis_cref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_cref( :, : ) = data_gemm_openblas( :, : );

% Construct filenames for the "reference" (double complex) data, then load
% the data files, and finally save the results to different variable names.
file_blis_zref = sprintf( filetemp_blis, thr_str, 'zzzd' );
file_open_zref = sprintf( filetemp_open, thr_str, 'zzzd' );
run( file_blis_zref )
run( file_open_zref )
data_gemm_asm_blis_zref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_zref( :, : ) = data_gemm_openblas( :, : );

% Set up a 14 x 11 inch portrait page with manual paper positioning.
fig = figure;
orient( fig, 'portrait' );
set(gcf,'PaperUnits', 'inches');
set(gcf,'PaperSize', [14 11.0]);
set(gcf,'PaperPosition', [0 0 14 11.0]);
set(gcf,'PaperPositionMode','manual');
set(gcf,'PaperOrientation','portrait');

for dti = 1:n_combos

	% Grab the current datatype combination.
	combo = dt_combos( dti, : );

	str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str);

	% Pick the real reference data that matches the fourth character of
	% the combination ('s' -> ssss data, anything else -> dddd data).
	if combo(4) == 's'
		data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : );
		data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : );
		refch = 's';
	else %if combo(4) == 'd'
		data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : );
		data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : );
		refch = 'd';
	end

	% When all three operands are complex ('c' or 'z'), override the
	% reference with the matching complex data instead.
	if ( combo(1) == 'c' || combo(1) == 'z' ) && ...
	   ( combo(2) == 'c' || combo(2) == 'z' ) && ...
	   ( combo(3) == 'c' || combo(3) == 'z' )
		if combo(4) == 's'
			data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : );
			data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : );
			refch = 'c';
		else %if combo(4) == 'd'
			data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : );
			data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : );
			refch = 'z';
		end
	end

	% Construct filenames for the data files from templates.
	file_blis = sprintf( filetemp_blis, thr_str, combo );
	file_open = sprintf( filetemp_open, thr_str, combo );

	% Load the data files.
	run( file_blis )
	run( file_open )

	% Plot the result.
	% NOTE(review): the 4, 4 arguments are presumably the subplot grid
	% dimensions and dti the panel index -- confirm against plot_gemm_perf.
	plot_gemm_perf( combo, ...
	                data_gemm_asm_blis, ...
	                data_gemm_asm_blis_ref, ...
	                data_gemm_openblas, ...
	                data_gemm_openblas_ref, ...
	                is_mt, refch, 4, 4, dti );

end

outfile = sprintf( 'output/gemm_%s', mdcase );

print(gcf, outfile,'-bestfit','-dpdf');

% Assign the declared output so callers that capture the return value do
% not hit an unassigned-output error.
r_val = 0;

View File

@@ -1,4 +1,4 @@
function r_val = plot_all_md( is_mt )
function r_val = plot_dt_all( is_mt )
if is_mt == 1
thr_str = 'mt';
@@ -73,8 +73,8 @@ set(gcf,'Position',[0 0 2000 900]);
set(gcf,'PaperUnits', 'inches');
set(gcf,'PaperSize', [64 33]);
set(gcf,'PaperPosition', [0 0 64 33]);
%set(gcf,'PaperPositionMode','auto');
set(gcf,'PaperPositionMode','manual');
%set(gcf,'PaperPositionMode','auto');
set(gcf,'PaperPositionMode','manual');
set(gcf,'PaperOrientation','landscape');
for dti = 1:n_combos
@@ -88,9 +88,11 @@ for dti = 1:n_combos
if combo(4) == 's'
data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : );
data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : );
elseif combo(4) == 'd'
refch = 's';
else %if combo(4) == 'd'
data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : );
data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : );
refch = 'd';
end
if ( combo(1) == 'c' || combo(1) == 'z' ) && ...
@@ -99,9 +101,11 @@ for dti = 1:n_combos
if combo(4) == 's'
data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : );
data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : );
elseif combo(4) == 'd'
refch = 'c';
else %if combo(4) == 'd'
data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : );
data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : );
refch = 'z';
end
end
@@ -121,7 +125,7 @@ for dti = 1:n_combos
data_gemm_asm_blis_ref, ...
data_gemm_openblas, ...
data_gemm_openblas_ref, ...
is_mt, dti );
is_mt, refch, 8, 16, dti );
end
@@ -135,5 +139,5 @@ set(gcf,'PaperPosition', [0 0 48 22]);
set(gcf,'PaperPositionMode','manual');
set(gcf,'PaperOrientation','landscape');
end
print(gcf, 'gemm_md','-bestfit','-dpdf');
print(gcf, 'output/gemm_md','-bestfit','-dpdf');
%print(gcf, 'gemm_md','-fillpage','-dpdf');

View File

@@ -0,0 +1,148 @@
function r_val = plot_dt_select( dom, is_mt )
% PLOT_DT_SELECT  Plot six selected mixed-datatype gemm results on one page.
%
%   plot_dt_select( dom, is_mt ) loads performance data for six hard-coded
%   four-character datatype combinations, draws each one into a 3x2 grid of
%   subplots via plot_gemm_perf(), and prints the figure to
%   'output/gemm_select_<dom>' as a PDF.
%
%   Parameters:
%     dom    - 'r' selects the first table of combinations below; any other
%              value selects the second table (the one whose combinations
%              begin with 'c' or 'z').
%     is_mt  - 1 selects the 'mt' data files; any other value selects 'st'.
%
%   Returns:
%     r_val  - 0. Only assigned when the caller requests an output, so that
%              calling the function as a plain statement does not set 'ans'.
%
%   Data files are MATLAB scripts located one directory up. They are executed
%   with run() and are expected to define the variables data_gemm_asm_blis
%   and data_gemm_openblas, which are read immediately afterwards.

if is_mt == 1
    thr_str = 'mt';
else
    thr_str = 'st';
end

% Datatype combinations to plot, one per subplot, in subplot order.
% NOTE(review): presumably the first three characters are operand datatypes
% and the fourth is the computation precision -- confirm against the test
% driver that generates the data files.
if dom == 'r'
    dt_combos( 1, : ) = [ 'dsss' ];
    dt_combos( 2, : ) = [ 'sddd' ];
    dt_combos( 3, : ) = [ 'sdds' ];
    dt_combos( 4, : ) = [ 'dssd' ];
    dt_combos( 5, : ) = [ 'ddds' ];
    dt_combos( 6, : ) = [ 'sssd' ];
else
    dt_combos( 1, : ) = [ 'csss' ];
    dt_combos( 2, : ) = [ 'zddd' ];
    dt_combos( 3, : ) = [ 'ccss' ];
    dt_combos( 4, : ) = [ 'zzdd' ];
    dt_combos( 5, : ) = [ 'cscs' ];
    dt_combos( 6, : ) = [ 'zdzd' ];
end

n_combos = size( dt_combos, 1 );

% Filename templates: first %s is the threading string, second %s is the
% four-character datatype combination.
filetemp_blis = '../output_%s_%sgemm_asm_blis.m';
filetemp_open = '../output_%s_%sgemm_openblas.m';

% Construct filenames for the "reference" (single real) data, then load
% the data files, and finally save the results to different variable names.
file_blis_sref = sprintf( filetemp_blis, thr_str, 'ssss' );
file_open_sref = sprintf( filetemp_open, thr_str, 'ssss' );
run( file_blis_sref )
run( file_open_sref )
data_gemm_asm_blis_sref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_sref( :, : ) = data_gemm_openblas( :, : );

% Construct filenames for the "reference" (double real) data, then load
% the data files, and finally save the results to different variable names.
file_blis_dref = sprintf( filetemp_blis, thr_str, 'dddd' );
file_open_dref = sprintf( filetemp_open, thr_str, 'dddd' );
run( file_blis_dref )
run( file_open_dref )
data_gemm_asm_blis_dref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_dref( :, : ) = data_gemm_openblas( :, : );

% Construct filenames for the "reference" (single complex) data, then load
% the data files, and finally save the results to different variable names.
file_blis_cref = sprintf( filetemp_blis, thr_str, 'cccs' );
file_open_cref = sprintf( filetemp_open, thr_str, 'cccs' );
run( file_blis_cref )
run( file_open_cref )
data_gemm_asm_blis_cref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_cref( :, : ) = data_gemm_openblas( :, : );

% Construct filenames for the "reference" (double complex) data, then load
% the data files, and finally save the results to different variable names.
file_blis_zref = sprintf( filetemp_blis, thr_str, 'zzzd' );
file_open_zref = sprintf( filetemp_open, thr_str, 'zzzd' );
run( file_blis_zref )
run( file_open_zref )
data_gemm_asm_blis_zref( :, : ) = data_gemm_asm_blis( :, : );
data_gemm_openblas_zref( :, : ) = data_gemm_openblas( :, : );

% Set up a portrait figure and a 9 x 11 inch page with manual positioning.
fig = figure('Position', [100, 100, 1024, 1300]);
orient( fig, 'portrait' );
set(gcf,'PaperUnits', 'inches');
set(gcf,'PaperSize', [9 11.0]);
set(gcf,'PaperPosition', [0 0 9 11.0]);
set(gcf,'PaperPositionMode','manual');
set(gcf,'PaperOrientation','portrait');

for dti = 1:n_combos

    % Grab the current datatype combination.
    combo = dt_combos( dti, : );

    str = sprintf( 'Plotting %d: %s', dti, combo ); disp(str);

    % Default reference: the real-domain data whose precision matches the
    % fourth character of the combination ('s' -> single, otherwise double).
    if combo(4) == 's'
        data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_sref( :, : );
        data_gemm_openblas_ref( :, : ) = data_gemm_openblas_sref( :, : );
        refch = 's';
    else %if combo(4) == 'd'
        data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_dref( :, : );
        data_gemm_openblas_ref( :, : ) = data_gemm_openblas_dref( :, : );
        refch = 'd';
    end

    % If the first three characters are all complex ('c' or 'z'), override
    % the reference with the complex data of the matching precision.
    if ( combo(1) == 'c' || combo(1) == 'z' ) && ...
       ( combo(2) == 'c' || combo(2) == 'z' ) && ...
       ( combo(3) == 'c' || combo(3) == 'z' )
        if combo(4) == 's'
            data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_cref( :, : );
            data_gemm_openblas_ref( :, : ) = data_gemm_openblas_cref( :, : );
            refch = 'c';
        else %if combo(4) == 'd'
            data_gemm_asm_blis_ref( :, : ) = data_gemm_asm_blis_zref( :, : );
            data_gemm_openblas_ref( :, : ) = data_gemm_openblas_zref( :, : );
            refch = 'z';
        end
    end

    % Construct filenames for the data files from templates.
    file_blis = sprintf( filetemp_blis, thr_str, combo );
    file_open = sprintf( filetemp_open, thr_str, combo );

    % Load the data files. This overwrites data_gemm_asm_blis and
    % data_gemm_openblas with the data for the current combination.
    run( file_blis )
    run( file_open )

    % Plot the result into cell dti of a 3-row, 2-column subplot grid.
    plot_gemm_perf( combo, ...
                    data_gemm_asm_blis, ...
                    data_gemm_asm_blis_ref, ...
                    data_gemm_openblas, ...
                    data_gemm_openblas_ref, ...
                    is_mt, refch, 3, 2, dti );

end

% Make sure the destination directory exists before printing; otherwise
% print() fails only after all of the plotting work has been done.
% Requesting both outputs keeps mkdir quiet when the directory is already
% present.
[ mk_ok, mk_msg ] = mkdir( 'output' );
if ~mk_ok
    error( 'plot_dt_select:mkdir', ...
           'Could not create directory ''output'': %s', mk_msg );
end

outfile = sprintf( 'output/gemm_select_%c', dom );

print(gcf, outfile,'-bestfit','-dpdf');

% The signature declares an output, so assign it when one is requested;
% previously any caller capturing the result hit an unassigned-output error.
if nargout > 0
    r_val = 0;
end

View File

@@ -4,10 +4,12 @@ function r_val = plot_gemm_perf( dt_str, ...
data_open, ...
data_open_ref, ...
is_mt, ...
refch, ...
rows, cols, ...
theid )
if 1
ax1 = subplot( 8, 16, theid );
ax1 = subplot( rows, cols, theid );
hold( ax1, 'on' );
end
@@ -16,7 +18,7 @@ color_open_ref = 'k'; lines_open_ref = ':'; markr_open_ref = 'o';
color_mkl_ref = 'r'; lines_mkl_ref = ':'; markr_mkl_ref = '.';
color_blis = 'b'; lines_blis = '-'; markr_blis = '';
color_open = 'k'; lines_open = '-'; markr_open = 'o';
color_open = 'k'; lines_open = '-.'; markr_open = 'o';
color_mkl = 'r'; lines_mkl = '-'; markr_mkl = '.';
if dt_str(4) == 's'
@@ -50,12 +52,24 @@ filename_png = sprintf( filename_png, dt_str );
%dt0_str = [ dt_str(4), dt_str(4), dt_str(4), dt_str(4) ];
dt0_str = dt_str(4);
blis_sref_legend = sprintf( 'BLIS [sc]gemm' );
blis_dref_legend = sprintf( 'BLIS [dz]gemm' );
blis_legend = sprintf( 'BLIS mixed' );
open_sref_legend = sprintf( 'OBLA [sc]gemm' );
open_dref_legend = sprintf( 'OBLA [dz]gemm' );
open_legend = sprintf( 'OBLA mixed' );
if refch == 's'
blis_ref_legend = sprintf( 'Ref (sgemm)' );
%blis_ref_legend = sprintf( 'Reference' );
elseif refch == 'd'
blis_ref_legend = sprintf( 'Ref (dgemm)' );
%blis_ref_legend = sprintf( 'Reference' );
elseif refch == 'c'
blis_ref_legend = sprintf( 'Ref (cgemm)' );
elseif refch == 'z'
blis_ref_legend = sprintf( 'Ref (zgemm)' );
end
blis_sref_legend = sprintf( 'Ref [sc]gemm' );
blis_dref_legend = sprintf( 'Ref [dz]gemm' );
blis_legend = sprintf( 'Internal' );
open_sref_legend = sprintf( 'Ad-hoc [sc]gemm' );
open_dref_legend = sprintf( 'Ad-hoc [dz]gemm' );
open_legend = sprintf( 'Ad-hoc' );
y_scale = 1.00;
@@ -72,12 +86,12 @@ y_end = max_perf_core * y_scale;
flopscol = 4;
msize = 5;
if 1
fontsize = 12;
fontsize = 13;
else
fontsize = 16;
end
linesize = 0.7;
legend_loc = 'SouthEast';
linesize = 0.5;
legend_loc = 'southeast';
% --------------------------------------------------------------------
@@ -95,9 +109,9 @@ blis_ref = line( x_axis( :, 1 ), data_blis_ref( :, flopscol ) / nth, ...
blis_md = line( x_axis( :, 1 ), data_blis( :, flopscol ) / nth, ...
'Color',color_blis, 'LineStyle',lines_blis, ...
'LineWidth',linesize );
open_ref = line( x_axis( :, 1 ), data_open_ref( :, flopscol ) / nth, ...
'Color',color_open_ref, 'LineStyle',lines_open_ref, ...
'LineWidth',linesize );
%open_ref = line( x_axis( :, 1 ), data_open_ref( :, flopscol ) / nth, ...
% 'Color',color_open_ref, 'LineStyle',lines_open_ref, ...
% 'LineWidth',linesize );
open_md = line( x_axis( :, 1 ), data_open( :, flopscol ) / nth, ...
'Color',color_open, 'LineStyle',lines_open, ...
'LineWidth',linesize );
@@ -108,42 +122,117 @@ open_md = line( x_axis( :, 1 ), data_open( :, flopscol ) / nth, ...
xlim( ax1, [x_begin x_end] );
ylim( ax1, [y_begin y_end] );
if theid == 1
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_ref ...
open_md ...
], ...
blis_sref_legend, ...
blis_legend, ...
open_sref_legend, ...
open_legend, ...
'Location', 'best' );
%'Location', legend_loc );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
elseif theid == 9
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_ref ...
open_md ...
], ...
blis_dref_legend, ...
blis_legend, ...
open_dref_legend, ...
open_legend, ...
'Location', 'best' );
%'Location', legend_loc );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
if rows == 8 && cols == 16
if theid == 1
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_md ...
], ...
blis_sref_legend, ...
blis_legend, ...
open_legend, ...
'Location', 'best' );
%'Location', legend_loc );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
elseif theid == 9
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_md ...
], ...
blis_dref_legend, ...
blis_legend, ...
open_legend, ...
'Location', 'best' );
%'Location', legend_loc );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
end
elseif rows == 4 && cols == 4
if theid == 1
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_md ...
], ...
blis_ref_legend, ...
blis_legend, ...
open_legend, ...
'Location', legend_loc );
%'Location', 'best' );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
set( leg,'Position',[1.03 3.46 0.7 0.3 ] );
elseif theid == 3
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_md ...
], ...
blis_ref_legend, ...
blis_legend, ...
open_legend, ...
'Location', legend_loc );
%'Location', 'best' );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
set( leg,'Position',[3.51 3.46 0.7 0.3 ] );
end
elseif rows == 3 && cols == 2
if theid == 1
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_md ...
], ...
blis_ref_legend, ...
blis_legend, ...
open_legend, ...
'Location', legend_loc );
%'Location', 'best' );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
%set( leg,'Position',[1.03 3.46 0.7 0.3 ] );
elseif theid == 2
leg = legend( ...
[ ...
blis_ref ...
blis_md ...
open_md ...
], ...
blis_ref_legend, ...
blis_legend, ...
open_legend, ...
'Location', legend_loc );
%'Location', 'best' );
set( leg,'Box','off' );
set( leg,'Color','none' );
set( leg,'FontSize',fontsize-2 );
set( leg,'Units','inches' );
%set( leg,'Position',[3.51 3.46 0.7 0.3 ] );
end
end
@@ -159,14 +248,14 @@ tpos = get( titl, 'Position' ); % default is to align across whole figure, not b
tpos(1) = tpos(1) + 40;
set( titl, 'Position', tpos ); % here we nudge it back to centered with box.
if theid > 112
if theid > (rows-1)*cols
xlab = xlabel( ax1,xaxisname );
%tpos = get( xlab, 'Position' )
%tpos(2) = tpos(2) + 10;
%set( xlab, 'Position', tpos );
end
if mod(theid-1,16) == 0
if mod(theid-1,cols) == 0
ylab = ylabel( ax1,yaxisname );
end

View File

@@ -0,0 +1,17 @@
function r_val = prec_dom_to_dt( pc, dc )
% PREC_DOM_TO_DT  Map a precision character and a domain character to a
% single datatype character.
%
%   pc - precision character; 's' picks the first letter of the pair below,
%        anything else picks the second.
%   dc - domain character; 'r' picks the pair 's'/'d', anything else picks
%        the pair 'c'/'z'.
%
%   Returns one of 's', 'd', 'c', 'z'.

% Choose the candidate pair from the domain first.
if dc == 'r'
    dt_pair = 'sd';
else
    dt_pair = 'cz';
end

% Then pick within the pair according to the precision.
if pc == 's'
    r_val = dt_pair( 1 );
else
    r_val = dt_pair( 2 );
end

end

Binary file not shown.

View File

@@ -67,15 +67,15 @@ fi
# Complex domain implementations to test.
if [ ${sys} = "blis" ]; then
test_impls="openblas asm_blis"
test_impls="ad-hoc intern"
elif [ ${sys} = "stampede2" ]; then
test_impls="openblas asm_blis mkl"
test_impls=""
elif [ ${sys} = "lonestar5" ]; then
test_impls="openblas mkl asm_blis"
test_impls=""
fi
# Datatypes to test.
@@ -102,16 +102,13 @@ for dtc in ${dt_chars}; do
done
# Threadedness to test.
threads="mt"
#threads="st"
test_impls="openblas"
#threads="mt"
threads="st"
# Overrides, in case something goes wrong for a subset of tests.
#test_impls="ad-hoc"
#dt_combos="ssss sssd ssds sdss dsss ddds dddd"
#dt_combos="csss csds cdss cdds zsss zsds zdss zdds cssd csdd cdsd cddd zssd zsdd zdsd zddd"
#dt_combos="cssd csdd cdsd cddd zsss zsds zdss zdds"
#dt_combos="cdsd cddd zsss zsds zdss zdds"
#test_impls="asm_blis"
# Now perform complex test cases.
for th in ${threads}; do
@@ -134,11 +131,11 @@ for th in ${threads}; do
# Unset GOMP_CPU_AFFINITY for OpenBLAS, as it causes the library
# to execute sequentially.
if [ ${im} = "openblas" ]; then
unset GOMP_CPU_AFFINITY
else
export GOMP_CPU_AFFINITY="0 1 2 3"
fi
#if [ ${im} = "openblas" ]; then
# unset GOMP_CPU_AFFINITY
#else
# export GOMP_CPU_AFFINITY="0 1 2 3"
#fi
else
export BLIS_JC_NT=1

View File

@@ -259,7 +259,8 @@ void blas_gemm_md( obj_t* alpha, obj_t* a, obj_t* b, obj_t* beta, obj_t* c )
if ( bli_obj_dt( a ) == bli_obj_dt( b ) &&
bli_obj_dt( b ) == bli_obj_dt( c ) &&
bli_obj_dt( c ) == ( num_t )comp_prec )
//bli_obj_dt( c ) == ( num_t )comp_prec )
bli_obj_prec( c ) == comp_prec )
{
blas_gemm( transa, transb, bli_obj_dt( c ), alpha, a, b, beta, c );
return;