diff --git a/test/sup/Makefile b/test/sup/Makefile index d2b3c7170..7934d7052 100644 --- a/test/sup/Makefile +++ b/test/sup/Makefile @@ -103,6 +103,9 @@ OPENBLASP_LIB := $(HOME_LIB_PATH)/libopenblasp.a # BLASFEO BLASFEO_LIB := $(HOME_LIB_PATH)/libblasfeo.a +# libxsmm +LIBXSMM_LIB := $(HOME_LIB_PATH)/libxsmm.a + # ATLAS ATLAS_LIB := $(HOME_LIB_PATH)/libf77blas.a \ $(HOME_LIB_PATH)/libatlas.a @@ -306,14 +309,18 @@ get-imp-defs = $(strip $(subst blissup,-DSTR=\"$(1)\" -DBLIS -DSUP, \ $(subst eigen,-DSTR=\"$(1)\" -DEIGEN, \ $(subst openblas,-DSTR=\"$(1)\" -DCBLAS, \ $(subst blasfeo,-DSTR=\"$(1)\" -DCBLAS, \ - $(subst vendor,-DSTR=\"$(1)\" -DCBLAS,$(1)))))))) + $(subst libxsmm,-DSTR=\"$(1)\" -DBLAS, \ + $(subst vendor,-DSTR=\"$(1)\" -DCBLAS,$(1))))))))) TRANS0 = $(call stripu,$(TRANS)) STORS0 = $(call stripu,$(STORS)) # Limit BLAS and Eigen to only using all row-stored, or all column-stored matrices. +# Also, limit libxsmm to using all column-stored matrices since it does not offer +# CBLAS interfaces. BSTORS0 = rrr ccc ESTORS0 = rrr ccc +XSTORS0 = ccc # @@ -339,6 +346,9 @@ OPENBLAS_ST_BINS := $(patsubst %.o,%.x,$(OPENBLAS_ST_OBJS)) BLASFEO_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),blasfeo) BLASFEO_ST_BINS := $(patsubst %.o,%.x,$(BLASFEO_ST_OBJS)) +LIBXSMM_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(XSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),libxsmm) +LIBXSMM_ST_BINS := $(patsubst %.o,%.x,$(LIBXSMM_ST_OBJS)) + VENDOR_ST_OBJS := $(call get-st-objs,$(DTS),$(TRANS0),$(BSTORS0),$(SHAPES),$(SMS),$(SNS),$(SKS),vendor) VENDOR_ST_BINS := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS)) @@ -351,6 +361,7 @@ VENDOR_ST_BINS := $(patsubst %.o,%.x,$(VENDOR_ST_OBJS)) $(EIGEN_ST_OBJS) \ $(OPENBLAS_ST_OBJS) \ $(BLASFEO_ST_OBJS) \ + $(LIBXSMM_ST_OBJS) \ $(VENDOR_ST_OBJS) @@ -365,9 +376,11 @@ blislpab: blislpab-st eigen: eigen-st openblas: openblas-st blasfeo: blasfeo-st +libxsmm: libxsmm-st vendor: vendor-st -st: blissup-st blislpab-st eigen-st openblas-st blasfeo-st vendor-st +st: blissup-st blislpab-st \ + eigen-st openblas-st blasfeo-st libxsmm-st vendor-st blis: blissup-st blislpab-st blissup-st: $(BLISSUP_ST_BINS) @@ -375,13 +388,14 @@ blislpab-st: $(BLISLPAB_ST_BINS) eigen-st: $(EIGEN_ST_BINS) openblas-st: $(OPENBLAS_ST_BINS) blasfeo-st: $(BLASFEO_ST_BINS) +libxsmm-st: $(LIBXSMM_ST_BINS) vendor-st: $(VENDOR_ST_BINS) # --Object file rules -- # Define the implementations for which we will instantiate compilation rules. -BIMPLS := blissup blislpab openblas blasfeo vendor +BIMPLS := blissup blislpab openblas blasfeo libxsmm vendor EIMPLS := eigen # 1 2 3 4 567 8 @@ -447,6 +461,9 @@ test_%_openblas_st.x: test_%_openblas_st.o $(LIBBLIS_LINK) test_%_blasfeo_st.x: test_%_blasfeo_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(BLASFEO_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) +test_%_libxsmm_st.x: test_%_libxsmm_st.o $(LIBBLIS_LINK) + $(CC) $(strip $< $(LIBXSMM_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) + test_%_vendor_st.x: test_%_vendor_st.o $(LIBBLIS_LINK) $(CC) $(strip $< $(VENDOR_LIB) $(LIBBLIS_LINK) $(LDFLAGS) -o $@) diff --git a/test/sup/runme.sh b/test/sup/runme.sh index 9646e3ccc..912c50dc0 100755 --- a/test/sup/runme.sh +++ b/test/sup/runme.sh @@ -37,13 +37,14 @@ sns="8" sks="4" # Implementations to test. -impls="vendor blissup blislpab openblas eigen" +impls="vendor blissup blislpab openblas eigen libxsmm blasfeo" #impls="vendor openblas eigen" #impls="blislpab blissup" -#mpls="openblas eigen vendor" -#mpls="eigen" +#impls="openblas eigen vendor" +#impls="eigen" #impls="blissup" #impls="blasfeo" +#impls="libxsmm" # Example: test_dgemm_nn_rrc_m6npkp_blissup_st.x @@ -75,6 +76,13 @@ for th in ${threads}; do continue; fi + # Further limit execution of libxsmm to + # ccc storage cases. + if [ "${im:0:7}" = "libxsmm" ] && \ + [ "${st}" != "ccc" ]; then + continue; + fi + # Extract the shape chars for m, n, k. chm=${sh:0:1} chn=${sh:1:1} diff --git a/test/sup/test_gemm.c b/test/sup/test_gemm.c index 311e8552a..8c9f24c8a 100644 --- a/test/sup/test_gemm.c +++ b/test/sup/test_gemm.c @@ -158,7 +158,8 @@ int main( int argc, char** argv ) ( unsigned long )0, 0.0 ); - for ( p = p_begin; p <= p_max; p += p_inc ) + //for ( p = p_begin; p <= p_max; p += p_inc ) + for ( p = p_max; p_begin <= p; p -= p_inc ) { obj_t a, b, c; obj_t c_save;