Added test drivers for level 3 BLAS that run tests in parallel using MPI

This commit is contained in:
Tyler Michael Smith
2014-03-26 17:19:46 +00:00
parent 73b3db5948
commit a6fd483454
7 changed files with 1744 additions and 0 deletions

323
mpi_test/Makefile Normal file
View File

@@ -0,0 +1,323 @@
#!/bin/bash
#
# BLIS
# An object-based framework for developing high-performance BLAS-like
# libraries.
#
# Copyright (C) 2014, The University of Texas
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# - Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# - Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# - Neither the name of The University of Texas nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
#
#
# Makefile
#
# Field G. Van Zee
#
# Makefile for standalone BLIS test drivers.
#
#
# --- Makefile PHONY target definitions ----------------------------------------
#
# Every goal below names a command group rather than a file on disk, so all of
# them are declared phony. This includes the optional BLAS back-end groups
# (openblas, atlas, mkl, mac), which would otherwise be silently skipped if a
# file with the same name ever appeared in this directory.
.PHONY: all \
        blis essl openblas atlas mkl mac \
        clean cleanx
#
# --- Makefile initialization --------------------------------------------------
#
# File name of the configuration makefile produced by the configure script.
CONFIG_MK_FILE := config.mk
# File name of the makefile that carries build- and architecture-specific
# definitions for a given configuration.
MAKE_DEFS_FILE := make_defs.mk
# Where the top of the source tree is relative to this directory, and the
# name of the directory (beneath it) that holds the configurations.
ROOT_PATH      := ..
CONFIG_DIR     := config
#
# --- Include makefile configuration file --------------------------------------
#
# Full path of the configure-generated configuration makefile.
CONFIG_MK_PATH := $(ROOT_PATH)/$(CONFIG_MK_FILE)
# Pull it in; the leading '-' keeps make quiet when the file does not exist.
-include $(CONFIG_MK_PATH)
# Record whether the include actually happened. If CONFIG_MK_INCLUDED is not
# exactly "yes", the user has most likely not run configure yet.
ifneq ($(strip $(CONFIG_MK_INCLUDED)),yes)
CONFIG_MK_PRESENT := no
else
CONFIG_MK_PRESENT := yes
endif
# CONFIG_NAME is now available (when the include succeeded) and selects the
# sub-directory of the config directory that holds our configuration.
CONFIG_PATH := $(ROOT_PATH)/$(CONFIG_DIR)/$(CONFIG_NAME)
#
# --- Include makefile definitions file ----------------------------------------
#
# Full path of the definitions makefile inside the configuration
# sub-directory.
MAKE_DEFS_MK_PATH := $(CONFIG_PATH)/$(MAKE_DEFS_FILE)
# Pull it in, again tolerating a missing file.
-include $(MAKE_DEFS_MK_PATH)
# Record whether the definitions file was found. If MAKE_DEFS_MK_INCLUDED is
# not exactly "yes", the configuration is probably invalid or incomplete.
ifneq ($(strip $(MAKE_DEFS_MK_INCLUDED)),yes)
MAKE_DEFS_MK_PRESENT := no
else
MAKE_DEFS_MK_PRESENT := yes
endif
#
# --- BLAS and LAPACK implementations ------------------------------------------
#
# Installed BLIS: library and headers live beneath the install prefix.
BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis
# The BLIS static library itself.
BLIS_LIB      := $(BLIS_LIB_PATH)/libblis.a
# Directories in which the third-party BLAS libraries are looked up.
BLAS_LIB_PATH := $(HOME)/flame/lib
MKL_LIB_PATH  := $(HOME)/intel/mkl/lib/intel64/
ESSL_LIB_PATH := /soft/libraries/essl/current/lib64
# OpenBLAS
OPENBLAS_LIB  := $(BLAS_LIB_PATH)/libopenblas.a
# ATLAS (Fortran-77 BLAS interface followed by the ATLAS core library)
ATLAS_LIB     := $(BLAS_LIB_PATH)/libf77blas.a
ATLAS_LIB     += $(BLAS_LIB_PATH)/libatlas.a
# MKL
MKL_LIB       := -L$(MKL_LIB_PATH)
MKL_LIB       += -lmkl_sequential
MKL_LIB       += -lmkl_core
MKL_LIB       += -lmkl_intel_lp64
# ESSL
# Note: ESSL is named differently for SMP and/or BG
ESSL_LIB      := $(ESSL_LIB_PATH)/libesslsmpbg.a
ESSL_LIB      += -L$(IBM_MAIN_DIR)/xlsmp/bg/3.1/bglib64/
ESSL_LIB      += -L$(IBM_MAIN_DIR)/xlf/bg/14.1/bglib64/
ESSL_LIB      += -lxlsmp -lxlf90_r -lxlfmath -lxl
# Accelerate
MAC_LIB       := -framework Accelerate
#
# --- General build definitions ------------------------------------------------
#
# Test sources are read from, and objects written to, this directory.
TEST_SRC_PATH := .
TEST_OBJ_PATH := .
# Gather all local object files: every *.c found in the source directory is
# mapped to a same-named .o in the object directory.
TEST_OBJS := $(patsubst $(TEST_SRC_PATH)/%.c, \
$(TEST_OBJ_PATH)/%.o, \
$(wildcard $(TEST_SRC_PATH)/*.c))
# Override CFLAGS from make_defs.mk here, if desired.
#CFLAGS := -g -O2 -march=native
# Add installed and local header paths to CFLAGS (appended to whatever the
# included makefiles already put there).
CFLAGS += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH)
# Link with the same driver that is used for compiling.
# NOTE(review): CC is presumably set by make_defs.mk; since the sources include
# <mpi.h>, it likely needs to be an MPI compiler wrapper -- confirm.
LINKER := $(CC)
# Optional extra link flags; disabled by default.
#LDFLAGS := -L/home/00146/field/gnu/gcc-4.8.2/lib64
#LDFLAGS += -lgfortran -lm -lpthread
#
# --- Targets/rules ------------------------------------------------------------
#
# Complete list of possible targets when defining 'all':
#
# blis openblas atlas mkl mac essl
#
# Default goal: build the BLIS and ESSL drivers.
all: blis essl

# Each group below builds one executable per level-3 operation, linked against
# the named implementation. Only level-3 drivers are listed: the commit that
# introduces this directory adds level-3 tests only, so level-2 names (gemv,
# ger, hemv, her, her2, trmv, trsv) would have no test_<op>.c to build from and
# would make the whole group fail with "No rule to make target".
blis: test_gemm_blis.x \
      test_hemm_blis.x \
      test_herk_blis.x \
      test_her2k_blis.x \
      test_trmm_blis.x \
      test_trsm_blis.x

essl: test_gemm_essl.x \
      test_hemm_essl.x \
      test_herk_essl.x \
      test_her2k_essl.x \
      test_trmm_essl.x \
      test_trsm_essl.x

openblas: test_gemm_openblas.x \
          test_hemm_openblas.x \
          test_herk_openblas.x \
          test_her2k_openblas.x \
          test_trmm_openblas.x \
          test_trsm_openblas.x

atlas: test_gemm_atlas.x \
       test_hemm_atlas.x \
       test_herk_atlas.x \
       test_her2k_atlas.x \
       test_trmm_atlas.x \
       test_trsm_atlas.x

mkl: test_gemm_mkl.x \
     test_hemm_mkl.x \
     test_herk_mkl.x \
     test_her2k_mkl.x \
     test_trmm_mkl.x \
     test_trsm_mkl.x

mac: test_gemm_mac.x \
     test_hemm_mac.x \
     test_herk_mac.x \
     test_her2k_mac.x \
     test_trmm_mac.x \
     test_trsm_mac.x
# -- Object file rules --

# Generic rule: compile any local source into an object of the same name.
$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c
	$(CC) $(CFLAGS) -o $@ -c $<

# Per-implementation rules: the same test_<op>.c is compiled once per BLAS
# implementation. For the BLAS back-ends the BLAS macro is defined as a string
# literal naming the implementation (the drivers print it in their output
# label); for BLIS the BLIS macro is defined instead, which selects the native
# BLIS call in the driver.
test_%_openblas.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"openblas\" -o $@ -c $<

test_%_atlas.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"atlas\" -o $@ -c $<

test_%_mkl.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"mkl\" -o $@ -c $<

test_%_essl.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"essl\" -o $@ -c $<

test_%_mac.o: test_%.c
	$(CC) $(CFLAGS) -DBLAS=\"mac\" -o $@ -c $<

test_%_blis.o: test_%.c
	$(CC) $(CFLAGS) -DBLIS -o $@ -c $<
# -- Executable file rules --

# NOTE: In the BLAS drivers the BLAS library is named ahead of BLIS on the
# link line. If BLIS was configured with its BLAS compatibility layer, this
# ordering keeps the BLAS symbols under test from being resolved by BLIS
# instead of by the library we mean to measure.
test_%_openblas.x: test_%_openblas.o $(BLIS_LIB)
	$(LINKER) -o $@ $< $(OPENBLAS_LIB) $(BLIS_LIB) $(LDFLAGS)

test_%_atlas.x: test_%_atlas.o $(BLIS_LIB)
	$(LINKER) -o $@ $< $(ATLAS_LIB) $(BLIS_LIB) $(LDFLAGS)

test_%_mkl.x: test_%_mkl.o $(BLIS_LIB)
	$(LINKER) -o $@ $< $(MKL_LIB) $(BLIS_LIB) $(LDFLAGS)

test_%_essl.x: test_%_essl.o $(BLIS_LIB)
	$(LINKER) -o $@ $< $(ESSL_LIB) $(BLIS_LIB) $(LDFLAGS)

test_%_mac.x: test_%_mac.o $(BLIS_LIB)
	$(LINKER) -o $@ $< $(MAC_LIB) $(BLIS_LIB) $(LDFLAGS)

# The native BLIS drivers need only the BLIS library.
test_%_blis.x: test_%_blis.o $(BLIS_LIB)
	$(LINKER) -o $@ $< $(BLIS_LIB) $(LDFLAGS)
# -- Clean rules --

# 'clean' has no recipe of its own; it just triggers 'cleanx'.
clean: cleanx

# Delete the executables and objects built in this directory. The leading '-'
# tells make to carry on even if the removal command reports an error.
# NOTE(review): RM_F is not defined in this file; presumably it comes from the
# included make_defs.mk -- confirm.
cleanx:
	-$(RM_F) *.x *.o

232
mpi_test/test_gemm.c Normal file
View File

@@ -0,0 +1,232 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
#include <mpi.h>
// transa transb m n k alpha a lda b ldb beta c ldc
//void dgemm_( char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* );
//#define PRINT
/*
 * MPI-parallel timing driver for gemm (C := beta*C + alpha*A*B).
 *
 * Command line: m n k p_begin p_inc p_end
 *   - A negative m/n/k means "scale with the problem size": the dimension
 *     used is p * |value|. A non-negative value is used as-is.
 *   - p runs from p_begin to p_end in steps of p_inc; the values of p are
 *     dealt out round-robin to the MPI ranks.
 *
 * For every p handled by this rank, the operation is run n_repeats times and
 * one result line (dimensions, best time, GFLOPS) is printed.
 *
 * Returns 0 on success, 1 if the command line is unusable.
 */
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input, k_input;
	num_t dt_a, dt_b, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	int   world_size, world_rank, provided;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

	// Bail out before touching argv[1..6]. (A bare "exit;" is an expression
	// with no effect, so it must not be used to terminate here.)
	if ( argc < 7 )
	{
		printf("Usage:\n");
		printf("test_foo.x m n k p_begin p_inc p_end:\n");
		bli_finalize();
		return 1;
	}

	MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
	MPI_Comm_size( MPI_COMM_WORLD, &world_size );
	MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

	m_input = strtol( argv[1], NULL, 10 );
	n_input = strtol( argv[2], NULL, 10 );
	k_input = strtol( argv[3], NULL, 10 );
	p_begin = strtol( argv[4], NULL, 10 );
	p_inc   = strtol( argv[5], NULL, 10 );
	p_end   = strtol( argv[6], NULL, 10 );

	// A zero increment would never advance the loop below and would divide
	// by zero when the output row index is computed.
	if ( p_inc < 1 )
	{
		printf("p_inc must be a positive integer.\n");
		MPI_Finalize();
		bli_finalize();
		return 1;
	}

#if 1
	dt_a = BLIS_DOUBLE;
	dt_b = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;
#else
	dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
#endif

	// Rank i starts at the i-th problem size and then skips ahead by
	// world_size sizes, so the sizes are distributed round-robin.
	for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		bli_obj_create( dt_a, m, k, 0, 0, &a );
		bli_obj_create( dt_b, k, n, 0, 0, &b );
		bli_obj_create( dt_c, m, n, 0, 0, &c );
		bli_obj_create( dt_c, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_setsc(  (0.9/1.0), 0.2, &alpha );
		bli_setsc(  (1.0/1.0), 0.0, &beta );

		// Keep a pristine copy of C so that every repeat starts from the
		// same input.
		bli_copym( &c, &c_save );

		// Start from a very large "best time"; each repeat may lower it.
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );

			dtime = bli_clock();

#ifdef BLIS
			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_gemm( &alpha,
			//bli_gemm4m( &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );
#else
			if ( bli_is_real( dt_a ) )
			{
				f77_char transa = 'N';
				f77_char transb = 'N';
				f77_int  mm     = bli_obj_length( c );
				f77_int  kk     = bli_obj_width_after_trans( a );
				f77_int  nn     = bli_obj_width( c );
				f77_int  lda    = bli_obj_col_stride( a );
				f77_int  ldb    = bli_obj_col_stride( b );
				f77_int  ldc    = bli_obj_col_stride( c );
				double*  alphap = bli_obj_buffer( alpha );
				double*  ap     = bli_obj_buffer( a );
				double*  bp     = bli_obj_buffer( b );
				double*  betap  = bli_obj_buffer( beta );
				double*  cp     = bli_obj_buffer( c );

				dgemm_( &transa,
				        &transb,
				        &mm,
				        &nn,
				        &kk,
				        alphap,
				        ap, &lda,
				        bp, &ldb,
				        betap,
				        cp, &ldc );
			}
			else
			{
				f77_char  transa = 'N';
				f77_char  transb = 'N';
				f77_int   mm     = bli_obj_length( c );
				f77_int   kk     = bli_obj_width_after_trans( a );
				f77_int   nn     = bli_obj_width( c );
				f77_int   lda    = bli_obj_col_stride( a );
				f77_int   ldb    = bli_obj_col_stride( b );
				f77_int   ldc    = bli_obj_col_stride( c );
				dcomplex* alphap = bli_obj_buffer( alpha );
				dcomplex* ap     = bli_obj_buffer( a );
				dcomplex* bp     = bli_obj_buffer( b );
				dcomplex* betap  = bli_obj_buffer( beta );
				dcomplex* cp     = bli_obj_buffer( c );

				zgemm_( &transa,
				//zgemm3m_( &transa,
				        &transb,
				        &mm,
				        &nn,
				        &kk,
				        alphap,
				        ap, &lda,
				        bp, &ldb,
				        betap,
				        cp, &ldc );
			}
#endif

			// Fold this repeat's elapsed time into the running best time
			// (judging by the helper's name, the minimum is kept).
			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		// 2*m*k*n flops for a real gemm; counted as four times that when
		// the data are complex.
		gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt_a ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_gemm_blis" );
#else
		printf( "data_gemm_%s", BLAS );
#endif
		// The row index is the 1-based position of p in the full sweep, so
		// rows printed by different ranks do not collide.
		printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	// Every process that initialised MPI must finalise it before exiting.
	MPI_Finalize();

	bli_finalize();

	return 0;
}

252
mpi_test/test_hemm.c Normal file
View File

@@ -0,0 +1,252 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
#include <mpi.h>
// side uploa m n alpha a lda b ldb beta c ldc
//void dsymm_( char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* );
//#define PRINT
/*
 * MPI-parallel timing driver for hemm (C := beta*C + alpha*A*B with A
 * Hermitian and applied from the side selected by 'side').
 *
 * Command line: m n k p_begin p_inc p_end
 *   - The k argument (argv[3]) is accepted so that all drivers share one
 *     command line, but it is not read by this driver.
 *   - A negative m/n means "scale with the problem size": the dimension
 *     used is p * |value|. A non-negative value is used as-is.
 *   - p runs from p_begin to p_end in steps of p_inc; the values of p are
 *     dealt out round-robin to the MPI ranks.
 *
 * For every p handled by this rank, the operation is run n_repeats times and
 * one result line (dimensions, best time, GFLOPS) is printed.
 *
 * Returns 0 on success, 1 if the command line is unusable.
 */
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, n;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, n_input;
	num_t dt_a, dt_b, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	int   world_size, world_rank, provided;
	side_t side;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

	// Bail out before touching argv[1..6]. (A bare "exit;" is an expression
	// with no effect, so it must not be used to terminate here.)
	if ( argc < 7 )
	{
		printf("Usage:\n");
		printf("test_foo.x m n k p_begin p_inc p_end:\n");
		bli_finalize();
		return 1;
	}

	MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
	MPI_Comm_size( MPI_COMM_WORLD, &world_size );
	MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

	m_input = strtol( argv[1], NULL, 10 );
	n_input = strtol( argv[2], NULL, 10 );
	p_begin = strtol( argv[4], NULL, 10 );
	p_inc   = strtol( argv[5], NULL, 10 );
	p_end   = strtol( argv[6], NULL, 10 );

	// A zero increment would never advance the loop below and would divide
	// by zero when the output row index is computed.
	if ( p_inc < 1 )
	{
		printf("p_inc must be a positive integer.\n");
		MPI_Finalize();
		bli_finalize();
		return 1;
	}

#if 1
	dt_a = BLIS_DOUBLE;
	dt_b = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;
#else
	dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
#endif

	side = BLIS_LEFT;
	//side = BLIS_RIGHT;

	uplo = BLIS_LOWER;
	//uplo = BLIS_UPPER;

	// Rank i starts at the i-th problem size and then skips ahead by
	// world_size sizes, so the sizes are distributed round-robin.
	for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
		else               n =     ( dim_t )    n_input;

		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		// A is square; its order matches the side of C it multiplies.
		if ( bli_is_left( side ) )
			bli_obj_create( dt_a, m, m, 0, 0, &a );
		else
			bli_obj_create( dt_a, n, n, 0, 0, &a );
		bli_obj_create( dt_b, m, n, 0, 0, &b );
		bli_obj_create( dt_c, m, n, 0, 0, &c );
		bli_obj_create( dt_c, m, n, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		bli_obj_set_struc( BLIS_HERMITIAN, a );
		bli_obj_set_uplo( uplo, a );

		// Randomize A, make it densely Hermitian, and zero the unstored
		// triangle to ensure the implementation reads only from the stored
		// region.
		bli_randm( &a );
		bli_mkherm( &a );
		bli_mktrim( &a );
/*
		bli_obj_toggle_uplo( a );
		bli_obj_inc_diag_off( 1, a );
		bli_setm( &BLIS_ZERO, &a );
		bli_obj_inc_diag_off( -1, a );
		bli_obj_toggle_uplo( a );
		bli_obj_set_diag( BLIS_NONUNIT_DIAG, a );
		bli_scalm( &BLIS_TWO, &a );
		bli_scalm( &BLIS_TWO, &a );
*/

		bli_setsc(  (2.0/1.0), 1.0, &alpha );
		bli_setsc(  (1.0/1.0), 0.0, &beta );

		// Keep a pristine copy of C so that every repeat starts from the
		// same input.
		bli_copym( &c, &c_save );

		// Start from a very large "best time"; each repeat may lower it.
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );

			dtime = bli_clock();

#ifdef PRINT
/*
			obj_t ar, ai;
			bli_obj_alias_to( a, ar );
			bli_obj_alias_to( a, ai );
			bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
			bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
			bli_printm( "ar", &ar, "%4.1f", "" );
			bli_printm( "ai", &ai, "%4.1f", "" );
*/
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_hemm( side,
			//bli_hemm4m( side,
			          &alpha,
			          &a,
			          &b,
			          &beta,
			          &c );
#else
			// These locals intentionally shadow the BLIS-typed 'side' and
			// 'uplo' for the duration of the repeat body; the outer 'side'
			// is used again after the loop.
			f77_char side   = 'L';
			f77_char uplo   = 'L';
			f77_int  mm     = bli_obj_length( c );
			f77_int  nn     = bli_obj_width( c );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dsymm_( &side,
			        &uplo,
			        &mm,
			        &nn,
			        alphap,
			        ap, &lda,
			        bp, &ldb,
			        betap,
			        cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%9.5f", "" );
			exit(1);
#endif

			// Fold this repeat's elapsed time into the running best time
			// (judging by the helper's name, the minimum is kept).
			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		// Flop count depends on which side A is applied from; counted as
		// four times that when the data are complex.
		if ( bli_is_left( side ) )
			gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 );
		else
			gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 );

		if ( bli_is_complex( dt_a ) ) gflops *= 4.0;

#ifdef BLIS
		printf( "data_hemm_blis" );
#else
		printf( "data_hemm_%s", BLAS );
#endif
		// The row index is the 1-based position of p in the full sweep, so
		// rows printed by different ranks do not collide.
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )n, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	// Every process that initialised MPI must finalise it before exiting.
	MPI_Finalize();

	bli_finalize();

	return 0;
}

209
mpi_test/test_her2k.c Normal file
View File

@@ -0,0 +1,209 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
#include <mpi.h>
// uploa transa m k alpha a lda b ldb beta c ldc
//void dsyr2k_( char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* );
//#define PRINT
/*
 * MPI-parallel timing driver for her2k (rank-2k update of a Hermitian C
 * from the m-by-k matrices A and B).
 *
 * Command line: m n k p_begin p_inc p_end
 *   - The n argument (argv[2]) is accepted so that all drivers share one
 *     command line, but it is not read by this driver.
 *   - A negative m/k means "scale with the problem size": the dimension
 *     used is p * |value|. A non-negative value is used as-is.
 *   - p runs from p_begin to p_end in steps of p_inc; the values of p are
 *     dealt out round-robin to the MPI ranks.
 *
 * For every p handled by this rank, the operation is run n_repeats times and
 * one result line (dimensions, best time, GFLOPS) is printed.
 *
 * Returns 0 on success, 1 if the command line is unusable.
 */
int main( int argc, char** argv )
{
	obj_t a, b, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, k_input;
	num_t dt_a, dt_b, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	int   world_size, world_rank, provided;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

	// Bail out before touching argv[1..6]. (A bare "exit;" is an expression
	// with no effect, so it must not be used to terminate here.)
	if ( argc < 7 )
	{
		printf("Usage:\n");
		printf("test_foo.x m n k p_begin p_inc p_end:\n");
		bli_finalize();
		return 1;
	}

	MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
	MPI_Comm_size( MPI_COMM_WORLD, &world_size );
	MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

	m_input = strtol( argv[1], NULL, 10 );
	k_input = strtol( argv[3], NULL, 10 );
	p_begin = strtol( argv[4], NULL, 10 );
	p_inc   = strtol( argv[5], NULL, 10 );
	p_end   = strtol( argv[6], NULL, 10 );

	// A zero increment would never advance the loop below and would divide
	// by zero when the output row index is computed.
	if ( p_inc < 1 )
	{
		printf("p_inc must be a positive integer.\n");
		MPI_Finalize();
		bli_finalize();
		return 1;
	}

	dt_a = BLIS_DOUBLE;
	dt_b = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;

	uplo = BLIS_LOWER;

	// Rank i starts at the i-th problem size and then skips ahead by
	// world_size sizes, so the sizes are distributed round-robin.
	for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		bli_obj_create( dt_a, m, k, 0, 0, &a );
		bli_obj_create( dt_b, m, k, 0, 0, &b );
		bli_obj_create( dt_c, m, m, 0, 0, &c );
		bli_obj_create( dt_c, m, m, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &b );
		bli_randm( &c );

		// Mark C as Hermitian, stored in the triangle selected by 'uplo'.
		bli_obj_set_struc( BLIS_HERMITIAN, c );
		bli_obj_set_uplo( uplo, c );

		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc(  (1.0/1.0), 0.0, &beta );

		// Keep a pristine copy of C so that every repeat starts from the
		// same input.
		bli_copym( &c, &c_save );

		// Start from a very large "best time"; each repeat may lower it.
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );

			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "b", &b, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_her2k( &alpha,
			           &a,
			           &b,
			           &beta,
			           &c );
#else
			f77_char uploa  = 'L';
			f77_char transa = 'N';
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldb    = bli_obj_col_stride( b );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  bp     = bli_obj_buffer( b );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dsyr2k_( &uploa,
			         &transa,
			         &mm,
			         &kk,
			         alphap,
			         ap, &lda,
			         bp, &ldb,
			         betap,
			         cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif

			// Fold this repeat's elapsed time into the running best time
			// (judging by the helper's name, the minimum is kept).
			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		// The rank-2k update is counted as 2*m*m*k flops.
		gflops = ( 2.0 * m * k * m ) / ( dtime_save * 1.0e9 );

#ifdef BLIS
		printf( "data_her2k_blis" );
#else
		printf( "data_her2k_%s", BLAS );
#endif
		// The row index is the 1-based position of p in the full sweep, so
		// rows printed by different ranks do not collide.
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &b );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	// Every process that initialised MPI must finalise it before exiting.
	MPI_Finalize();

	bli_finalize();

	return 0;
}

200
mpi_test/test_herk.c Normal file
View File

@@ -0,0 +1,200 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
#include <mpi.h>
// uploa transa m k alpha a lda beta c ldc
//void dsyrk_( char*, char*, int*, int*, double*, double*, int*, double*, double*, int* );
//#define PRINT
/*
 * MPI-parallel timing driver for herk (rank-k update of a Hermitian C from
 * the m-by-k matrix A).
 *
 * Command line: m n k p_begin p_inc p_end
 *   - The n argument (argv[2]) is accepted so that all drivers share one
 *     command line, but it is not read by this driver.
 *   - A negative m/k means "scale with the problem size": the dimension
 *     used is p * |value|. A non-negative value is used as-is.
 *   - p runs from p_begin to p_end in steps of p_inc; the values of p are
 *     dealt out round-robin to the MPI ranks.
 *
 * For every p handled by this rank, the operation is run n_repeats times and
 * one result line (dimensions, best time, GFLOPS) is printed.
 *
 * Returns 0 on success, 1 if the command line is unusable.
 */
int main( int argc, char** argv )
{
	obj_t a, c;
	obj_t c_save;
	obj_t alpha, beta;
	dim_t m, k;
	dim_t p;
	dim_t p_begin, p_end, p_inc;
	int   m_input, k_input;
	num_t dt_a, dt_c;
	num_t dt_alpha, dt_beta;
	int   r, n_repeats;
	int   world_size, world_rank, provided;
	uplo_t uplo;

	double dtime;
	double dtime_save;
	double gflops;

	bli_init();

	n_repeats = 3;

	// Bail out before touching argv[1..6]. (A bare "exit;" is an expression
	// with no effect, so it must not be used to terminate here.)
	if ( argc < 7 )
	{
		printf("Usage:\n");
		printf("test_foo.x m n k p_begin p_inc p_end:\n");
		bli_finalize();
		return 1;
	}

	MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
	MPI_Comm_size( MPI_COMM_WORLD, &world_size );
	MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );

	m_input = strtol( argv[1], NULL, 10 );
	k_input = strtol( argv[3], NULL, 10 );
	p_begin = strtol( argv[4], NULL, 10 );
	p_inc   = strtol( argv[5], NULL, 10 );
	p_end   = strtol( argv[6], NULL, 10 );

	// A zero increment would never advance the loop below and would divide
	// by zero when the output row index is computed.
	if ( p_inc < 1 )
	{
		printf("p_inc must be a positive integer.\n");
		MPI_Finalize();
		bli_finalize();
		return 1;
	}

	dt_a = BLIS_DOUBLE;
	dt_c = BLIS_DOUBLE;
	dt_alpha = BLIS_DOUBLE;
	dt_beta = BLIS_DOUBLE;

	uplo = BLIS_LOWER;

	// Rank i starts at the i-th problem size and then skips ahead by
	// world_size sizes, so the sizes are distributed round-robin.
	for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
	{
		if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
		else               m =     ( dim_t )    m_input;
		if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
		else               k =     ( dim_t )    k_input;

		bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
		bli_obj_create( dt_beta,  1, 1, 0, 0, &beta );

		bli_obj_create( dt_a, m, k, 0, 0, &a );
		bli_obj_create( dt_c, m, m, 0, 0, &c );
		bli_obj_create( dt_c, m, m, 0, 0, &c_save );

		bli_randm( &a );
		bli_randm( &c );

		// Mark C as Hermitian, stored in the triangle selected by 'uplo'.
		bli_obj_set_struc( BLIS_HERMITIAN, c );
		bli_obj_set_uplo( uplo, c );

		bli_setsc(  (2.0/1.0), 0.0, &alpha );
		bli_setsc(  (1.0/1.0), 0.0, &beta );

		// Keep a pristine copy of C so that every repeat starts from the
		// same input.
		bli_copym( &c, &c_save );

		// Start from a very large "best time"; each repeat may lower it.
		dtime_save = 1.0e9;

		for ( r = 0; r < n_repeats; ++r )
		{
			bli_copym( &c_save, &c );

			dtime = bli_clock();

#ifdef PRINT
			bli_printm( "a", &a, "%4.1f", "" );
			bli_printm( "c", &c, "%4.1f", "" );
#endif

#ifdef BLIS
			//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );

			bli_herk( &alpha,
			          &a,
			          &beta,
			          &c );
#else
			f77_char uploa  = 'L';
			f77_char transa = 'N';
			f77_int  mm     = bli_obj_length( c );
			f77_int  kk     = bli_obj_width_after_trans( a );
			f77_int  lda    = bli_obj_col_stride( a );
			f77_int  ldc    = bli_obj_col_stride( c );
			double*  alphap = bli_obj_buffer( alpha );
			double*  ap     = bli_obj_buffer( a );
			double*  betap  = bli_obj_buffer( beta );
			double*  cp     = bli_obj_buffer( c );

			dsyrk_( &uploa,
			        &transa,
			        &mm,
			        &kk,
			        alphap,
			        ap, &lda,
			        betap,
			        cp, &ldc );
#endif

#ifdef PRINT
			bli_printm( "c after", &c, "%4.1f", "" );
			exit(1);
#endif

			// Fold this repeat's elapsed time into the running best time
			// (judging by the helper's name, the minimum is kept).
			dtime_save = bli_clock_min_diff( dtime_save, dtime );
		}

		// The rank-k update is counted as m*m*k flops.
		gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 );

#ifdef BLIS
		printf( "data_herk_blis" );
#else
		printf( "data_herk_%s", BLAS );
#endif
		// The row index is the 1-based position of p in the full sweep, so
		// rows printed by different ranks do not collide.
		printf( "( %2lu, 1:4 ) = [ %4lu %4lu  %10.3e  %6.3f ];\n",
		        ( unsigned long )(p - p_begin + 1)/p_inc + 1,
		        ( unsigned long )m,
		        ( unsigned long )k, dtime_save, gflops );

		bli_obj_free( &alpha );
		bli_obj_free( &beta );

		bli_obj_free( &a );
		bli_obj_free( &c );
		bli_obj_free( &c_save );
	}

	// Every process that initialised MPI must finalise it before exiting.
	MPI_Finalize();

	bli_finalize();

	return 0;
}

246
mpi_test/test_trmm.c Normal file
View File

@@ -0,0 +1,246 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
#include <mpi.h>
// side uplo trans diag m n alpha a lda b ldb
//void dtrmm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* );
//#define PRINT
int main( int argc, char** argv )
{
obj_t a, b, c;
obj_t c_save;
obj_t alpha, beta;
dim_t m, n;
dim_t p;
dim_t p_begin, p_end, p_inc;
int m_input, n_input;
num_t dt_a, dt_b, dt_c;
num_t dt_alpha, dt_beta;
int r, n_repeats;
side_t side;
uplo_t uplo;
double dtime;
double dtime_save;
double gflops;
bli_init();
n_repeats = 3;
if( argc < 7 )
{
printf("Usage:\n");
printf("test_foo.x m n p_begin p_inc p_end:\n");
exit;
}
int world_size, world_rank, provided;
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
m_input = strtol( argv[1], NULL, 10 );
n_input = strtol( argv[2], NULL, 10 );
p_begin = strtol( argv[4], NULL, 10 );
p_inc = strtol( argv[5], NULL, 10 );
p_end = strtol( argv[6], NULL, 10 );
#if 1
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
dt_c = BLIS_DOUBLE;
dt_alpha = BLIS_DOUBLE;
dt_beta = BLIS_DOUBLE;
#else
dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
#endif
side = BLIS_LEFT;
//side = BLIS_RIGHT;
uplo = BLIS_LOWER;
//uplo = BLIS_UPPER;
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
{
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
else m = ( dim_t ) m_input;
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
else n = ( dim_t ) n_input;
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
if ( bli_is_left( side ) )
bli_obj_create( dt_a, m, m, 0, 0, &a );
else
bli_obj_create( dt_a, n, n, 0, 0, &a );
bli_obj_create( dt_b, m, n, 0, 0, &b );
bli_obj_create( dt_c, m, n, 0, 0, &c );
bli_obj_create( dt_c, m, n, 0, 0, &c_save );
bli_obj_set_struc( BLIS_TRIANGULAR, a );
bli_obj_set_uplo( uplo, a );
bli_randm( &a );
bli_randm( &c );
bli_randm( &b );
/*
bli_obj_toggle_uplo( a );
bli_obj_inc_diag_off( -1, a );
bli_setm( &BLIS_ZERO, &a );
bli_obj_inc_diag_off( 1, a );
bli_obj_toggle_uplo( a );
bli_obj_set_diag( BLIS_NONUNIT_DIAG, a );
bli_scalm( &BLIS_TWO, &a );
//bli_scalm( &BLIS_TWO, &a );
*/
bli_setsc( (2.0/1.0), 0.0, &alpha );
bli_setsc( (1.0/1.0), 0.0, &beta );
bli_copym( &c, &c_save );
dtime_save = 1.0e9;
for ( r = 0; r < n_repeats; ++r )
{
bli_copym( &c_save, &c );
dtime = bli_clock();
#ifdef PRINT
/*
obj_t ar, ai;
bli_obj_alias_to( a, ar );
bli_obj_alias_to( a, ai );
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_printm( "ar", &ar, "%4.1f", "" );
bli_printm( "ai", &ai, "%4.1f", "" );
*/
bli_printm( "a", &a, "%4.1f", "" );
bli_printm( "c", &c, "%4.1f", "" );
#endif
#ifdef BLIS
bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
bli_trmm( side,
//bli_trmm4m( side,
&alpha,
&a,
&c );
#else
f77_char side = 'L';
f77_char uplo = 'L';
f77_char transa = 'N';
f77_char diag = 'N';
f77_int mm = bli_obj_length( c );
f77_int nn = bli_obj_width( c );
f77_int lda = bli_obj_col_stride( a );
f77_int ldc = bli_obj_col_stride( c );
double* alphap = bli_obj_buffer( alpha );
double* ap = bli_obj_buffer( a );
double* cp = bli_obj_buffer( c );
dtrmm_( &side,
&uplo,
&transa,
&diag,
&mm,
&nn,
alphap,
ap, &lda,
cp, &ldc );
#endif
#ifdef PRINT
bli_printm( "c after", &c, "%4.1f", "" );
exit(1);
#endif
dtime_save = bli_clock_min_diff( dtime_save, dtime );
}
if ( bli_is_left( side ) )
gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
else
gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
#ifdef BLIS
printf( "data_trmm_blis" );
#else
printf( "data_trmm_%s", BLAS );
#endif
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
( unsigned long )(p - p_begin + 1)/p_inc + 1,
( unsigned long )m,
( unsigned long )n, dtime_save, gflops );
bli_obj_free( &alpha );
bli_obj_free( &beta );
bli_obj_free( &a );
bli_obj_free( &b );
bli_obj_free( &c );
bli_obj_free( &c_save );
}
bli_finalize();
return 0;
}

282
mpi_test/test_trsm.c Normal file
View File

@@ -0,0 +1,282 @@
/*
BLIS
An object-based framework for developing high-performance BLAS-like
libraries.
Copyright (C) 2014, The University of Texas
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
- Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
- Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
- Neither the name of The University of Texas nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <unistd.h>
#include "blis.h"
#include <mpi.h>
// side uplo trans diag m n alpha a lda b ldb
//void dtrsm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* );
//#define PRINT
int main( int argc, char** argv )
{
obj_t a, b, c;
obj_t c_save;
obj_t alpha, beta;
dim_t m, n;
dim_t p;
dim_t p_begin, p_end, p_inc;
int m_input, n_input;
num_t dt_a, dt_b, dt_c;
num_t dt_alpha, dt_beta;
int r, n_repeats;
side_t side;
uplo_t uplo;
double dtime;
double dtime_save;
double gflops;
bli_init();
n_repeats = 3;
if( argc < 7 )
{
printf("Usage:\n");
printf("test_foo.x m n k p_begin p_inc p_end:\n");
exit;
}
int world_size, world_rank, provided;
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
m_input = strtol( argv[1], NULL, 10 );
n_input = strtol( argv[2], NULL, 10 );
p_begin = strtol( argv[4], NULL, 10 );
p_inc = strtol( argv[5], NULL, 10 );
p_end = strtol( argv[6], NULL, 10 );
#if 1
dt_a = BLIS_DOUBLE;
dt_b = BLIS_DOUBLE;
dt_c = BLIS_DOUBLE;
dt_alpha = BLIS_DOUBLE;
dt_beta = BLIS_DOUBLE;
#else
dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT;
//dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX;
#endif
side = BLIS_LEFT;
//side = BLIS_RIGHT;
uplo = BLIS_LOWER;
//uplo = BLIS_UPPER;
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
{
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
else m = ( dim_t ) m_input;
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
else n = ( dim_t ) n_input;
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
if ( bli_is_left( side ) )
bli_obj_create( dt_a, m, m, 0, 0, &a );
else
bli_obj_create( dt_a, n, n, 0, 0, &a );
bli_obj_create( dt_b, m, n, 0, 0, &b );
bli_obj_create( dt_c, m, n, 0, 0, &c );
bli_obj_create( dt_c, m, n, 0, 0, &c_save );
bli_obj_set_struc( BLIS_TRIANGULAR, a );
bli_obj_set_uplo( uplo, a );
//bli_obj_set_diag( BLIS_UNIT_DIAG, a );
bli_randm( &a );
bli_randm( &c );
bli_randm( &b );
/*
{
obj_t a2;
bli_obj_alias_to( a, a2 );
bli_obj_toggle_uplo( a2 );
bli_obj_inc_diag_off( 1, a2 );
bli_setm( &BLIS_ZERO, &a2 );
bli_obj_inc_diag_off( -2, a2 );
bli_obj_toggle_uplo( a2 );
bli_obj_set_diag( BLIS_NONUNIT_DIAG, a2 );
bli_scalm( &BLIS_TWO, &a2 );
//bli_scalm( &BLIS_TWO, &a );
}
*/
bli_setsc( (2.0/1.0), 0.0, &alpha );
bli_setsc( (1.0/1.0), 0.0, &beta );
bli_copym( &c, &c_save );
dtime_save = 1.0e9;
for ( r = 0; r < n_repeats; ++r )
{
bli_copym( &c_save, &c );
dtime = bli_clock();
#ifdef PRINT
/*
obj_t ar, ai;
bli_obj_alias_to( a, ar );
bli_obj_alias_to( a, ai );
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
bli_printm( "ar", &ar, "%4.1f", "" );
bli_printm( "ai", &ai, "%4.1f", "" );
*/
bli_invertd( &a );
bli_printm( "a", &a, "%4.1f", "" );
bli_invertd( &a );
bli_printm( "c", &c, "%4.1f", "" );
#endif
#ifdef BLIS
//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
bli_trsm( side,
//bli_trsm4m( side,
//bli_trsm3m( side,
&alpha,
&a,
&c );
#else
if ( bli_is_real( dt_a ) )
{
f77_char side = 'L';
f77_char uplo = 'L';
f77_char transa = 'N';
f77_char diag = 'N';
f77_int mm = bli_obj_length( c );
f77_int nn = bli_obj_width( c );
f77_int lda = bli_obj_col_stride( a );
f77_int ldc = bli_obj_col_stride( c );
float * alphap = bli_obj_buffer( alpha );
float * ap = bli_obj_buffer( a );
float * cp = bli_obj_buffer( c );
strsm_( &side,
&uplo,
&transa,
&diag,
&mm,
&nn,
alphap,
ap, &lda,
cp, &ldc );
}
else // if ( bli_is_complex( dt_a ) )
{
f77_char side = 'L';
f77_char uplo = 'L';
f77_char transa = 'N';
f77_char diag = 'N';
f77_int mm = bli_obj_length( c );
f77_int nn = bli_obj_width( c );
f77_int lda = bli_obj_col_stride( a );
f77_int ldc = bli_obj_col_stride( c );
scomplex* alphap = bli_obj_buffer( alpha );
scomplex* ap = bli_obj_buffer( a );
scomplex* cp = bli_obj_buffer( c );
ctrsm_( &side,
//ztrsm_( &side,
&uplo,
&transa,
&diag,
&mm,
&nn,
alphap,
ap, &lda,
cp, &ldc );
}
#endif
#ifdef PRINT
bli_printm( "c after", &c, "%4.1f", "" );
exit(1);
#endif
dtime_save = bli_clock_min_diff( dtime_save, dtime );
}
if ( bli_is_left( side ) )
gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
else
gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
#ifdef BLIS
printf( "data_trsm_blis" );
#else
printf( "data_trsm_%s", BLAS );
#endif
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
( unsigned long )(p - p_begin + 1)/p_inc + 1,
( unsigned long )m,
( unsigned long )n, dtime_save, gflops );
bli_obj_free( &alpha );
bli_obj_free( &beta );
bli_obj_free( &a );
bli_obj_free( &b );
bli_obj_free( &c );
bli_obj_free( &c_save );
}
bli_finalize();
return 0;
}