mirror of
https://github.com/amd/blis.git
synced 2026-04-23 17:18:51 +00:00
Added test drivers for level 3 BLAS that run tests in parallel using MPI
This commit is contained in:
323
mpi_test/Makefile
Normal file
323
mpi_test/Makefile
Normal file
@@ -0,0 +1,323 @@
|
||||
#!/bin/bash
|
||||
#
|
||||
# BLIS
|
||||
# An object-based framework for developing high-performance BLAS-like
|
||||
# libraries.
|
||||
#
|
||||
# Copyright (C) 2014, The University of Texas
|
||||
#
|
||||
# Redistribution and use in source and binary forms, with or without
|
||||
# modification, are permitted provided that the following conditions are
|
||||
# met:
|
||||
# - Redistributions of source code must retain the above copyright
|
||||
# notice, this list of conditions and the following disclaimer.
|
||||
# - Redistributions in binary form must reproduce the above copyright
|
||||
# notice, this list of conditions and the following disclaimer in the
|
||||
# documentation and/or other materials provided with the distribution.
|
||||
# - Neither the name of The University of Texas nor the names of its
|
||||
# contributors may be used to endorse or promote products derived
|
||||
# from this software without specific prior written permission.
|
||||
#
|
||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
#
|
||||
#
|
||||
|
||||
#
|
||||
# Makefile
|
||||
#
|
||||
# Field G. Van Zee
|
||||
#
|
||||
# Makefile for standalone BLIS test drivers.
|
||||
#
|
||||
|
||||
#
|
||||
# --- Makefile PHONY target definitions ----------------------------------------
|
||||
#
|
||||
|
||||
# Every target listed here is a grouping or housekeeping target that does not
# name a real file. Declaring all of them phony (including the per-library
# groups openblas, atlas, mkl, and mac defined further down) keeps make from
# skipping them if a file or directory with the same name happens to exist.
.PHONY: all \
        blis essl openblas atlas mkl mac \
        clean cleanx
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- Makefile initialization --------------------------------------------------
|
||||
#
|
||||
|
||||
# Define the name of the configuration file.
|
||||
CONFIG_MK_FILE := config.mk
|
||||
|
||||
# Define the name of the file containing build and architecture-specific
|
||||
# makefile definitions.
|
||||
MAKE_DEFS_FILE := make_defs.mk
|
||||
|
||||
# Locations of important files.
|
||||
ROOT_PATH := ..
|
||||
CONFIG_DIR := config
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- Include makefile configuration file --------------------------------------
|
||||
#
|
||||
|
||||
# Construct the path to the makefile configuration file that was generated by
|
||||
# the configure script.
|
||||
CONFIG_MK_PATH := $(ROOT_PATH)/$(CONFIG_MK_FILE)
|
||||
|
||||
# Include the configuration file.
|
||||
-include $(CONFIG_MK_PATH)
|
||||
|
||||
# Detect whether we actually got the configuration file. If we didn't, then
|
||||
# it is likely that the user has not yet generated it (via configure).
|
||||
ifeq ($(strip $(CONFIG_MK_INCLUDED)),yes)
|
||||
CONFIG_MK_PRESENT := yes
|
||||
else
|
||||
CONFIG_MK_PRESENT := no
|
||||
endif
|
||||
|
||||
# Now we have access to CONFIG_NAME, which tells us which sub-directory of the
|
||||
# config directory to use as our configuration.
|
||||
CONFIG_PATH := $(ROOT_PATH)/$(CONFIG_DIR)/$(CONFIG_NAME)
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- Include makefile definitions file ----------------------------------------
|
||||
#
|
||||
|
||||
# Construct the path to the makefile definitions file residing inside of
|
||||
# the configuration sub-directory.
|
||||
MAKE_DEFS_MK_PATH := $(CONFIG_PATH)/$(MAKE_DEFS_FILE)
|
||||
|
||||
# Include the makefile definitions file.
|
||||
-include $(MAKE_DEFS_MK_PATH)
|
||||
|
||||
# Detect whether we actually got the make definitions file. If we didn't, then
|
||||
# it is likely that the configuration is invalid (or incomplete).
|
||||
ifeq ($(strip $(MAKE_DEFS_MK_INCLUDED)),yes)
|
||||
MAKE_DEFS_MK_PRESENT := yes
|
||||
else
|
||||
MAKE_DEFS_MK_PRESENT := no
|
||||
endif
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- BLAS and LAPACK implementations ------------------------------------------
|
||||
#
|
||||
|
||||
# BLIS library and header path. This is simply wherever it was installed.
|
||||
BLIS_LIB_PATH := $(INSTALL_PREFIX)/lib
|
||||
BLIS_INC_PATH := $(INSTALL_PREFIX)/include/blis
|
||||
|
||||
# BLIS library.
|
||||
BLIS_LIB := $(BLIS_LIB_PATH)/libblis.a
|
||||
|
||||
# BLAS library path(s). This is where the BLAS libraries reside.
|
||||
BLAS_LIB_PATH := $(HOME)/flame/lib
|
||||
MKL_LIB_PATH := $(HOME)/intel/mkl/lib/intel64/
|
||||
ESSL_LIB_PATH := /soft/libraries/essl/current/lib64
|
||||
|
||||
# OpenBLAS
|
||||
OPENBLAS_LIB := $(BLAS_LIB_PATH)/libopenblas.a
|
||||
|
||||
# ATLAS
|
||||
ATLAS_LIB := $(BLAS_LIB_PATH)/libf77blas.a \
|
||||
$(BLAS_LIB_PATH)/libatlas.a
|
||||
|
||||
# MKL
|
||||
MKL_LIB := -L$(MKL_LIB_PATH) \
|
||||
-lmkl_sequential \
|
||||
-lmkl_core \
|
||||
-lmkl_intel_lp64
|
||||
|
||||
# ESSL
|
||||
# Note: ESSL is named differently for SMP and/or BG
|
||||
ESSL_LIB := $(ESSL_LIB_PATH)/libesslsmpbg.a \
|
||||
-L$(IBM_MAIN_DIR)/xlsmp/bg/3.1/bglib64/ \
|
||||
-L$(IBM_MAIN_DIR)/xlf/bg/14.1/bglib64/ \
|
||||
-lxlsmp -lxlf90_r -lxlfmath -lxl
|
||||
|
||||
# Accelerate
|
||||
MAC_LIB := -framework Accelerate
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- General build definitions ------------------------------------------------
|
||||
#
|
||||
|
||||
TEST_SRC_PATH := .
|
||||
TEST_OBJ_PATH := .
|
||||
|
||||
# Gather all local object files.
|
||||
TEST_OBJS := $(patsubst $(TEST_SRC_PATH)/%.c, \
|
||||
$(TEST_OBJ_PATH)/%.o, \
|
||||
$(wildcard $(TEST_SRC_PATH)/*.c))
|
||||
|
||||
# Override CFLAGS from make_defs.mk here, if desired.
|
||||
#CFLAGS := -g -O2 -march=native
|
||||
|
||||
# Add installed and local header paths to CFLAGS
|
||||
CFLAGS += -I$(BLIS_INC_PATH) -I$(TEST_SRC_PATH)
|
||||
|
||||
LINKER := $(CC)
|
||||
#LDFLAGS := -L/home/00146/field/gnu/gcc-4.8.2/lib64
|
||||
#LDFLAGS += -lgfortran -lm -lpthread
|
||||
|
||||
|
||||
|
||||
#
|
||||
# --- Targets/rules ------------------------------------------------------------
|
||||
#
|
||||
|
||||
# Complete list of possible targets when defining 'all':
|
||||
#
|
||||
# blis openblas atlas mkl mac essl
|
||||
#
|
||||
all: blis essl
|
||||
|
||||
blis: test_gemm_blis.x \
|
||||
test_hemm_blis.x \
|
||||
test_herk_blis.x \
|
||||
test_her2k_blis.x \
|
||||
test_trmm_blis.x \
|
||||
test_trsm_blis.x
|
||||
|
||||
essl: test_gemm_essl.x \
|
||||
test_hemm_essl.x \
|
||||
test_herk_essl.x \
|
||||
test_her2k_essl.x \
|
||||
test_trmm_essl.x \
|
||||
test_trsm_essl.x
|
||||
|
||||
openblas: test_gemv_openblas.x \
|
||||
test_ger_openblas.x \
|
||||
test_hemv_openblas.x \
|
||||
test_her_openblas.x \
|
||||
test_her2_openblas.x \
|
||||
test_trmv_openblas.x \
|
||||
test_trsv_openblas.x \
|
||||
\
|
||||
test_gemm_openblas.x \
|
||||
test_hemm_openblas.x \
|
||||
test_herk_openblas.x \
|
||||
test_her2k_openblas.x \
|
||||
test_trmm_openblas.x \
|
||||
test_trsm_openblas.x
|
||||
|
||||
atlas: test_gemv_atlas.x \
|
||||
test_ger_atlas.x \
|
||||
test_hemv_atlas.x \
|
||||
test_her_atlas.x \
|
||||
test_her2_atlas.x \
|
||||
test_trmv_atlas.x \
|
||||
test_trsv_atlas.x \
|
||||
\
|
||||
test_gemm_atlas.x \
|
||||
test_hemm_atlas.x \
|
||||
test_herk_atlas.x \
|
||||
test_her2k_atlas.x \
|
||||
test_trmm_atlas.x \
|
||||
test_trsm_atlas.x
|
||||
|
||||
mkl: test_gemv_mkl.x \
|
||||
test_ger_mkl.x \
|
||||
test_hemv_mkl.x \
|
||||
test_her_mkl.x \
|
||||
test_her2_mkl.x \
|
||||
test_trmv_mkl.x \
|
||||
test_trsv_mkl.x \
|
||||
\
|
||||
test_gemm_mkl.x \
|
||||
test_hemm_mkl.x \
|
||||
test_herk_mkl.x \
|
||||
test_her2k_mkl.x \
|
||||
test_trmm_mkl.x \
|
||||
test_trsm_mkl.x
|
||||
|
||||
mac: test_gemv_mac.x \
|
||||
test_ger_mac.x \
|
||||
test_hemv_mac.x \
|
||||
test_her_mac.x \
|
||||
test_her2_mac.x \
|
||||
test_trmv_mac.x \
|
||||
test_trsv_mac.x \
|
||||
\
|
||||
test_gemm_mac.x \
|
||||
test_hemm_mac.x \
|
||||
test_herk_mac.x \
|
||||
test_her2k_mac.x \
|
||||
test_trmm_mac.x \
|
||||
test_trsm_mac.x
|
||||
|
||||
|
||||
|
||||
# --Object file rules --
|
||||
|
||||
$(TEST_OBJ_PATH)/%.o: $(TEST_SRC_PATH)/%.c
|
||||
$(CC) $(CFLAGS) -c $< -o $@
|
||||
|
||||
test_%_openblas.o: test_%.c
|
||||
$(CC) $(CFLAGS) -DBLAS=\"openblas\" -c $< -o $@
|
||||
|
||||
test_%_atlas.o: test_%.c
|
||||
$(CC) $(CFLAGS) -DBLAS=\"atlas\" -c $< -o $@
|
||||
|
||||
test_%_mkl.o: test_%.c
|
||||
$(CC) $(CFLAGS) -DBLAS=\"mkl\" -c $< -o $@
|
||||
|
||||
test_%_essl.o: test_%.c
|
||||
$(CC) $(CFLAGS) -DBLAS=\"essl\" -c $< -o $@
|
||||
|
||||
test_%_mac.o: test_%.c
|
||||
$(CC) $(CFLAGS) -DBLAS=\"mac\" -c $< -o $@
|
||||
|
||||
test_%_blis.o: test_%.c
|
||||
$(CC) $(CFLAGS) -DBLIS -c $< -o $@
|
||||
|
||||
|
||||
# -- Executable file rules --
|
||||
|
||||
# NOTE: For the BLAS test drivers, we place the BLAS libraries before BLIS
|
||||
# on the link command line in case BLIS was configured with the BLAS
|
||||
# compatibility layer. This prevents BLIS from inadvertently getting called
|
||||
# for the BLAS routines we are trying to test with.
|
||||
|
||||
test_%_openblas.x: test_%_openblas.o $(BLIS_LIB)
|
||||
$(LINKER) $< $(OPENBLAS_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
|
||||
|
||||
test_%_atlas.x: test_%_atlas.o $(BLIS_LIB)
|
||||
$(LINKER) $< $(ATLAS_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
|
||||
|
||||
test_%_mkl.x: test_%_mkl.o $(BLIS_LIB)
|
||||
$(LINKER) $< $(MKL_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
|
||||
|
||||
test_%_essl.x: test_%_essl.o $(BLIS_LIB)
|
||||
$(LINKER) $< $(ESSL_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
|
||||
|
||||
test_%_mac.x: test_%_mac.o $(BLIS_LIB)
|
||||
$(LINKER) $< $(MAC_LIB) $(BLIS_LIB) $(LDFLAGS) -o $@
|
||||
|
||||
test_%_blis.x: test_%_blis.o $(BLIS_LIB)
|
||||
$(LINKER) $< $(BLIS_LIB) $(LDFLAGS) -o $@
|
||||
|
||||
|
||||
# -- Clean rules --
|
||||
|
||||
clean: cleanx
|
||||
|
||||
cleanx:
|
||||
- $(RM_F) *.o *.x
|
||||
|
||||
232
mpi_test/test_gemm.c
Normal file
232
mpi_test/test_gemm.c
Normal file
@@ -0,0 +1,232 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include "blis.h"
|
||||
#include <mpi.h>
|
||||
|
||||
// transa transb m n k alpha a lda b ldb beta c ldc
|
||||
//void dgemm_( char*, char*, int*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* );
|
||||
|
||||
//#define PRINT
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, b, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
dim_t m, n, k;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, n_input, k_input;
|
||||
num_t dt_a, dt_b, dt_c;
|
||||
num_t dt_alpha, dt_beta;
|
||||
int r, n_repeats;
|
||||
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
|
||||
bli_init();
|
||||
|
||||
n_repeats = 3;
|
||||
|
||||
if( argc < 7 )
|
||||
{
|
||||
printf("Usage:\n");
|
||||
printf("test_foo.x m n k p_begin p_inc p_end:\n");
|
||||
exit;
|
||||
}
|
||||
|
||||
int world_size, world_rank, provided;
|
||||
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
|
||||
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
|
||||
|
||||
m_input = strtol( argv[1], NULL, 10 );
|
||||
n_input = strtol( argv[2], NULL, 10 );
|
||||
k_input = strtol( argv[3], NULL, 10 );
|
||||
p_begin = strtol( argv[4], NULL, 10 );
|
||||
p_inc = strtol( argv[5], NULL, 10 );
|
||||
p_end = strtol( argv[6], NULL, 10 );
|
||||
|
||||
#if 1
|
||||
dt_a = BLIS_DOUBLE;
|
||||
dt_b = BLIS_DOUBLE;
|
||||
dt_c = BLIS_DOUBLE;
|
||||
dt_alpha = BLIS_DOUBLE;
|
||||
dt_beta = BLIS_DOUBLE;
|
||||
#else
|
||||
dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
|
||||
#endif
|
||||
|
||||
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
|
||||
else n = ( dim_t ) n_input;
|
||||
if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
|
||||
else k = ( dim_t ) k_input;
|
||||
|
||||
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
|
||||
|
||||
bli_obj_create( dt_a, m, k, 0, 0, &a );
|
||||
bli_obj_create( dt_b, k, n, 0, 0, &b );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c_save );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &b );
|
||||
bli_randm( &c );
|
||||
|
||||
|
||||
bli_setsc( (0.9/1.0), 0.2, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
dtime_save = 1.0e9;
|
||||
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
#ifdef BLIS
|
||||
//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
|
||||
|
||||
bli_gemm( &alpha,
|
||||
//bli_gemm4m( &alpha,
|
||||
&a,
|
||||
&b,
|
||||
&beta,
|
||||
&c );
|
||||
|
||||
#else
|
||||
if ( bli_is_real( dt_a ) )
|
||||
{
|
||||
f77_char transa = 'N';
|
||||
f77_char transb = 'N';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int kk = bli_obj_width_after_trans( a );
|
||||
f77_int nn = bli_obj_width( c );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldb = bli_obj_col_stride( b );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
double* alphap = bli_obj_buffer( alpha );
|
||||
double* ap = bli_obj_buffer( a );
|
||||
double* bp = bli_obj_buffer( b );
|
||||
double* betap = bli_obj_buffer( beta );
|
||||
double* cp = bli_obj_buffer( c );
|
||||
|
||||
dgemm_( &transa,
|
||||
&transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
else
|
||||
{
|
||||
f77_char transa = 'N';
|
||||
f77_char transb = 'N';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int kk = bli_obj_width_after_trans( a );
|
||||
f77_int nn = bli_obj_width( c );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldb = bli_obj_col_stride( b );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
dcomplex* alphap = bli_obj_buffer( alpha );
|
||||
dcomplex* ap = bli_obj_buffer( a );
|
||||
dcomplex* bp = bli_obj_buffer( b );
|
||||
dcomplex* betap = bli_obj_buffer( beta );
|
||||
dcomplex* cp = bli_obj_buffer( c );
|
||||
|
||||
zgemm_( &transa,
|
||||
//zgemm3m_( &transa,
|
||||
&transb,
|
||||
&mm,
|
||||
&nn,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
}
|
||||
#endif
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
gflops = ( 2.0 * m * k * n ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_gemm_blis" );
|
||||
#else
|
||||
printf( "data_gemm_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:5 ) = [ %4lu %4lu %4lu %10.3e %6.3f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )k,
|
||||
( unsigned long )n, dtime_save, gflops );
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &beta );
|
||||
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
}
|
||||
|
||||
bli_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
252
mpi_test/test_hemm.c
Normal file
252
mpi_test/test_hemm.c
Normal file
@@ -0,0 +1,252 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include "blis.h"
|
||||
#include <mpi.h>
|
||||
|
||||
// side uploa m n alpha a lda b ldb beta c ldc
|
||||
//void dsymm_( char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* );
|
||||
|
||||
//#define PRINT
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, b, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, n_input;
|
||||
num_t dt_a, dt_b, dt_c;
|
||||
num_t dt_alpha, dt_beta;
|
||||
int r, n_repeats;
|
||||
side_t side;
|
||||
uplo_t uplo;
|
||||
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
|
||||
bli_init();
|
||||
|
||||
n_repeats = 3;
|
||||
|
||||
if( argc < 7 )
|
||||
{
|
||||
printf("Usage:\n");
|
||||
printf("test_foo.x m n k p_begin p_inc p_end:\n");
|
||||
exit;
|
||||
}
|
||||
|
||||
int world_size, world_rank, provided;
|
||||
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
|
||||
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
|
||||
|
||||
m_input = strtol( argv[1], NULL, 10 );
|
||||
n_input = strtol( argv[2], NULL, 10 );
|
||||
p_begin = strtol( argv[4], NULL, 10 );
|
||||
p_inc = strtol( argv[5], NULL, 10 );
|
||||
p_end = strtol( argv[6], NULL, 10 );
|
||||
|
||||
#if 1
|
||||
dt_a = BLIS_DOUBLE;
|
||||
dt_b = BLIS_DOUBLE;
|
||||
dt_c = BLIS_DOUBLE;
|
||||
dt_alpha = BLIS_DOUBLE;
|
||||
dt_beta = BLIS_DOUBLE;
|
||||
#else
|
||||
dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
|
||||
#endif
|
||||
|
||||
side = BLIS_LEFT;
|
||||
//side = BLIS_RIGHT;
|
||||
|
||||
uplo = BLIS_LOWER;
|
||||
//uplo = BLIS_UPPER;
|
||||
|
||||
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
|
||||
else n = ( dim_t ) n_input;
|
||||
|
||||
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
|
||||
|
||||
if ( bli_is_left( side ) )
|
||||
bli_obj_create( dt_a, m, m, 0, 0, &a );
|
||||
else
|
||||
bli_obj_create( dt_a, n, n, 0, 0, &a );
|
||||
bli_obj_create( dt_b, m, n, 0, 0, &b );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c_save );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &b );
|
||||
bli_randm( &c );
|
||||
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, a );
|
||||
bli_obj_set_uplo( uplo, a );
|
||||
|
||||
// Randomize A, make it densely Hermitian, and zero the unstored
|
||||
// triangle to ensure the implementation reads only from the stored
|
||||
// region.
|
||||
bli_randm( &a );
|
||||
bli_mkherm( &a );
|
||||
bli_mktrim( &a );
|
||||
/*
|
||||
bli_obj_toggle_uplo( a );
|
||||
bli_obj_inc_diag_off( 1, a );
|
||||
bli_setm( &BLIS_ZERO, &a );
|
||||
bli_obj_inc_diag_off( -1, a );
|
||||
bli_obj_toggle_uplo( a );
|
||||
bli_obj_set_diag( BLIS_NONUNIT_DIAG, a );
|
||||
bli_scalm( &BLIS_TWO, &a );
|
||||
bli_scalm( &BLIS_TWO, &a );
|
||||
*/
|
||||
|
||||
|
||||
bli_setsc( (2.0/1.0), 1.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
dtime_save = 1.0e9;
|
||||
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
#ifdef PRINT
|
||||
/*
|
||||
obj_t ar, ai;
|
||||
bli_obj_alias_to( a, ar );
|
||||
bli_obj_alias_to( a, ai );
|
||||
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
|
||||
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
|
||||
bli_printm( "ar", &ar, "%4.1f", "" );
|
||||
bli_printm( "ai", &ai, "%4.1f", "" );
|
||||
*/
|
||||
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "b", &b, "%4.1f", "" );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
#endif
|
||||
|
||||
#ifdef BLIS
|
||||
|
||||
//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
|
||||
|
||||
bli_hemm( side,
|
||||
//bli_hemm4m( side,
|
||||
&alpha,
|
||||
&a,
|
||||
&b,
|
||||
&beta,
|
||||
&c );
|
||||
#else
|
||||
|
||||
f77_char side = 'L';
|
||||
f77_char uplo = 'L';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int nn = bli_obj_width( c );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldb = bli_obj_col_stride( b );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
double* alphap = bli_obj_buffer( alpha );
|
||||
double* ap = bli_obj_buffer( a );
|
||||
double* bp = bli_obj_buffer( b );
|
||||
double* betap = bli_obj_buffer( beta );
|
||||
double* cp = bli_obj_buffer( c );
|
||||
|
||||
dsymm_( &side,
|
||||
&uplo,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "c after", &c, "%9.5f", "" );
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
if ( bli_is_left( side ) )
|
||||
gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 );
|
||||
else
|
||||
gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_hemm_blis" );
|
||||
#else
|
||||
printf( "data_hemm_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )n, dtime_save, gflops );
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &beta );
|
||||
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
}
|
||||
|
||||
bli_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
209
mpi_test/test_her2k.c
Normal file
209
mpi_test/test_her2k.c
Normal file
@@ -0,0 +1,209 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include "blis.h"
|
||||
#include <mpi.h>
|
||||
|
||||
// uploa transa m k alpha a lda b ldb beta c ldc
|
||||
//void dsyr2k_( char*, char*, int*, int*, double*, double*, int*, double*, int*, double*, double*, int* );
|
||||
|
||||
//#define PRINT
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, b, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
dim_t m, k;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, k_input;
|
||||
num_t dt_a, dt_b, dt_c;
|
||||
num_t dt_alpha, dt_beta;
|
||||
int r, n_repeats;
|
||||
uplo_t uplo;
|
||||
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
|
||||
bli_init();
|
||||
|
||||
n_repeats = 3;
|
||||
|
||||
if( argc < 7 )
|
||||
{
|
||||
printf("Usage:\n");
|
||||
printf("test_foo.x m n k p_begin p_inc p_end:\n");
|
||||
exit;
|
||||
}
|
||||
|
||||
int world_size, world_rank, provided;
|
||||
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
|
||||
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
|
||||
|
||||
m_input = strtol( argv[1], NULL, 10 );
|
||||
k_input = strtol( argv[3], NULL, 10 );
|
||||
p_begin = strtol( argv[4], NULL, 10 );
|
||||
p_inc = strtol( argv[5], NULL, 10 );
|
||||
p_end = strtol( argv[6], NULL, 10 );
|
||||
|
||||
dt_a = BLIS_DOUBLE;
|
||||
dt_b = BLIS_DOUBLE;
|
||||
dt_c = BLIS_DOUBLE;
|
||||
dt_alpha = BLIS_DOUBLE;
|
||||
dt_beta = BLIS_DOUBLE;
|
||||
|
||||
uplo = BLIS_LOWER;
|
||||
|
||||
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
|
||||
else k = ( dim_t ) k_input;
|
||||
|
||||
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
|
||||
|
||||
bli_obj_create( dt_a, m, k, 0, 0, &a );
|
||||
bli_obj_create( dt_b, m, k, 0, 0, &b );
|
||||
bli_obj_create( dt_c, m, m, 0, 0, &c );
|
||||
bli_obj_create( dt_c, m, m, 0, 0, &c_save );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &b );
|
||||
bli_randm( &c );
|
||||
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, c );
|
||||
bli_obj_set_uplo( uplo, c );
|
||||
|
||||
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
dtime_save = 1.0e9;
|
||||
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "b", &b, "%4.1f", "" );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
#endif
|
||||
|
||||
#ifdef BLIS
|
||||
|
||||
//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
|
||||
|
||||
bli_her2k( &alpha,
|
||||
&a,
|
||||
&b,
|
||||
&beta,
|
||||
&c );
|
||||
|
||||
#else
|
||||
|
||||
f77_char uploa = 'L';
|
||||
f77_char transa = 'N';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int kk = bli_obj_width_after_trans( a );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldb = bli_obj_col_stride( b );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
double* alphap = bli_obj_buffer( alpha );
|
||||
double* ap = bli_obj_buffer( a );
|
||||
double* bp = bli_obj_buffer( b );
|
||||
double* betap = bli_obj_buffer( beta );
|
||||
double* cp = bli_obj_buffer( c );
|
||||
|
||||
dsyr2k_( &uploa,
|
||||
&transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
bp, &ldb,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "c after", &c, "%4.1f", "" );
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
gflops = ( 2.0 * m * k * m ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_her2k_blis" );
|
||||
#else
|
||||
printf( "data_her2k_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )k, dtime_save, gflops );
|
||||
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &beta );
|
||||
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
}
|
||||
|
||||
bli_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
200
mpi_test/test_herk.c
Normal file
200
mpi_test/test_herk.c
Normal file
@@ -0,0 +1,200 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include "blis.h"
|
||||
#include <mpi.h>
|
||||
|
||||
// uploa transa m k alpha a lda beta c ldc
|
||||
//void dsyrk_( char*, char*, int*, int*, double*, double*, int*, double*, double*, int* );
|
||||
|
||||
//#define PRINT
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
dim_t m, k;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, k_input;
|
||||
num_t dt_a, dt_c;
|
||||
num_t dt_alpha, dt_beta;
|
||||
int r, n_repeats;
|
||||
uplo_t uplo;
|
||||
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
|
||||
bli_init();
|
||||
|
||||
n_repeats = 3;
|
||||
|
||||
if( argc < 7 )
|
||||
{
|
||||
printf("Usage:\n");
|
||||
printf("test_foo.x m n k p_begin p_inc p_end:\n");
|
||||
exit;
|
||||
}
|
||||
|
||||
int world_size, world_rank, provided;
|
||||
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
|
||||
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
|
||||
|
||||
m_input = strtol( argv[1], NULL, 10 );
|
||||
k_input = strtol( argv[3], NULL, 10 );
|
||||
p_begin = strtol( argv[4], NULL, 10 );
|
||||
p_inc = strtol( argv[5], NULL, 10 );
|
||||
p_end = strtol( argv[6], NULL, 10 );
|
||||
|
||||
dt_a = BLIS_DOUBLE;
|
||||
dt_c = BLIS_DOUBLE;
|
||||
dt_alpha = BLIS_DOUBLE;
|
||||
dt_beta = BLIS_DOUBLE;
|
||||
|
||||
uplo = BLIS_LOWER;
|
||||
|
||||
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( k_input < 0 ) k = p * ( dim_t )abs(k_input);
|
||||
else k = ( dim_t ) k_input;
|
||||
|
||||
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
|
||||
|
||||
bli_obj_create( dt_a, m, k, 0, 0, &a );
|
||||
bli_obj_create( dt_c, m, m, 0, 0, &c );
|
||||
bli_obj_create( dt_c, m, m, 0, 0, &c_save );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &c );
|
||||
|
||||
bli_obj_set_struc( BLIS_HERMITIAN, c );
|
||||
bli_obj_set_uplo( uplo, c );
|
||||
|
||||
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
dtime_save = 1.0e9;
|
||||
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
#endif
|
||||
|
||||
#ifdef BLIS
|
||||
|
||||
//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
|
||||
|
||||
bli_herk( &alpha,
|
||||
&a,
|
||||
&beta,
|
||||
&c );
|
||||
|
||||
#else
|
||||
|
||||
f77_char uploa = 'L';
|
||||
f77_char transa = 'N';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int kk = bli_obj_width_after_trans( a );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
double* alphap = bli_obj_buffer( alpha );
|
||||
double* ap = bli_obj_buffer( a );
|
||||
double* betap = bli_obj_buffer( beta );
|
||||
double* cp = bli_obj_buffer( c );
|
||||
|
||||
dsyrk_( &uploa,
|
||||
&transa,
|
||||
&mm,
|
||||
&kk,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
betap,
|
||||
cp, &ldc );
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "c after", &c, "%4.1f", "" );
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
gflops = ( 1.0 * m * k * m ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_herk_blis" );
|
||||
#else
|
||||
printf( "data_herk_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )k, dtime_save, gflops );
|
||||
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &beta );
|
||||
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
}
|
||||
|
||||
bli_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
246
mpi_test/test_trmm.c
Normal file
246
mpi_test/test_trmm.c
Normal file
@@ -0,0 +1,246 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include "blis.h"
|
||||
#include <mpi.h>
|
||||
|
||||
// side uplo trans diag m n alpha a lda b ldb
|
||||
//void dtrmm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* );
|
||||
|
||||
//#define PRINT
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, b, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, n_input;
|
||||
num_t dt_a, dt_b, dt_c;
|
||||
num_t dt_alpha, dt_beta;
|
||||
int r, n_repeats;
|
||||
side_t side;
|
||||
uplo_t uplo;
|
||||
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
|
||||
bli_init();
|
||||
|
||||
n_repeats = 3;
|
||||
|
||||
if( argc < 7 )
|
||||
{
|
||||
printf("Usage:\n");
|
||||
printf("test_foo.x m n p_begin p_inc p_end:\n");
|
||||
exit;
|
||||
}
|
||||
|
||||
int world_size, world_rank, provided;
|
||||
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
|
||||
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
|
||||
|
||||
m_input = strtol( argv[1], NULL, 10 );
|
||||
n_input = strtol( argv[2], NULL, 10 );
|
||||
p_begin = strtol( argv[4], NULL, 10 );
|
||||
p_inc = strtol( argv[5], NULL, 10 );
|
||||
p_end = strtol( argv[6], NULL, 10 );
|
||||
|
||||
#if 1
|
||||
dt_a = BLIS_DOUBLE;
|
||||
dt_b = BLIS_DOUBLE;
|
||||
dt_c = BLIS_DOUBLE;
|
||||
dt_alpha = BLIS_DOUBLE;
|
||||
dt_beta = BLIS_DOUBLE;
|
||||
#else
|
||||
dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_DCOMPLEX;
|
||||
#endif
|
||||
|
||||
side = BLIS_LEFT;
|
||||
//side = BLIS_RIGHT;
|
||||
|
||||
uplo = BLIS_LOWER;
|
||||
//uplo = BLIS_UPPER;
|
||||
|
||||
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
|
||||
else n = ( dim_t ) n_input;
|
||||
|
||||
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
|
||||
|
||||
if ( bli_is_left( side ) )
|
||||
bli_obj_create( dt_a, m, m, 0, 0, &a );
|
||||
else
|
||||
bli_obj_create( dt_a, n, n, 0, 0, &a );
|
||||
bli_obj_create( dt_b, m, n, 0, 0, &b );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c_save );
|
||||
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, a );
|
||||
bli_obj_set_uplo( uplo, a );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &c );
|
||||
bli_randm( &b );
|
||||
|
||||
/*
|
||||
bli_obj_toggle_uplo( a );
|
||||
bli_obj_inc_diag_off( -1, a );
|
||||
bli_setm( &BLIS_ZERO, &a );
|
||||
bli_obj_inc_diag_off( 1, a );
|
||||
bli_obj_toggle_uplo( a );
|
||||
bli_obj_set_diag( BLIS_NONUNIT_DIAG, a );
|
||||
bli_scalm( &BLIS_TWO, &a );
|
||||
//bli_scalm( &BLIS_TWO, &a );
|
||||
*/
|
||||
|
||||
|
||||
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
dtime_save = 1.0e9;
|
||||
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef PRINT
|
||||
|
||||
/*
|
||||
obj_t ar, ai;
|
||||
bli_obj_alias_to( a, ar );
|
||||
bli_obj_alias_to( a, ai );
|
||||
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
|
||||
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
|
||||
bli_printm( "ar", &ar, "%4.1f", "" );
|
||||
bli_printm( "ai", &ai, "%4.1f", "" );
|
||||
*/
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
#endif
|
||||
|
||||
#ifdef BLIS
|
||||
bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
|
||||
|
||||
bli_trmm( side,
|
||||
//bli_trmm4m( side,
|
||||
&alpha,
|
||||
&a,
|
||||
&c );
|
||||
|
||||
#else
|
||||
|
||||
f77_char side = 'L';
|
||||
f77_char uplo = 'L';
|
||||
f77_char transa = 'N';
|
||||
f77_char diag = 'N';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int nn = bli_obj_width( c );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
double* alphap = bli_obj_buffer( alpha );
|
||||
double* ap = bli_obj_buffer( a );
|
||||
double* cp = bli_obj_buffer( c );
|
||||
|
||||
dtrmm_( &side,
|
||||
&uplo,
|
||||
&transa,
|
||||
&diag,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "c after", &c, "%4.1f", "" );
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
if ( bli_is_left( side ) )
|
||||
gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
|
||||
else
|
||||
gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_trmm_blis" );
|
||||
#else
|
||||
printf( "data_trmm_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )n, dtime_save, gflops );
|
||||
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &beta );
|
||||
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
}
|
||||
|
||||
bli_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
282
mpi_test/test_trsm.c
Normal file
282
mpi_test/test_trsm.c
Normal file
@@ -0,0 +1,282 @@
|
||||
/*
|
||||
|
||||
BLIS
|
||||
An object-based framework for developing high-performance BLAS-like
|
||||
libraries.
|
||||
|
||||
Copyright (C) 2014, The University of Texas
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name of The University of Texas nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include "blis.h"
|
||||
#include <mpi.h>
|
||||
|
||||
// side uplo trans diag m n alpha a lda b ldb
|
||||
//void dtrsm_( char*, char*, char*, char*, int*, int*, double*, double*, int*, double*, int* );
|
||||
|
||||
//#define PRINT
|
||||
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
obj_t a, b, c;
|
||||
obj_t c_save;
|
||||
obj_t alpha, beta;
|
||||
dim_t m, n;
|
||||
dim_t p;
|
||||
dim_t p_begin, p_end, p_inc;
|
||||
int m_input, n_input;
|
||||
num_t dt_a, dt_b, dt_c;
|
||||
num_t dt_alpha, dt_beta;
|
||||
int r, n_repeats;
|
||||
side_t side;
|
||||
uplo_t uplo;
|
||||
|
||||
double dtime;
|
||||
double dtime_save;
|
||||
double gflops;
|
||||
|
||||
bli_init();
|
||||
|
||||
n_repeats = 3;
|
||||
|
||||
if( argc < 7 )
|
||||
{
|
||||
printf("Usage:\n");
|
||||
printf("test_foo.x m n k p_begin p_inc p_end:\n");
|
||||
exit;
|
||||
}
|
||||
|
||||
int world_size, world_rank, provided;
|
||||
MPI_Init_thread( NULL, NULL, MPI_THREAD_FUNNELED, &provided );
|
||||
MPI_Comm_size( MPI_COMM_WORLD, &world_size );
|
||||
MPI_Comm_rank( MPI_COMM_WORLD, &world_rank );
|
||||
|
||||
m_input = strtol( argv[1], NULL, 10 );
|
||||
n_input = strtol( argv[2], NULL, 10 );
|
||||
p_begin = strtol( argv[4], NULL, 10 );
|
||||
p_inc = strtol( argv[5], NULL, 10 );
|
||||
p_end = strtol( argv[6], NULL, 10 );
|
||||
|
||||
#if 1
|
||||
dt_a = BLIS_DOUBLE;
|
||||
dt_b = BLIS_DOUBLE;
|
||||
dt_c = BLIS_DOUBLE;
|
||||
dt_alpha = BLIS_DOUBLE;
|
||||
dt_beta = BLIS_DOUBLE;
|
||||
#else
|
||||
dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_FLOAT;
|
||||
//dt_a = dt_b = dt_c = dt_alpha = dt_beta = BLIS_SCOMPLEX;
|
||||
#endif
|
||||
|
||||
side = BLIS_LEFT;
|
||||
//side = BLIS_RIGHT;
|
||||
|
||||
uplo = BLIS_LOWER;
|
||||
//uplo = BLIS_UPPER;
|
||||
|
||||
for ( p = p_begin + world_rank * p_inc; p <= p_end; p += p_inc * world_size )
|
||||
{
|
||||
|
||||
if ( m_input < 0 ) m = p * ( dim_t )abs(m_input);
|
||||
else m = ( dim_t ) m_input;
|
||||
if ( n_input < 0 ) n = p * ( dim_t )abs(n_input);
|
||||
else n = ( dim_t ) n_input;
|
||||
|
||||
|
||||
bli_obj_create( dt_alpha, 1, 1, 0, 0, &alpha );
|
||||
bli_obj_create( dt_beta, 1, 1, 0, 0, &beta );
|
||||
|
||||
if ( bli_is_left( side ) )
|
||||
bli_obj_create( dt_a, m, m, 0, 0, &a );
|
||||
else
|
||||
bli_obj_create( dt_a, n, n, 0, 0, &a );
|
||||
bli_obj_create( dt_b, m, n, 0, 0, &b );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c );
|
||||
bli_obj_create( dt_c, m, n, 0, 0, &c_save );
|
||||
|
||||
bli_obj_set_struc( BLIS_TRIANGULAR, a );
|
||||
bli_obj_set_uplo( uplo, a );
|
||||
//bli_obj_set_diag( BLIS_UNIT_DIAG, a );
|
||||
|
||||
bli_randm( &a );
|
||||
bli_randm( &c );
|
||||
bli_randm( &b );
|
||||
|
||||
/*
|
||||
{
|
||||
obj_t a2;
|
||||
|
||||
bli_obj_alias_to( a, a2 );
|
||||
bli_obj_toggle_uplo( a2 );
|
||||
bli_obj_inc_diag_off( 1, a2 );
|
||||
bli_setm( &BLIS_ZERO, &a2 );
|
||||
bli_obj_inc_diag_off( -2, a2 );
|
||||
bli_obj_toggle_uplo( a2 );
|
||||
bli_obj_set_diag( BLIS_NONUNIT_DIAG, a2 );
|
||||
bli_scalm( &BLIS_TWO, &a2 );
|
||||
//bli_scalm( &BLIS_TWO, &a );
|
||||
}
|
||||
*/
|
||||
|
||||
bli_setsc( (2.0/1.0), 0.0, &alpha );
|
||||
bli_setsc( (1.0/1.0), 0.0, &beta );
|
||||
|
||||
|
||||
bli_copym( &c, &c_save );
|
||||
|
||||
dtime_save = 1.0e9;
|
||||
|
||||
for ( r = 0; r < n_repeats; ++r )
|
||||
{
|
||||
bli_copym( &c_save, &c );
|
||||
|
||||
dtime = bli_clock();
|
||||
|
||||
|
||||
#ifdef PRINT
|
||||
/*
|
||||
obj_t ar, ai;
|
||||
bli_obj_alias_to( a, ar );
|
||||
bli_obj_alias_to( a, ai );
|
||||
bli_obj_set_datatype( BLIS_DOUBLE, ar ); ar.rs *= 2; ar.cs *= 2;
|
||||
bli_obj_set_datatype( BLIS_DOUBLE, ai ); ai.rs *= 2; ai.cs *= 2; ai.buffer = ( double* )ai.buffer + 1;
|
||||
|
||||
bli_printm( "ar", &ar, "%4.1f", "" );
|
||||
bli_printm( "ai", &ai, "%4.1f", "" );
|
||||
*/
|
||||
|
||||
bli_invertd( &a );
|
||||
bli_printm( "a", &a, "%4.1f", "" );
|
||||
bli_invertd( &a );
|
||||
bli_printm( "c", &c, "%4.1f", "" );
|
||||
#endif
|
||||
|
||||
#ifdef BLIS
|
||||
//bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING );
|
||||
|
||||
bli_trsm( side,
|
||||
//bli_trsm4m( side,
|
||||
//bli_trsm3m( side,
|
||||
&alpha,
|
||||
&a,
|
||||
&c );
|
||||
#else
|
||||
|
||||
if ( bli_is_real( dt_a ) )
|
||||
{
|
||||
f77_char side = 'L';
|
||||
f77_char uplo = 'L';
|
||||
f77_char transa = 'N';
|
||||
f77_char diag = 'N';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int nn = bli_obj_width( c );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
float * alphap = bli_obj_buffer( alpha );
|
||||
float * ap = bli_obj_buffer( a );
|
||||
float * cp = bli_obj_buffer( c );
|
||||
|
||||
strsm_( &side,
|
||||
&uplo,
|
||||
&transa,
|
||||
&diag,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
else // if ( bli_is_complex( dt_a ) )
|
||||
{
|
||||
f77_char side = 'L';
|
||||
f77_char uplo = 'L';
|
||||
f77_char transa = 'N';
|
||||
f77_char diag = 'N';
|
||||
f77_int mm = bli_obj_length( c );
|
||||
f77_int nn = bli_obj_width( c );
|
||||
f77_int lda = bli_obj_col_stride( a );
|
||||
f77_int ldc = bli_obj_col_stride( c );
|
||||
scomplex* alphap = bli_obj_buffer( alpha );
|
||||
scomplex* ap = bli_obj_buffer( a );
|
||||
scomplex* cp = bli_obj_buffer( c );
|
||||
|
||||
ctrsm_( &side,
|
||||
//ztrsm_( &side,
|
||||
&uplo,
|
||||
&transa,
|
||||
&diag,
|
||||
&mm,
|
||||
&nn,
|
||||
alphap,
|
||||
ap, &lda,
|
||||
cp, &ldc );
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef PRINT
|
||||
bli_printm( "c after", &c, "%4.1f", "" );
|
||||
exit(1);
|
||||
#endif
|
||||
|
||||
|
||||
dtime_save = bli_clock_min_diff( dtime_save, dtime );
|
||||
}
|
||||
|
||||
if ( bli_is_left( side ) )
|
||||
gflops = ( 1.0 * m * m * n ) / ( dtime_save * 1.0e9 );
|
||||
else
|
||||
gflops = ( 1.0 * m * n * n ) / ( dtime_save * 1.0e9 );
|
||||
|
||||
if ( bli_is_complex( dt_a ) ) gflops *= 4.0;
|
||||
|
||||
#ifdef BLIS
|
||||
printf( "data_trsm_blis" );
|
||||
#else
|
||||
printf( "data_trsm_%s", BLAS );
|
||||
#endif
|
||||
printf( "( %2lu, 1:4 ) = [ %4lu %4lu %10.3e %6.3f ];\n",
|
||||
( unsigned long )(p - p_begin + 1)/p_inc + 1,
|
||||
( unsigned long )m,
|
||||
( unsigned long )n, dtime_save, gflops );
|
||||
|
||||
|
||||
bli_obj_free( &alpha );
|
||||
bli_obj_free( &beta );
|
||||
|
||||
bli_obj_free( &a );
|
||||
bli_obj_free( &b );
|
||||
bli_obj_free( &c );
|
||||
bli_obj_free( &c_save );
|
||||
}
|
||||
|
||||
bli_finalize();
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user