mirror of
https://github.com/amd/blis.git
synced 2026-06-06 12:43:58 +00:00
Level 3 BLAS CPP routines(Except trmm) + Doxygen Commenting + Test application for dgemm CPP API
Change-Id: I97d1203ce466f3adb991341c17db9070c1eaf104
This commit is contained in:
@@ -2,31 +2,29 @@ CWD=$(pwd)
|
||||
echo $CWD
|
||||
make clean
|
||||
make blis CFLAGS+="-DFLOAT"
|
||||
numactl -C 1 ./test_gemm1_blis.x
|
||||
make clean
|
||||
make blis CFLAGS+="-DDOUBLE"
|
||||
numactl -C 1 ./test_gemm1_blis.x
|
||||
make clean
|
||||
make blis CFLAGS+="-DSCOMPLEX"
|
||||
numactl -C 1 ./test_gemm1_blis.x
|
||||
make clean
|
||||
make blis CFLAGS+="-DDCOMPLEX"
|
||||
numactl -C 1 ./test_gemm1_blis.x
|
||||
|
||||
|
||||
cd ../test/
|
||||
CWD=$(pwd)
|
||||
echo $CWD
|
||||
make clean
|
||||
make blis CFLAGS+="-DFLOAT"
|
||||
numactl -C 1 ./test_gemm_blis.x
|
||||
numactl -C 1 ./test_trsm_blis.x
|
||||
numactl -C 1 ./test_hemm_blis.x
|
||||
numactl -C 1 ./test_symm_blis.x
|
||||
|
||||
make clean
|
||||
make blis CFLAGS+="-DDOUBLE"
|
||||
numactl -C 1 ./test_gemm_blis.x
|
||||
numactl -C 1 ./test_trsm_blis.x
|
||||
numactl -C 1 ./test_hemm_blis.x
|
||||
numactl -C 1 ./test_symm_blis.x
|
||||
|
||||
make clean
|
||||
make blis CFLAGS+="-DSCOMPLEX"
|
||||
numactl -C 1 ./test_gemm_blis.x
|
||||
numactl -C 1 ./test_trsm_blis.x
|
||||
numactl -C 1 ./test_hemm_blis.x
|
||||
numactl -C 1 ./test_symm_blis.x
|
||||
|
||||
make clean
|
||||
make blis CFLAGS+="-DDCOMPLEX"
|
||||
numactl -C 1 ./test_gemm_blis.x
|
||||
numactl -C 1 ./test_trsm_blis.x
|
||||
numactl -C 1 ./test_hemm_blis.x
|
||||
numactl -C 1 ./test_symm_blis.x
|
||||
|
||||
|
||||
@@ -1,3 +1,36 @@
|
||||
/*
|
||||
|
||||
BLISPP
|
||||
C++ test driver for BLIS CPP gemm routine and reference cblas gemm routine.
|
||||
|
||||
Copyright (C) 2019, Advanced Micro Devices, Inc.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
- Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
- Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
- Neither the name(s) of the copyright holder(s) nor the names of its
|
||||
contributors may be used to endorse or promote products derived
|
||||
from this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
*/
|
||||
|
||||
#include <complex>
|
||||
|
||||
#include <stdio.h>
|
||||
@@ -5,98 +38,174 @@
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include "blis.hh"
|
||||
#include "test_gemm.hh"
|
||||
|
||||
using namespace std;
|
||||
|
||||
#define DIM 2
|
||||
/*
 * Print an m-by-n matrix to standard output, one row per line.
 *
 * Element (row, col) is read from matrix[row * n + col], i.e. the data is
 * treated as row-major with a row length of n. Every element is followed by
 * a single space and every row by a newline.
 */
template <typename T>
void print_matrix(T * matrix , int m , int n)
{
    for ( int row = 0; row < m; row ++ ) {
        for ( int col = 0; col < n; col ++ ) {
            std::cout << matrix[row * n + col] << " ";
        }
        std::cout << "\n";
    }
}

/* When defined, test_dgemm() dumps its operand and result matrices. */
#define PRINT

/*
 * Compare two m-by-n column-major matrices element by element.
 *
 * ldc     - leading dimension (column stride) of C
 * ldc_ref - leading dimension (column stride) of C_ref
 * m, n    - number of rows / columns to compare
 * C       - matrix under test
 * C_ref   - reference matrix
 *
 * Returns 0 when every compared element is bitwise-equal under `!=`,
 * 1 otherwise. The comparison is exact (no tolerance).
 *
 * Elements are addressed directly as ptr[col * ld + row] rather than through
 * the C()/C_ref() helper macros, so this function does not depend on them.
 */
int computeError(
        int    ldc,
        int    ldc_ref,
        int    m,
        int    n,
        double *C,
        double *C_ref
        )
{
    int ret = 0;

    for ( int i = 0; i < m; i ++ ) {
        for ( int j = 0; j < n; j ++ ) {
            const double got      = C[ j * ldc + i ];
            const double expected = C_ref[ j * ldc_ref + i ];

            if ( got != expected ) {
                printf( "C[ %d ][ %d ] != C_ref, %E, %E\n", i, j, got, expected );
                ret = 1;
                // Leaves only the column loop: at most one mismatch is
                // reported per row, and the remaining rows are still checked.
                break;
            }
        }
    }

    return ret;
}
|
||||
void test_dgemm( )
|
||||
{
|
||||
int i, j, p, nx;
|
||||
double *A, *B, *C, *C_ref;
|
||||
double alpha, beta;
|
||||
double tmp, error, flops;
|
||||
double ref_beg, ref_time, bl_dgemm_beg, bl_dgemm_time;
|
||||
int nrepeats;
|
||||
int m,n,k;
|
||||
int lda, ldb, ldc, ldc_ref;
|
||||
double ref_rectime, bl_dgemm_rectime;
|
||||
|
||||
alpha = 1.0;
|
||||
beta = 0.0;
|
||||
m = 5;
|
||||
k = 6;
|
||||
n = 4;
|
||||
|
||||
A = (double*)malloc( sizeof(double) * m * k );
|
||||
B = (double*)malloc( sizeof(double) * k * n );
|
||||
|
||||
lda = m;
|
||||
ldb = k;
|
||||
ldc = m;
|
||||
ldc_ref = m;
|
||||
C = bl_malloc_aligned( ldc, n + 4, sizeof(double) );
|
||||
C_ref = (double*)malloc( sizeof(double) * m * n );
|
||||
|
||||
nrepeats = 3;
|
||||
|
||||
srand48 (time(NULL));
|
||||
|
||||
// Randonly generate points in [ 0, 1 ].
|
||||
for ( p = 0; p < k; p ++ ) {
|
||||
for ( i = 0; i < m; i ++ ) {
|
||||
A( i, p ) = (double)( drand48() );
|
||||
}
|
||||
}
|
||||
for ( j = 0; j < n; j ++ ) {
|
||||
for ( p = 0; p < k; p ++ ) {
|
||||
B( p, j ) = (double)( drand48() );
|
||||
}
|
||||
}
|
||||
|
||||
for ( j = 0; j < n; j ++ ) {
|
||||
for ( i = 0; i < m; i ++ ) {
|
||||
C_ref( i, j ) = (double)( 0.0 );
|
||||
C( i, j ) = (double)( 0.0 );
|
||||
}
|
||||
}
|
||||
#ifdef PRINT
|
||||
bl_dgemm_printmatrix(A, lda ,m,k);
|
||||
bl_dgemm_printmatrix(B, ldb ,k,n);
|
||||
bl_dgemm_printmatrix(C, ldc ,m,n);
|
||||
#endif
|
||||
for ( i = 0; i < nrepeats; i ++ ) {
|
||||
bl_dgemm_beg = bl_clock();
|
||||
{
|
||||
blis::gemm(
|
||||
CblasColMajor,
|
||||
CblasNoTrans,
|
||||
CblasNoTrans,
|
||||
m,
|
||||
n,
|
||||
k,
|
||||
alpha,
|
||||
A,
|
||||
lda,
|
||||
B,
|
||||
ldb,
|
||||
beta,
|
||||
C,
|
||||
ldc
|
||||
);
|
||||
}
|
||||
bl_dgemm_time = bl_clock() - bl_dgemm_beg;
|
||||
|
||||
if ( i == 0 ) {
|
||||
bl_dgemm_rectime = bl_dgemm_time;
|
||||
} else {
|
||||
bl_dgemm_rectime = bl_dgemm_time < bl_dgemm_rectime ? bl_dgemm_time : bl_dgemm_rectime;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef PRINT
|
||||
bl_dgemm_printmatrix(C, ldc ,m,n);
|
||||
#endif
|
||||
for ( i = 0; i < nrepeats; i ++ ) {
|
||||
ref_beg = bl_clock();
|
||||
{
|
||||
cblas_dgemm(
|
||||
CblasColMajor,
|
||||
CblasNoTrans,
|
||||
CblasNoTrans,
|
||||
m,
|
||||
n,
|
||||
k,
|
||||
alpha,
|
||||
A,
|
||||
lda,
|
||||
B,
|
||||
ldb,
|
||||
beta,
|
||||
C_ref,
|
||||
ldc_ref
|
||||
);
|
||||
}
|
||||
ref_time = bl_clock() - ref_beg;
|
||||
|
||||
if ( i == 0 ) {
|
||||
ref_rectime = ref_time;
|
||||
} else {
|
||||
ref_rectime = ref_time < ref_rectime ? ref_time : ref_rectime;
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef PRINT
|
||||
bl_dgemm_printmatrix(C_ref, ldc_ref ,m,n);
|
||||
#endif
|
||||
if(computeError(ldc, ldc_ref, m, n, C, C_ref )==1)
|
||||
printf("%s TEST FAIL\n" ,__func__);
|
||||
else
|
||||
printf("%s TEST PASS\n" , __func__);
|
||||
|
||||
|
||||
// Compute overall floating point operations.
|
||||
flops = ( m * n / ( 1000.0 * 1000.0 * 1000.0 ) ) * ( 2 * k );
|
||||
|
||||
printf( "%5d\t %5d\t %5d\t %5.2lf\t %5.2lf\n",
|
||||
m, n, k, flops / bl_dgemm_rectime, flops / ref_rectime );
|
||||
|
||||
free( A );
|
||||
free( B );
|
||||
free( C );
|
||||
free( C_ref );
|
||||
}
|
||||
|
||||
// -----------------------------------------------------------------------------
|
||||
int main( int argc, char** argv )
|
||||
{
|
||||
int M, N, K, lda, ldb, ldc;
|
||||
double a_d[DIM * DIM] = { 1.111, 2.222, 3.333, 4.444 };
|
||||
double b_d[DIM * DIM] = { 5.555, 6.666, 7.777, 8.888 };
|
||||
double c_d[DIM * DIM];
|
||||
double alpha_d, beta_d;
|
||||
float a_f[DIM * DIM] = { 1.1, 2.2, 3.3, 4.4 };
|
||||
float b_f[DIM * DIM] = { 5.5, 6.6, 7.7, 8.8 };
|
||||
float c_f[DIM * DIM];
|
||||
float alpha_f, beta_f;
|
||||
std::complex<float> a_c[DIM * DIM]={{1, 2},{3, 4},{5,6},{7,8}};
|
||||
std::complex<float> b_c[DIM * DIM]={{1, 2},{3, 4},{5,6},{7,8}};
|
||||
std::complex<float> c_c[DIM * DIM];
|
||||
std::complex<float> alpha_c, beta_c;
|
||||
std::complex<double> a_z[DIM * DIM]={{1.1, 2.2},{3.3, 4.4},{5.5,6.6},{7.7,8.8}};
|
||||
std::complex<double> b_z[DIM * DIM]={{1.1, 2.2},{3.3, 4.4},{5.5,6.6},{7.7,8.8}};
|
||||
std::complex<double> c_z[DIM * DIM];
|
||||
std::complex<double> alpha_z, beta_z;
|
||||
M = DIM;
|
||||
N = M;
|
||||
K = M;
|
||||
lda = M;
|
||||
ldb = K;
|
||||
ldc = M;
|
||||
alpha_d = 1.0;
|
||||
beta_d = 0.0;
|
||||
alpha_f = 1.0;
|
||||
beta_f = 0.0;
|
||||
alpha_c = {1.0,1.0};
|
||||
beta_c = {0.0,0.0};
|
||||
alpha_z = {1.0,1.0};
|
||||
beta_z = {0.0,0.0};
|
||||
|
||||
/*cblis_sgemm*/
|
||||
cout<<"a_f= \n";
|
||||
print_matrix<float>(a_f , M , K);
|
||||
cout<<"b_f= \n";
|
||||
print_matrix<float>(b_f , K , N);
|
||||
blis::gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha_f, a_f,
|
||||
lda, b_f, ldb, beta_f, c_f, ldc);
|
||||
cout<<"c_f= \n";
|
||||
print_matrix<float>(c_f , M , N);
|
||||
|
||||
|
||||
/*cblis_dgemm*/
|
||||
printf("a_d = \n");
|
||||
print_matrix<double>(a_d , M , K);
|
||||
printf("b_d = \n");
|
||||
print_matrix<double>(b_d , K , N);
|
||||
blis::gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha_d, a_d,
|
||||
lda, b_d, ldb, beta_d, c_d, ldc);
|
||||
printf("c_d = \n");
|
||||
print_matrix<double>(c_d , M , N);
|
||||
|
||||
|
||||
/*cblis_cgemm*/
|
||||
printf("a_c = \n");
|
||||
print_matrix<std::complex<float>>(a_c , M , K);
|
||||
printf("b_c = \n");
|
||||
print_matrix<std::complex<float>>(b_c , K , N);
|
||||
blis::gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha_c, a_c,
|
||||
lda, b_c, ldb, beta_c, c_c, ldc);
|
||||
printf("c_c = \n");
|
||||
print_matrix<std::complex<float>>(c_c , M , N);
|
||||
|
||||
|
||||
/*cblis_zgemm*/
|
||||
printf("a_z = \n");
|
||||
print_matrix<std::complex<double>>(a_z , M , K);
|
||||
printf("b_z = \n");
|
||||
print_matrix<std::complex<double>>(b_z , K , N);
|
||||
blis::gemm(CblasRowMajor, CblasNoTrans, CblasNoTrans, M, N, K, alpha_z, a_z,
|
||||
lda, b_z, ldb, beta_z, c_z, ldc);
|
||||
printf("c_z = \n");
|
||||
print_matrix<std::complex<double>>(c_z , M , N);
|
||||
return 0;
|
||||
test_dgemm( );
|
||||
return 0;
|
||||
|
||||
}
|
||||
|
||||
191
testcpp/test_gemm.hh
Normal file
191
testcpp/test_gemm.hh
Normal file
@@ -0,0 +1,191 @@
|
||||
/*
|
||||
* --------------------------------------------------------------------------
|
||||
* BLISLAB
|
||||
* --------------------------------------------------------------------------
|
||||
* Copyright (C) 2016, The University of Texas at Austin
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are
|
||||
* met:
|
||||
* - Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* - Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
* - Neither the name of The University of Texas nor the names of its
|
||||
* contributors may be used to endorse or promote products derived
|
||||
* from this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*
|
||||
*
|
||||
* test_gemm.hh
|
||||
*
|
||||
*
|
||||
* Purpose:
|
||||
* this header file contains all function prototypes.
|
||||
*
|
||||
* Todo:
|
||||
*
|
||||
*
|
||||
* Modification:
|
||||
*
|
||||
*
|
||||
* */
|
||||
|
||||
|
||||
#ifndef TEST_GEMM_HH
|
||||
#define TEST_GEMM_HH
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
// Determine the target operating system
|
||||
#if defined(_WIN32) || defined(__CYGWIN__)
|
||||
#define BL_OS_WINDOWS 1
|
||||
#elif defined(__APPLE__) || defined(__MACH__)
|
||||
#define BL_OS_OSX 1
|
||||
#elif defined(__ANDROID__)
|
||||
#define BL_OS_ANDROID 1
|
||||
#elif defined(__linux__)
|
||||
#define BL_OS_LINUX 1
|
||||
#elif defined(__bgq__)
|
||||
#define BL_OS_BGQ 1
|
||||
#elif defined(__bg__)
|
||||
#define BL_OS_BGP 1
|
||||
#elif defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \
|
||||
defined(__bsdi__) || defined(__DragonFly__)
|
||||
#define BL_OS_BSD 1
|
||||
#else
|
||||
#error "Cannot determine operating system"
|
||||
#endif
|
||||
|
||||
// clock_gettime() and struct timespec, used by the timer functions below, need these headers.
|
||||
#if BL_OS_WINDOWS
|
||||
#include <time.h>
|
||||
#elif BL_OS_OSX
|
||||
#include <mach/mach_time.h>
|
||||
#else
|
||||
#include <sys/time.h>
|
||||
#include <time.h>
|
||||
#endif
|
||||
|
||||
//#include "bl_config.h"
|
||||
|
||||
#define min( i, j ) ( (i)<(j) ? (i): (j) )
|
||||
|
||||
#define A( i, j ) A[ (j)*lda + (i) ]
|
||||
#define B( i, j ) B[ (j)*ldb + (i) ]
|
||||
#define C( i, j ) C[ (j)*ldc + (i) ]
|
||||
#define C_ref( i, j ) C_ref[ (j)*ldc_ref + (i) ]
|
||||
#define GEMM_SIMD_ALIGN_SIZE 32
|
||||
// Auxiliary parameter bundle, also available under the typedef name aux_t.
// Nothing in the visible source reads or writes this struct.
// NOTE(review): field meanings are not established by this file; the names
// suggest a "next B panel" pointer pair (double / float) plus size and index
// values -- confirm against the code that consumes aux_t before relying on it.
struct aux_s {
|
||||
double *b_next;
|
||||
float *b_next_s;
|
||||
int ldr;
|
||||
char *flag;
|
||||
int pc;
|
||||
int m;
|
||||
int n;
|
||||
};
|
||||
typedef struct aux_s aux_t;
|
||||
|
||||
void bl_dgemm(
|
||||
int m,
|
||||
int n,
|
||||
int k,
|
||||
double *A,
|
||||
int lda,
|
||||
double *B,
|
||||
int ldb,
|
||||
double *C,
|
||||
int ldc
|
||||
);
|
||||
|
||||
/*
|
||||
*
|
||||
*
|
||||
*/
|
||||
double *bl_malloc_aligned(
|
||||
int m,
|
||||
int n,
|
||||
int size
|
||||
)
|
||||
{
|
||||
double *ptr;
|
||||
int err;
|
||||
|
||||
err = posix_memalign( (void**)&ptr, (size_t)GEMM_SIMD_ALIGN_SIZE, size * m * n );
|
||||
|
||||
if ( err ) {
|
||||
printf( "bl_malloc_aligned(): posix_memalign() failures" );
|
||||
exit( 1 );
|
||||
}
|
||||
|
||||
return ptr;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/*
|
||||
*
|
||||
*
|
||||
*/
|
||||
/*
 * Print an m-by-n column-major matrix to standard output.
 *
 * A    - matrix data; element (i, j) is read from A[j * lda + i]
 * lda  - leading dimension (distance between consecutive columns)
 * m, n - number of rows / columns to print
 *
 * Each element is written with "%lf" followed by a tab, each row ends with a
 * newline, and one extra blank line follows the matrix.
 *
 * Declared inline because it is defined in a header: without it, including
 * this header from two translation units is a multiple-definition error.
 */
inline void bl_dgemm_printmatrix(
        double *A,
        int    lda,
        int    m,
        int    n
        )
{
    for ( int row = 0; row < m; row ++ ) {
        for ( int col = 0; col < n; col ++ ) {
            printf("%lf\t", A[col * lda + row]);
        }
        printf("\n");
    }
    printf("\n");
}
|
||||
|
||||
/*
|
||||
* The timer functions are copied directly from BLIS 0.2.0
|
||||
*
|
||||
*/
|
||||
/*
 * Return the CLOCK_MONOTONIC time in seconds, measured from the whole second
 * in which this function was first called.
 *
 * The first call therefore returns a value in [0, 1); later calls return the
 * elapsed time relative to that same reference second.
 *
 * The reference is a function-local static initialised exactly once. The
 * earlier file-scope variable used 0.0 to mean "not set yet", so a first
 * sample with tv_sec == 0 left it unset and a later call could re-base the
 * clock. Keeping the state inside an inline function also gives a single
 * shared reference instead of one per translation unit.
 *
 * NOTE(review): the return value of clock_gettime() is not checked.
 */
inline double bl_clock_helper()
{
    struct timespec ts;

    clock_gettime( CLOCK_MONOTONIC, &ts );

    // Captured on the first call only; subtracting it keeps the seconds part
    // small before the nanoseconds are added.
    static const double ref_time_sec = static_cast<double>( ts.tv_sec );

    const double norm_sec = static_cast<double>( ts.tv_sec ) - ref_time_sec;

    return norm_sec + ts.tv_nsec * 1.0e-9;
}
|
||||
|
||||
|
||||
double bl_clock( void )
|
||||
{
|
||||
return bl_clock_helper();
|
||||
}
|
||||
|
||||
#endif
|
||||
Reference in New Issue
Block a user