From a2e0cdd7debf8109198536d55af05d5631072fb2 Mon Sep 17 00:00:00 2001 From: "Field G. Van Zee" Date: Tue, 13 Nov 2018 14:15:11 -0600 Subject: [PATCH] Added hemm test driver to test/3m4m. Details: - Added a new test_hemm.c test driver to test/3m4m, which was modeled after the driver by the similar name in test. Also updated Makefile so that blis-nat-[sm]t would trigger builds for the new driver. --- test/3m4m/Makefile | 8 + test/3m4m/test_hemm.c | 332 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 340 insertions(+) create mode 100644 test/3m4m/test_hemm.c diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index e91b100b2..7582c70ec 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -243,6 +243,10 @@ blis-nat-st: \ test_dgemm_asm_blis_st.x \ test_cgemm_asm_blis_st.x \ test_zgemm_asm_blis_st.x \ + test_shemm_asm_blis_st.x \ + test_dhemm_asm_blis_st.x \ + test_chemm_asm_blis_st.x \ + test_zhemm_asm_blis_st.x \ test_sherk_asm_blis_st.x \ test_dherk_asm_blis_st.x \ test_cherk_asm_blis_st.x \ @@ -261,6 +265,10 @@ blis-nat-mt: \ test_dgemm_asm_blis_mt.x \ test_cgemm_asm_blis_mt.x \ test_zgemm_asm_blis_mt.x \ + test_shemm_asm_blis_mt.x \ + test_dhemm_asm_blis_mt.x \ + test_chemm_asm_blis_mt.x \ + test_zhemm_asm_blis_mt.x \ test_sherk_asm_blis_mt.x \ test_dherk_asm_blis_mt.x \ test_cherk_asm_blis_mt.x \ diff --git a/test/3m4m/test_hemm.c b/test/3m4m/test_hemm.c new file mode 100644 index 000000000..7563e3cba --- /dev/null +++ b/test/3m4m/test_hemm.c @@ -0,0 +1,332 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include +#include "blis.h" + +//#define PRINT + +int main( int argc, char** argv ) +{ + obj_t a, b, c; + obj_t c_save; + obj_t alpha, beta; + dim_t m, n; + dim_t p; + dim_t p_begin, p_end, p_inc; + int m_input, n_input; + ind_t ind; + num_t dt; + char dt_ch; + int r, n_repeats; + side_t side; + uplo_t uploa; + f77_char f77_side; + f77_char f77_uploa; + + double dtime; + double dtime_save; + double gflops; + + //bli_init(); + + //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); + + n_repeats = 3; + + dt = DT; + + ind = IND; + + p_begin = P_BEGIN; + p_end = P_END; + p_inc = P_INC; + + m_input = -1; + n_input = -1; + + + // Supress compiler warnings about unused variable 'ind'. + ( void )ind; + +#if 0 + + cntx_t* cntx; + + ind_t ind_mod = ind; + + // A hack to use 3m1 as 1mpb (with 1m as 1mbp). + if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; + + // Initialize a context for the current induced method and datatype. + cntx = bli_gks_query_ind_cntx( ind_mod, dt ); + + // Set k to the kc blocksize for the current datatype. + k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); + +#elif 1 + + //k_input = 256; + +#endif + + // Choose the char corresponding to the requested datatype. + if ( bli_is_float( dt ) ) dt_ch = 's'; + else if ( bli_is_double( dt ) ) dt_ch = 'd'; + else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; + else dt_ch = 'z'; + + side = BLIS_LEFT; + uploa = BLIS_LOWER; + + bli_param_map_blis_to_netlib_side( side, &f77_side ); + bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); + + // Begin with initializing the last entry to zero so that + // matlab allocates space for the entire array once up-front. + for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; +#ifdef BLIS + printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); +#else + printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); +#endif + printf( "( %2lu, 1:4 ) = [ %4lu %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin + 1)/p_inc + 1, + ( unsigned long )0, + ( unsigned long )0, + ( unsigned long )0, 0.0 ); + + + for ( p = p_begin; p <= p_end; p += p_inc ) + { + + if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); + else m = ( dim_t ) m_input; + if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); + else n = ( dim_t ) n_input; + + bli_obj_create( dt, 1, 1, 0, 0, &alpha ); + bli_obj_create( dt, 1, 1, 0, 0, &beta ); + + if ( bli_is_left( side ) ) + bli_obj_create( dt, m, m, 0, 0, &a ); + else + bli_obj_create( dt, n, n, 0, 0, &a ); + bli_obj_create( dt, m, n, 0, 0, &b ); + bli_obj_create( dt, m, n, 0, 0, &c ); + bli_obj_create( dt, m, n, 0, 0, &c_save ); + + bli_randm( &a ); + bli_randm( &b ); + bli_randm( &c ); + + bli_obj_set_struc( BLIS_HERMITIAN, &a ); + bli_obj_set_uplo( uploa, &a ); + + // Make A densely Hermitian, and zero the unstored triangle to + // ensure the implementation reads only from the stored region. + bli_mkherm( &a ); + bli_mktrim( &a ); + + bli_setsc( (2.0/1.0), 0.0, &alpha ); + bli_setsc( (1.0/1.0), 0.0, &beta ); + + + bli_copym( &c, &c_save ); + +#ifdef BLIS + bli_ind_disable_all_dt( dt ); + bli_ind_enable_dt( ind, dt ); +#endif + + dtime_save = DBL_MAX; + + for ( r = 0; r < n_repeats; ++r ) + { + bli_copym( &c_save, &c ); + + dtime = bli_clock(); + + +#ifdef PRINT + bli_printm( "a", &a, "%4.1f", "" ); + bli_printm( "b", &b, "%4.1f", "" ); + bli_printm( "c", &c, "%4.1f", "" ); +#endif + +#ifdef BLIS + + bli_hemm( side, + &alpha, + &a, + &b, + &beta, + &c ); + +#else + + if ( bli_is_float( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + float* alphap = bli_obj_buffer( &alpha ); + float* ap = bli_obj_buffer( &a ); + float* bp = bli_obj_buffer( &b ); + float* betap = bli_obj_buffer( &beta ); + float* cp = bli_obj_buffer( &c ); + + shemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_double( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + double* alphap = bli_obj_buffer( &alpha ); + double* ap = bli_obj_buffer( &a ); + double* bp = bli_obj_buffer( &b ); + double* betap = bli_obj_buffer( &beta ); + double* cp = bli_obj_buffer( &c ); + + dhemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_scomplex( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + scomplex* alphap = bli_obj_buffer( &alpha ); + scomplex* ap = bli_obj_buffer( &a ); + scomplex* bp = bli_obj_buffer( &b ); + scomplex* betap = bli_obj_buffer( &beta ); + scomplex* cp = bli_obj_buffer( &c ); + + chemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_dcomplex( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + dcomplex* alphap = bli_obj_buffer( &alpha ); + dcomplex* ap = bli_obj_buffer( &a ); + dcomplex* bp = bli_obj_buffer( &b ); + dcomplex* betap = bli_obj_buffer( &beta ); + dcomplex* cp = bli_obj_buffer( &c ); + + zhemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } +#endif + +#ifdef PRINT + bli_printm( "c after", &c, "%4.1f", "" ); + exit(1); +#endif + + + dtime_save = bli_clock_min_diff( dtime_save, dtime ); + } + + if ( bli_is_left( side ) ) + gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); + else + gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); + + if ( bli_is_complex( dt ) ) gflops *= 4.0; + +#ifdef BLIS + printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); +#else + printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); +#endif + printf( "( %2lu, 1:4 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin + 1)/p_inc + 1, + ( unsigned long )m, + ( unsigned long )n, gflops ); + + bli_obj_free( &alpha ); + bli_obj_free( &beta ); + + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + bli_obj_free( &c_save ); + } + + //bli_finalize(); + + return 0; +} +