diff --git a/frame/3/bli_l3_oapi.c b/frame/3/bli_l3_oapi.c index 701428a59..0340cf566 100644 --- a/frame/3/bli_l3_oapi.c +++ b/frame/3/bli_l3_oapi.c @@ -64,7 +64,7 @@ void PASTEMAC(opname,EX_SUF) \ where mixed datatype support will be implemented (if at all). */ \ if ( bli_obj_dt( c ) == bli_obj_dt( a ) && \ bli_obj_dt( c ) == bli_obj_dt( b ) && \ - bli_obj_dt( c ) == bli_obj_exec_dt( c ) && \ + bli_obj_dt( c ) == bli_obj_comp_dt( c ) && \ bli_obj_is_complex( c ) ) \ { \ /* Invoke the operation's "ind" function--its induced method front-end. diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index e91b100b2..b129ab020 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -201,9 +201,9 @@ STR_ST := -DTHR_STR=\"st\" STR_MT := -DTHR_STR=\"mt\" # Problem size specification -PDEF_ST := -DP_BEGIN=96 \ +PDEF_ST := -DP_BEGIN=40 \ -DP_END=2000 \ - -DP_INC=96 + -DP_INC=40 PDEF_MT := -DP_BEGIN=192 \ -DP_END=3000 \ @@ -215,34 +215,57 @@ PDEF_MT := -DP_BEGIN=192 \ # --- Targets/rules ------------------------------------------------------------ # -all: all-st all-mt -blis: blis-st blis-mt -blis-nat: blis-nat-st blis-nat-mt -openblas: openblas-st openblas-mt -mkl: mkl-st mkl-mt +all: all-st all-mt +blis: blis-st blis-mt +openblas: openblas-st openblas-mt +mkl: mkl-st mkl-mt -all-st: blis-st openblas-st mkl-st -all-mt: blis-mt openblas-mt mkl-mt +all-st: blis-st openblas-st mkl-st +all-mt: blis-mt openblas-mt mkl-mt -blis-st: blis-gemm-st -blis-mt: blis-gemm-mt +blis-st: blis-nat-st +blis-mt: blis-nat-mt -openblas-st: openblas-gemm-st -openblas-mt: openblas-gemm-mt +blis-ind: blis-ind-st blis-ind-mt +blis-nat: blis-nat-st blis-nat-mt -mkl-st: mkl-gemm-st -mkl-mt: mkl-gemm-mt +blis-ind-st: \ + test_cgemm_3mhw_blis_st.x \ + test_zgemm_3mhw_blis_st.x \ + test_cgemm_3m1_blis_st.x \ + test_zgemm_3m1_blis_st.x \ + test_cgemm_4mhw_blis_st.x \ + test_zgemm_4mhw_blis_st.x \ + test_cgemm_4m1b_blis_st.x \ + test_zgemm_4m1b_blis_st.x \ + test_cgemm_4m1a_blis_st.x \ + 
test_zgemm_4m1a_blis_st.x \ + test_cgemm_1m_blis_st.x \ + test_zgemm_1m_blis_st.x -blis-gemm-st: blis-gemm-nat-st \ - blis-gemm-ind-st -blis-gemm-mt: blis-gemm-nat-mt \ - blis-gemm-ind-mt +blis-ind-mt: \ + test_cgemm_3mhw_blis_mt.x \ + test_zgemm_3mhw_blis_mt.x \ + test_cgemm_3m1_blis_mt.x \ + test_zgemm_3m1_blis_mt.x \ + test_cgemm_4mhw_blis_mt.x \ + test_zgemm_4mhw_blis_mt.x \ + test_cgemm_4m1b_blis_mt.x \ + test_zgemm_4m1b_blis_mt.x \ + test_cgemm_4m1a_blis_mt.x \ + test_zgemm_4m1a_blis_mt.x \ + test_cgemm_1m_blis_mt.x \ + test_zgemm_1m_blis_mt.x blis-nat-st: \ test_sgemm_asm_blis_st.x \ test_dgemm_asm_blis_st.x \ test_cgemm_asm_blis_st.x \ test_zgemm_asm_blis_st.x \ + test_shemm_asm_blis_st.x \ + test_dhemm_asm_blis_st.x \ + test_chemm_asm_blis_st.x \ + test_zhemm_asm_blis_st.x \ test_sherk_asm_blis_st.x \ test_dherk_asm_blis_st.x \ test_cherk_asm_blis_st.x \ @@ -261,6 +284,10 @@ blis-nat-mt: \ test_dgemm_asm_blis_mt.x \ test_cgemm_asm_blis_mt.x \ test_zgemm_asm_blis_mt.x \ + test_shemm_asm_blis_mt.x \ + test_dhemm_asm_blis_mt.x \ + test_chemm_asm_blis_mt.x \ + test_zhemm_asm_blis_mt.x \ test_sherk_asm_blis_mt.x \ test_dherk_asm_blis_mt.x \ test_cherk_asm_blis_mt.x \ @@ -274,69 +301,94 @@ blis-nat-mt: \ test_ctrsm_asm_blis_mt.x \ test_ztrsm_asm_blis_mt.x -blis-gemm-nat-st: \ - test_sgemm_asm_blis_st.x \ - test_dgemm_asm_blis_st.x \ - test_cgemm_asm_blis_st.x \ - test_zgemm_asm_blis_st.x - -blis-gemm-nat-mt: \ - test_sgemm_asm_blis_mt.x \ - test_dgemm_asm_blis_mt.x \ - test_cgemm_asm_blis_mt.x \ - test_zgemm_asm_blis_mt.x - -blis-gemm-ind-st: \ - test_cgemm_3mhw_blis_st.x \ - test_zgemm_3mhw_blis_st.x \ - test_cgemm_3m1_blis_st.x \ - test_zgemm_3m1_blis_st.x \ - test_cgemm_4mhw_blis_st.x \ - test_zgemm_4mhw_blis_st.x \ - test_cgemm_4m1b_blis_st.x \ - test_zgemm_4m1b_blis_st.x \ - test_cgemm_4m1a_blis_st.x \ - test_zgemm_4m1a_blis_st.x \ - test_cgemm_1m_blis_st.x \ - test_zgemm_1m_blis_st.x - -blis-gemm-ind-mt: \ - test_cgemm_3mhw_blis_mt.x \ - 
test_zgemm_3mhw_blis_mt.x \ - test_cgemm_3m1_blis_mt.x \ - test_zgemm_3m1_blis_mt.x \ - test_cgemm_4mhw_blis_mt.x \ - test_zgemm_4mhw_blis_mt.x \ - test_cgemm_4m1b_blis_mt.x \ - test_zgemm_4m1b_blis_mt.x \ - test_cgemm_4m1a_blis_mt.x \ - test_zgemm_4m1a_blis_mt.x \ - test_cgemm_1m_blis_mt.x \ - test_zgemm_1m_blis_mt.x - -openblas-gemm-st: \ +openblas-st: \ test_sgemm_openblas_st.x \ test_dgemm_openblas_st.x \ test_cgemm_openblas_st.x \ - test_zgemm_openblas_st.x + test_zgemm_openblas_st.x \ + test_shemm_openblas_st.x \ + test_dhemm_openblas_st.x \ + test_chemm_openblas_st.x \ + test_zhemm_openblas_st.x \ + test_sherk_openblas_st.x \ + test_dherk_openblas_st.x \ + test_cherk_openblas_st.x \ + test_zherk_openblas_st.x \ + test_strmm_openblas_st.x \ + test_dtrmm_openblas_st.x \ + test_ctrmm_openblas_st.x \ + test_ztrmm_openblas_st.x \ + test_strsm_openblas_st.x \ + test_dtrsm_openblas_st.x \ + test_ctrsm_openblas_st.x \ + test_ztrsm_openblas_st.x -openblas-gemm-mt: \ +openblas-mt: \ test_sgemm_openblas_mt.x \ test_dgemm_openblas_mt.x \ test_cgemm_openblas_mt.x \ - test_zgemm_openblas_mt.x + test_zgemm_openblas_mt.x \ + test_shemm_openblas_mt.x \ + test_dhemm_openblas_mt.x \ + test_chemm_openblas_mt.x \ + test_zhemm_openblas_mt.x \ + test_sherk_openblas_mt.x \ + test_dherk_openblas_mt.x \ + test_cherk_openblas_mt.x \ + test_zherk_openblas_mt.x \ + test_strmm_openblas_mt.x \ + test_dtrmm_openblas_mt.x \ + test_ctrmm_openblas_mt.x \ + test_ztrmm_openblas_mt.x \ + test_strsm_openblas_mt.x \ + test_dtrsm_openblas_mt.x \ + test_ctrsm_openblas_mt.x \ + test_ztrsm_openblas_mt.x -mkl-gemm-st: \ +mkl-st: \ test_sgemm_mkl_st.x \ test_dgemm_mkl_st.x \ test_cgemm_mkl_st.x \ - test_zgemm_mkl_st.x + test_zgemm_mkl_st.x \ + test_shemm_mkl_st.x \ + test_dhemm_mkl_st.x \ + test_chemm_mkl_st.x \ + test_zhemm_mkl_st.x \ + test_sherk_mkl_st.x \ + test_dherk_mkl_st.x \ + test_cherk_mkl_st.x \ + test_zherk_mkl_st.x \ + test_strmm_mkl_st.x \ + test_dtrmm_mkl_st.x \ + test_ctrmm_mkl_st.x \ + 
test_ztrmm_mkl_st.x \ + test_strsm_mkl_st.x \ + test_dtrsm_mkl_st.x \ + test_ctrsm_mkl_st.x \ + test_ztrsm_mkl_st.x -mkl-gemm-mt: \ +mkl-mt: \ test_sgemm_mkl_mt.x \ test_dgemm_mkl_mt.x \ test_cgemm_mkl_mt.x \ - test_zgemm_mkl_mt.x + test_zgemm_mkl_mt.x \ + test_shemm_mkl_mt.x \ + test_dhemm_mkl_mt.x \ + test_chemm_mkl_mt.x \ + test_zhemm_mkl_mt.x \ + test_sherk_mkl_mt.x \ + test_dherk_mkl_mt.x \ + test_cherk_mkl_mt.x \ + test_zherk_mkl_mt.x \ + test_strmm_mkl_mt.x \ + test_dtrmm_mkl_mt.x \ + test_ctrmm_mkl_mt.x \ + test_ztrmm_mkl_mt.x \ + test_strsm_mkl_mt.x \ + test_dtrsm_mkl_mt.x \ + test_ctrsm_mkl_mt.x \ + test_ztrsm_mkl_mt.x + diff --git a/test/3m4m/test_hemm.c b/test/3m4m/test_hemm.c new file mode 100644 index 000000000..cf640b4f6 --- /dev/null +++ b/test/3m4m/test_hemm.c @@ -0,0 +1,331 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include <unistd.h> +#include "blis.h" + +//#define PRINT + +int main( int argc, char** argv ) +{ + obj_t a, b, c; + obj_t c_save; + obj_t alpha, beta; + dim_t m, n; + dim_t p; + dim_t p_begin, p_end, p_inc; + int m_input, n_input; + ind_t ind; + num_t dt; + char dt_ch; + int r, n_repeats; + side_t side; + uplo_t uploa; + f77_char f77_side; + f77_char f77_uploa; + + double dtime; + double dtime_save; + double gflops; + + //bli_init(); + + //bli_error_checking_level_set( BLIS_NO_ERROR_CHECKING ); + + n_repeats = 3; + + dt = DT; + + ind = IND; + + p_begin = P_BEGIN; + p_end = P_END; + p_inc = P_INC; + + m_input = -1; + n_input = -1; + + + // Suppress compiler warnings about unused variable 'ind'. + ( void )ind; + +#if 0 + + cntx_t* cntx; + + ind_t ind_mod = ind; + + // A hack to use 3m1 as 1mpb (with 1m as 1mbp). + if ( ind == BLIS_3M1 ) ind_mod = BLIS_1M; + + // Initialize a context for the current induced method and datatype. + cntx = bli_gks_query_ind_cntx( ind_mod, dt ); + + // Set k to the kc blocksize for the current datatype. + k_input = bli_cntx_get_blksz_def_dt( dt, BLIS_KC, cntx ); + +#elif 1 + + //k_input = 256; + +#endif + + // Choose the char corresponding to the requested datatype. 
+ if ( bli_is_float( dt ) ) dt_ch = 's'; + else if ( bli_is_double( dt ) ) dt_ch = 'd'; + else if ( bli_is_scomplex( dt ) ) dt_ch = 'c'; + else dt_ch = 'z'; + + side = BLIS_LEFT; + uploa = BLIS_LOWER; + + bli_param_map_blis_to_netlib_side( side, &f77_side ); + bli_param_map_blis_to_netlib_uplo( uploa, &f77_uploa ); + + // Begin with initializing the last entry to zero so that + // matlab allocates space for the entire array once up-front. + for ( p = p_begin; p + p_inc <= p_end; p += p_inc ) ; +#ifdef BLIS + printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); +#else + printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); +#endif + printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin + 1)/p_inc + 1, + ( unsigned long )0, + ( unsigned long )0, 0.0 ); + + + for ( p = p_begin; p <= p_end; p += p_inc ) + { + + if ( m_input < 0 ) m = p / ( dim_t )abs(m_input); + else m = ( dim_t ) m_input; + if ( n_input < 0 ) n = p / ( dim_t )abs(n_input); + else n = ( dim_t ) n_input; + + bli_obj_create( dt, 1, 1, 0, 0, &alpha ); + bli_obj_create( dt, 1, 1, 0, 0, &beta ); + + if ( bli_is_left( side ) ) + bli_obj_create( dt, m, m, 0, 0, &a ); + else + bli_obj_create( dt, n, n, 0, 0, &a ); + bli_obj_create( dt, m, n, 0, 0, &b ); + bli_obj_create( dt, m, n, 0, 0, &c ); + bli_obj_create( dt, m, n, 0, 0, &c_save ); + + bli_randm( &a ); + bli_randm( &b ); + bli_randm( &c ); + + bli_obj_set_struc( BLIS_HERMITIAN, &a ); + bli_obj_set_uplo( uploa, &a ); + + // Make A densely Hermitian, and zero the unstored triangle to + // ensure the implementation reads only from the stored region. 
+ bli_mkherm( &a ); + bli_mktrim( &a ); + + bli_setsc( (2.0/1.0), 0.0, &alpha ); + bli_setsc( (1.0/1.0), 0.0, &beta ); + + + bli_copym( &c, &c_save ); + +#ifdef BLIS + bli_ind_disable_all_dt( dt ); + bli_ind_enable_dt( ind, dt ); +#endif + + dtime_save = DBL_MAX; + + for ( r = 0; r < n_repeats; ++r ) + { + bli_copym( &c_save, &c ); + + dtime = bli_clock(); + + +#ifdef PRINT + bli_printm( "a", &a, "%4.1f", "" ); + bli_printm( "b", &b, "%4.1f", "" ); + bli_printm( "c", &c, "%4.1f", "" ); +#endif + +#ifdef BLIS + + bli_hemm( side, + &alpha, + &a, + &b, + &beta, + &c ); + +#else + + if ( bli_is_float( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + float* alphap = bli_obj_buffer( &alpha ); + float* ap = bli_obj_buffer( &a ); + float* bp = bli_obj_buffer( &b ); + float* betap = bli_obj_buffer( &beta ); + float* cp = bli_obj_buffer( &c ); + + ssymm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_double( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + double* alphap = bli_obj_buffer( &alpha ); + double* ap = bli_obj_buffer( &a ); + double* bp = bli_obj_buffer( &b ); + double* betap = bli_obj_buffer( &beta ); + double* cp = bli_obj_buffer( &c ); + + dsymm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_scomplex( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + scomplex* alphap = bli_obj_buffer( &alpha ); + scomplex* ap = bli_obj_buffer( &a ); + 
scomplex* bp = bli_obj_buffer( &b ); + scomplex* betap = bli_obj_buffer( &beta ); + scomplex* cp = bli_obj_buffer( &c ); + + chemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } + else if ( bli_is_dcomplex( dt ) ) + { + f77_int mm = bli_obj_length( &c ); + f77_int nn = bli_obj_width( &c ); + f77_int lda = bli_obj_col_stride( &a ); + f77_int ldb = bli_obj_col_stride( &b ); + f77_int ldc = bli_obj_col_stride( &c ); + dcomplex* alphap = bli_obj_buffer( &alpha ); + dcomplex* ap = bli_obj_buffer( &a ); + dcomplex* bp = bli_obj_buffer( &b ); + dcomplex* betap = bli_obj_buffer( &beta ); + dcomplex* cp = bli_obj_buffer( &c ); + + zhemm_( &f77_side, + &f77_uploa, + &mm, + &nn, + alphap, + ap, &lda, + bp, &ldb, + betap, + cp, &ldc ); + } +#endif + +#ifdef PRINT + bli_printm( "c after", &c, "%4.1f", "" ); + exit(1); +#endif + + + dtime_save = bli_clock_min_diff( dtime_save, dtime ); + } + + if ( bli_is_left( side ) ) + gflops = ( 2.0 * m * m * n ) / ( dtime_save * 1.0e9 ); + else + gflops = ( 2.0 * m * n * n ) / ( dtime_save * 1.0e9 ); + + if ( bli_is_complex( dt ) ) gflops *= 4.0; + +#ifdef BLIS + printf( "data_%s_%chemm_%s_blis", THR_STR, dt_ch, STR ); +#else + printf( "data_%s_%chemm_%s", THR_STR, dt_ch, STR ); +#endif + printf( "( %2lu, 1:3 ) = [ %4lu %4lu %7.2f ];\n", + ( unsigned long )(p - p_begin + 1)/p_inc + 1, + ( unsigned long )m, + ( unsigned long )n, gflops ); + + bli_obj_free( &alpha ); + bli_obj_free( &beta ); + + bli_obj_free( &a ); + bli_obj_free( &b ); + bli_obj_free( &c ); + bli_obj_free( &c_save ); + } + + //bli_finalize(); + + return 0; +} + diff --git a/test/3m4m/test_herk.c b/test/3m4m/test_herk.c index 66a057a59..6616ffe9f 100644 --- a/test/3m4m/test_herk.c +++ b/test/3m4m/test_herk.c @@ -242,7 +242,7 @@ int main( int argc, char** argv ) f77_int ldc = bli_obj_col_stride( &c ); float* alphap = bli_obj_buffer( &alpha ); scomplex* ap = bli_obj_buffer( &a ); - scomplex* betap = bli_obj_buffer( &beta ); 
+ float* betap = bli_obj_buffer( &beta ); scomplex* cp = bli_obj_buffer( &c ); cherk_( &f77_uploc, @@ -262,7 +262,7 @@ int main( int argc, char** argv ) f77_int ldc = bli_obj_col_stride( &c ); double* alphap = bli_obj_buffer( &alpha ); dcomplex* ap = bli_obj_buffer( &a ); - dcomplex* betap = bli_obj_buffer( &beta ); + double* betap = bli_obj_buffer( &beta ); dcomplex* cp = bli_obj_buffer( &c ); zherk_( &f77_uploc,