diff --git a/.travis.yml b/.travis.yml index d1dcce71d..adcd16abf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,89 +1,62 @@ language: c -sudo: false - -os: - - linux - - osx - -compiler: - - gcc - - clang - +sudo: required +dist: trusty env: - - OOT=1 TEST=0 THR="none" CONF="auto" - - OOT=0 TEST=1 THR="none" CONF="auto" - - OOT=0 TEST=1 THR="none" CONF="penryn" - - OOT=0 TEST=0 THR="none" CONF="sandybridge" - - OOT=0 TEST=0 THR="none" CONF="haswell" - - OOT=0 TEST=0 THR="none" CONF="knl" - - OOT=0 TEST=0 THR="none" CONF="bulldozer" - - OOT=0 TEST=0 THR="none" CONF="piledriver" - - OOT=0 TEST=0 THR="none" CONF="steamroller" - - OOT=0 TEST=0 THR="none" CONF="excavator" - - OOT=0 TEST=0 THR="none" CONF="zen" - - OOT=0 TEST=0 THR="openmp" CONF="auto" - - OOT=0 TEST=0 THR="pthreads" CONF="auto" - + global: + secure: "Ty3PM1xGhXwxfJG6YyY9bUZyXzw98ekHxQEqU9VnrMXTZb28IxfocPCXHjL34r9HTGosO5Pmierhal1Cs3ZKE5ZAJqJhCfck+kwlH21Uay5CNYglDtSmy2qxtbbDG4AxpEZ1UKlIZr1pNh/x+pRemSmnMEnQp/E7QJqdkhm4+aMX2bWKyLPtrdL+B9QXLVT2nT6/Fw3i05aBhpcFJpSPfvYX2KoCZYdJOSKcKci4T8nAfP/c0olkz+jAkBZxZFgO9Ptrt/lvHtVPrkh5o29GvHg2i/4vucbsMltoxlV31/2eYpdr17Ngtt41MMVn2fHV4lVhLmENc04nlm084fBtg73T6b8hNy5JlcA44xI/UrPJsQAJ+0A0ds9BbBQKPxOmaF/O8WGXhwiwdKT6DGS9lj05f3S+yZfeNE3pQhLEcvwXLO5SW3VvKXMj0t/lZyG+XCkvFjD7KEPQV4g+BZc2zzD9TwDx3ydn8Uzd6zZlq1erQUzCnODP24wuwfrNP8nqxFYG0VtI8oZW62IC9U2hcnAF5QNXXW3yDYD65k3BHbigfI28gu9iO9G8RxOglR27J7Whdqkqw3AMRaqyHt2tdbz7tM2dLZ0EatT5m8esjC+LP4EshW9C59jP2U9vJ/94YEgOfwiqk8+e6fL/7dJvOumbwu1RclRI9DS88PPYb3Q=" matrix: - allow_failures: - - env: OOT=0 TEST=0 THR="none" CONF="knl" - - os: osx - env: OOT=0 TEST=1 THR="none" CONF="auto" - exclude: - - os: linux - compiler: clang - - os: osx - compiler: gcc - - os: osx - env: OOT=1 TEST=0 THR="none" CONF="auto" - - os: osx - env: OOT=0 TEST=1 THR="none" CONF="penryn" - - os: osx - env: OOT=0 TEST=0 THR="none" CONF="sandybridge" - - os: osx - env: OOT=0 TEST=0 THR="none" CONF="haswell" - - os: osx - env: OOT=0 TEST=0 THR="none" CONF="knl" - - 
os: osx - env: OOT=0 TEST=0 THR="none" CONF="bulldozer" - - os: osx - env: OOT=0 TEST=0 THR="none" CONF="piledriver" - - os: osx - env: OOT=0 TEST=0 THR="none" CONF="steamroller" - - os: osx - env: OOT=0 TEST=0 THR="none" CONF="excavator" - - os: osx - env: OOT=0 TEST=0 THR="none" CONF="zen" - - os: osx - env: OOT=0 TEST=0 THR="openmp" CONF="auto" - + include: + # full testsuite + - os: linux + compiler: gcc + env: OOT=0 TEST=1 SDE=0 THR="none" CONF="auto" + # test x86_64 ukrs with SDE + - os: linux + compiler: gcc + env: OOT=0 TEST=0 SDE=1 THR="none" CONF="x86_64" + # openmp build + - os: linux + compiler: gcc + env: OOT=0 TEST=0 SDE=0 THR="openmp" CONF="auto" + # pthreads build + - os: linux + compiler: gcc + env: OOT=0 TEST=0 SDE=0 THR="pthreads" CONF="auto" + # out-of-tree build + - os: linux + compiler: gcc + env: OOT=1 TEST=0 SDE=0 THR="none" CONF="auto" + # clang build + - os: linux + compiler: clang + env: OOT=0 TEST=0 SDE=0 THR="none" CONF="auto" + # macOS with system compiler (clang) + - os: osx + compiler: clang + env: OOT=0 TEST=0 SDE=0 THR="none" CONF="auto" install: - - if [ "$CC" = "gcc" ] && [ "$TRAVIS_OS_NAME" = "linux" ]; then export CC="gcc-5"; fi - +- if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo rm -f /usr/bin/as; fi +- if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo ln -s /usr/lib/binutils-2.26/bin/as /usr/bin/as; fi +- if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo rm -f /usr/bin/ld; fi +- if [ "$TRAVIS_OS_NAME" = "linux" ]; then sudo ln -s /usr/lib/binutils-2.26/bin/ld /usr/bin/ld; fi +- if [ "$CC" = "gcc" ] && [ "$TRAVIS_OS_NAME" = "linux" ]; then export CC="gcc-6"; fi addons: apt: sources: - ubuntu-toolchain-r-test packages: - - gcc-5 + - gcc-6 + - binutils-2.26 - clang - script: - - export DIST_PATH=. 
- - pwd - - if [ $OOT -eq 1 ]; then mkdir oot; cd oot; export DIST_PATH=..; fi - - pwd - - $DIST_PATH/configure -t $THR CC=$CC $CONF - - pwd - - ls -l - - $CC --version - - make -j 2 - - export BLIS_IC_NT=2 - - export BLIS_JC_NT=1 - - export BLIS_IR_NT=1 - - export BLIS_JR_NT=1 - - if [ $TEST -eq 1 ]; then travis_wait 30 make BLIS_ENABLE_TEST_OUTPUT=yes testblis; fi - - if [ $TEST -eq 1 ]; then $DIST_PATH/build/check-blistest.sh ./output.testsuite; fi - - if [ $TEST -eq 1 ]; then make BLIS_ENABLE_TEST_OUTPUT=yes testblas; fi - - if [ $TEST -eq 1 ]; then $DIST_PATH/build/check-blastest.sh; fi - +- export DIST_PATH=. +- pwd +- if [ $OOT -eq 1 ]; then mkdir oot; cd oot; export DIST_PATH=..; fi +- pwd +- $DIST_PATH/configure -t $THR CC=$CC $CONF +- pwd +- ls -l +- $CC --version +- make -j 2 +- if [ $TEST -eq 1 ]; then travis_wait 30 $DIST_PATH/travis/do_testsuite.sh; fi +- if [ $SDE -eq 1 ]; then travis_wait 30 $DIST_PATH/travis/do_sde.sh; fi diff --git a/CREDITS b/CREDITS index 9c10a0e3e..b1a54ac43 100644 --- a/CREDITS +++ b/CREDITS @@ -10,9 +10,9 @@ The BLIS framework was primarily authored by but many others have contributed code and feedback, including Murtaza Ali (Texas Instruments) + Erling Andersen Alex Arslan Vernon Austel (IBM, T.J. 
Watson Research Center) - Erling Andersen Jed Brown (Argonne National Laboratory) Johannes Dieterich Krzysztof Drewniak @@ -39,15 +39,16 @@ but many others have contributed code and feedback, including Devangi Parikh (The University of Texas at Austin) Elmar Peise (RWTH-Aachen) Clément Pernet - Jack Poulson (Stanford) Ilya Polkovnichenko + Jack Poulson (Stanford) Michael Rader Pradeep Rao (AMD) Aleksei Rechinskii Karl Rupp - Rene Sitt Martin Schatz (The University of Texas at Austin) Nico Schlömer + Rene Sitt + Tony Skjellum (The University of Tennessee at Chattanooga) Mikhail Smelyanskiy (Intel, Parallel Computing Lab) Shaden Smith Tyler Smith (The University of Texas at Austin) diff --git a/build/config.mk.in b/build/config.mk.in index ed0485e16..8c2dced21 100644 --- a/build/config.mk.in +++ b/build/config.mk.in @@ -118,8 +118,9 @@ MK_ENABLE_CBLAS := @enable_cblas@ # Whether libblis will depend on libmemkind for certain memory allocations. MK_ENABLE_MEMKIND := @enable_memkind@ -# Whether an alternative gemm implementation will be compiled and included -# in BLIS. +# The name of a sandbox defining an alternative gemm implementation. If empty, +# no sandbox will be used and the conventional gemm implementation will remain +# enabled. SANDBOX := @sandbox@ # end of ifndef CONFIG_MK_INCLUDED conditional block diff --git a/common.mk b/common.mk index 955adbd30..3104b8ba8 100644 --- a/common.mk +++ b/common.mk @@ -648,7 +648,10 @@ PARENT_PATH := $(DIST_PATH) # -- sandbox -- # Construct paths to each sandbox. (At present, there can be only one.) -SANDBOX_PATHS := := $(addprefix $(SANDBOX_PATH)/, $(SANDBOX)) +# NOTE: If $(SANDBOX) is empty (because no sandbox was enabled at configure- +# time) then $(SANDBOX_PATHS) will also be empty, which will cause no +# fragments to be included. +SANDBOX_PATHS := $(addprefix $(SANDBOX_PATH)/, $(SANDBOX)) # This variable is used by the include statements as they recursively include # one another. 
For the 'sandbox' directory, we initialize it to that directory @@ -658,7 +661,6 @@ PARENT_PATH := $(DIST_PATH)/$(SANDBOX_DIR) # Recursively include the makefile fragments in the sandbox sub-directory. -include $(addsuffix /$(FRAGMENT_MK), $(SANDBOX_PATHS)) - # Create a list of the makefile fragments using the variable into which each # of the above include statements accumulated their directory paths. MAKEFILE_FRAGMENTS := $(addsuffix /$(FRAGMENT_MK), $(FRAGMENT_DIR_PATHS)) diff --git a/config/excavator/bli_cntx_init_excavator.c b/config/excavator/bli_cntx_init_excavator.c index 065154d27..56d04ef4e 100644 --- a/config/excavator/bli_cntx_init_excavator.c +++ b/config/excavator/bli_cntx_init_excavator.c @@ -39,7 +39,7 @@ void bli_cntx_init_excavator( cntx_t* cntx ) blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. - bli_cntx_init_piledriver_ref( cntx ); + bli_cntx_init_excavator_ref( cntx ); // ------------------------------------------------------------------------- diff --git a/config/excavator/make_defs.mk b/config/excavator/make_defs.mk index cebaa30df..12d5add0c 100644 --- a/config/excavator/make_defs.mk +++ b/config/excavator/make_defs.mk @@ -63,10 +63,10 @@ endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) -CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 +CKVECFLAGS := -mfpmath=sse -mavx -mfma -mno-fma4 -march=bdver4 else ifeq ($(CC_VENDOR),clang) -CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver4 +CKVECFLAGS := -mfpmath=sse -mavx -mfma -mno-fma4 -march=bdver4 else $(error gcc or clang are required for this configuration.) 
endif diff --git a/config/haswell/bli_cntx_init_haswell.c b/config/haswell/bli_cntx_init_haswell.c index 2823277a9..6d794430c 100644 --- a/config/haswell/bli_cntx_init_haswell.c +++ b/config/haswell/bli_cntx_init_haswell.c @@ -49,10 +49,17 @@ void bli_cntx_init_haswell( cntx_t* cntx ) ( 8, // gemm +#if 1 BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_zen_asm_6x16, TRUE, BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_zen_asm_6x8, TRUE, BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_zen_asm_3x8, TRUE, BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_zen_asm_3x4, TRUE, +#else + BLIS_GEMM_UKR, BLIS_FLOAT, bli_sgemm_zen_asm_16x6, FALSE, + BLIS_GEMM_UKR, BLIS_DOUBLE, bli_dgemm_zen_asm_8x6, FALSE, + BLIS_GEMM_UKR, BLIS_SCOMPLEX, bli_cgemm_zen_asm_8x3, FALSE, + BLIS_GEMM_UKR, BLIS_DCOMPLEX, bli_zgemm_zen_asm_4x3, FALSE, +#endif // gemmtrsm_l BLIS_GEMMTRSM_L_UKR, BLIS_FLOAT, bli_sgemmtrsm_l_zen_asm_6x16, TRUE, BLIS_GEMMTRSM_L_UKR, BLIS_DOUBLE, bli_dgemmtrsm_l_zen_asm_6x8, TRUE, @@ -108,8 +115,13 @@ void bli_cntx_init_haswell( cntx_t* cntx ) // Initialize level-3 blocksize objects with architecture-specific values. // s d c z +#if 1 bli_blksz_init_easy( &blkszs[ BLIS_MR ], 6, 6, 3, 3 ); bli_blksz_init_easy( &blkszs[ BLIS_NR ], 16, 8, 8, 4 ); +#else + bli_blksz_init_easy( &blkszs[ BLIS_MR ], 16, 8, 8, 4 ); + bli_blksz_init_easy( &blkszs[ BLIS_NR ], 6, 6, 3, 3 ); +#endif bli_blksz_init_easy( &blkszs[ BLIS_MC ], 144, 72, 144, 72 ); bli_blksz_init_easy( &blkszs[ BLIS_KC ], 256, 256, 256, 256 ); bli_blksz_init_easy( &blkszs[ BLIS_NC ], 4080, 4080, 4080, 4080 ); diff --git a/config/piledriver/make_defs.mk b/config/piledriver/make_defs.mk index 2e58143ec..395b8b9b5 100644 --- a/config/piledriver/make_defs.mk +++ b/config/piledriver/make_defs.mk @@ -63,10 +63,10 @@ endif # Flags specific to optimized kernels. 
CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) -CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 +CKVECFLAGS := -mfpmath=sse -mavx -mfma -mno-fma4 -march=bdver2 else ifeq ($(CC_VENDOR),clang) -CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 +CKVECFLAGS := -mfpmath=sse -mavx -mfma -mno-fma4 -march=bdver2 else $(error gcc or clang are required for this configuration.) endif diff --git a/config/steamroller/bli_cntx_init_steamroller.c b/config/steamroller/bli_cntx_init_steamroller.c index b1409e4fc..1b6566c5c 100644 --- a/config/steamroller/bli_cntx_init_steamroller.c +++ b/config/steamroller/bli_cntx_init_steamroller.c @@ -39,7 +39,7 @@ void bli_cntx_init_steamroller( cntx_t* cntx ) blksz_t blkszs[ BLIS_NUM_BLKSZS ]; // Set default kernel blocksizes and functions. - bli_cntx_init_piledriver_ref( cntx ); + bli_cntx_init_steamroller_ref( cntx ); // ------------------------------------------------------------------------- diff --git a/config/steamroller/make_defs.mk b/config/steamroller/make_defs.mk index cbd9064cc..adb6ebe2e 100644 --- a/config/steamroller/make_defs.mk +++ b/config/steamroller/make_defs.mk @@ -63,10 +63,10 @@ endif # Flags specific to optimized kernels. CKOPTFLAGS := $(COPTFLAGS) ifeq ($(CC_VENDOR),gcc) -CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 +CKVECFLAGS := -mfpmath=sse -mavx -mfma -mno-fma4 -march=bdver3 else ifeq ($(CC_VENDOR),clang) -CKVECFLAGS := -mfpmath=sse -mavx -mfma -march=bdver2 +CKVECFLAGS := -mfpmath=sse -mavx -mfma -mno-fma4 -march=bdver3 else $(error gcc or clang are required for this configuration.) endif diff --git a/configure b/configure index 525fc6627..69365cad0 100755 --- a/configure +++ b/configure @@ -301,16 +301,15 @@ pass_config_kernel_registries() passnum="$2" # Initialize a list of indirect blacklisted configurations for the - # current iteration. These are configurations that are invalidated - # by the removal of blacklisted configurations. 
For example, if - # haswell is registered as needing the 'haswell' and 'zen' kernel - # sets: + # current iteration. These are configurations that are invalidated by + # the removal of blacklisted configurations. For example, if haswell + # is registered as needing the 'haswell' and 'zen' kernel sets: # # haswell: haswell/haswell/zen # - # and 'zen' was blacklisted because of the compiler version, then - # the 'haswell' configuration must be omitted from the registry, - # as it no longer has all of the kernel sets it was expecting. + # and 'zen' was blacklisted because of the compiler version, then the + # 'haswell' configuration must be omitted from the registry, as it no + # longer has all of the kernel sets it was expecting. if [ "${passnum}" == "0" ]; then indirect_blist="" fi @@ -320,6 +319,22 @@ pass_config_kernel_registries() # indirect_blist is still empty. all_blist="${config_blist} ${indirect_blist}" + # Disable support for indirect blacklisting by returning early during + # pass 0. See issue #214 for details [1]. Basically, I realized that + # indirect blacklisting is not needed in the use case that I envisioned + # in the real-life example above. If a subconfiguration such as haswell + # is defined to require the zen kernel set, it implies that the zen + # kernels can be compiled with haswell compiler flags. That is, just + # because the zen subconfig (and its compiler flags) is blacklisted + # does not mean that the haswell subconfig cannot compile the zen + # kernels with haswell-specific flags. + # + # [1] https://github.com/flame/blis/issues/214 + # + if [ "${passnum}" == "0" ]; then + return + fi + while read -r line do curline="${line}" @@ -1184,7 +1199,8 @@ check_compiler() # [1] While gcc 6.0 or newer is needed for zen support (-march=znver1), # we relax this compiler version constraint a bit by targeting bdver4 # and then disabling the instruction sets that were removed in the - # transition from bdver4 to znver1. 
+ # transition from bdver4 to znver1. (See config/zen/make_defs.mk for + # the specific compiler flags used.) # [2] https://github.com/devinamatthews/tblis/ # @@ -1871,6 +1887,9 @@ main() echo "done." # Report if additional configurations needed to be blacklisted. + # NOTE: This branch should never execute so long as indirect blacklisting + # is disabled. See comment regarding issue #214 in the definition of + # pass_config_kernel_registries(). if [ -n "${indirect_blist}" ]; then echo "${script_name}: needed to indirectly blacklist additional configurations:" echo "${script_name}: ${indirect_blist}" @@ -2282,9 +2301,10 @@ main() fi if [ "x${has_memkind}" = "xyes" ]; then # If no explicit option was given for libmemkind one way or the other, - # default to the value previously returned by has_libmemkind(). + # we use the value returned previously by has_libmemkind() to determine + # the default. if [ "x${enable_memkind}" = "x" ]; then - enable_memkind=${has_memkind} + enable_memkind="yes" fi echo "${script_name}: libmemkind found; default is to enable use." if [ "x${enable_memkind}" = "xyes" ]; then @@ -2299,6 +2319,7 @@ main() if [ "x${enable_memkind}" = "xyes" ]; then echo "${script_name}: cannot honor explicit request to enable libmemkind." fi + enable_memkind="no" enable_memkind_01=0 fi if [ "x${enable_blas}" = "xyes" ]; then diff --git a/examples/oapi/0obj_basic.c b/examples/oapi/00obj_basic.c similarity index 96% rename from examples/oapi/0obj_basic.c rename to examples/oapi/00obj_basic.c index 38c2e7047..4cf1565b6 100644 --- a/examples/oapi/0obj_basic.c +++ b/examples/oapi/00obj_basic.c @@ -122,7 +122,7 @@ int main( int argc, char** argv ) // Let's inspect the amount of padding inserted for alignment. Note // the difference between the m dimension and the column stride. 
printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a8 ) ) ); - printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a8 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a8 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a8 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a8 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a8 ) ); @@ -140,7 +140,7 @@ int main( int argc, char** argv ) bli_obj_create( BLIS_DCOMPLEX, 3, 5, 0, 0, &a11 ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a9 ) ) ); - printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a9 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a9 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a9 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a9 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a9 ) ); @@ -148,7 +148,7 @@ int main( int argc, char** argv ) printf( "\n" ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a10 ) ) ); - printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a10 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a10 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a10 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a10 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a10 ) ); @@ -156,7 +156,7 @@ int main( int argc, char** argv ) printf( "\n" ); printf( "datatype %s\n", bli_dt_string( bli_obj_dt( &a11 ) ) ); - printf( "datatype size %d bytes\n", bli_dt_size( bli_obj_dt( &a11 ) ) ); + printf( "datatype size %d bytes\n", ( int )bli_dt_size( bli_obj_dt( &a11 ) ) ); printf( "m dim (# of rows): %d\n", ( int )bli_obj_length( &a11 ) ); printf( "n dim (# of cols): %d\n", ( int )bli_obj_width( &a11 ) ); printf( "row stride: %d\n", ( int )bli_obj_row_stride( &a11 ) ); diff --git a/examples/oapi/1obj_attach.c b/examples/oapi/01obj_attach.c similarity 
index 100% rename from examples/oapi/1obj_attach.c rename to examples/oapi/01obj_attach.c diff --git a/examples/oapi/2obj_ij.c b/examples/oapi/02obj_ij.c similarity index 96% rename from examples/oapi/2obj_ij.c rename to examples/oapi/02obj_ij.c index 0a15ac8a4..322b7eff5 100644 --- a/examples/oapi/2obj_ij.c +++ b/examples/oapi/02obj_ij.c @@ -83,18 +83,18 @@ int main( int argc, char** argv ) bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); // Here, we print out the element "returned" by bli_getijm(). - printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", i, j, alpha_r, alpha_i ); + printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); // Let's query a few more elements. i = 0; j = 2; bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); - printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", i, j, alpha_r, alpha_i ); + printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); i = 3; j = 4; bli_getijm( i, j, &a1, &alpha_r, &alpha_i ); - printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", i, j, alpha_r, alpha_i ); + printf( "element (%2d,%2d) of matrix 'a1' (real + imag): %5.1f + %5.1f\n", ( int )i, ( int )j, alpha_r, alpha_i ); printf( "\n" ); @@ -224,8 +224,8 @@ void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) void init_dobj_by_cols( obj_t* a ) { - dim_t m = bli_obj_length( *a ); - dim_t n = bli_obj_width( *a ); + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); dim_t i, j; double alpha = 0.0; @@ -245,8 +245,8 @@ void init_dobj_by_cols( obj_t* a ) void init_zobj_by_cols( obj_t* a ) { - dim_t m = bli_obj_length( *a ); - dim_t n = bli_obj_width( *a ); + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); dim_t i, j; double alpha = 0.0; diff --git a/examples/oapi/03obj_view.c b/examples/oapi/03obj_view.c new file mode 100644 index 
000000000..68c9c922f --- /dev/null +++ b/examples/oapi/03obj_view.c @@ -0,0 +1,272 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +#include <stdio.h> +#include <stdlib.h> +#include "blis.h" + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ); +void init_dobj_by_cols( obj_t* a ); +void init_zobj_by_cols( obj_t* a ); + +int main( int argc, char** argv ) +{ + obj_t a1, a2; + obj_t v1, v2, v3, v4, v5; + num_t dt; + dim_t m, n; + inc_t rs, cs; + dim_t i, j; + dim_t mv, nv; + + // + // This file demonstrates creating and using submatrix views into existing matrices. + // + + // + // Example 1: Create an object and then create a submatrix view. + // + + printf( "\n#\n# -- Example 1 --\n#\n\n" ); + + // We'll use these parameters for the following examples. + dt = BLIS_DOUBLE; + m = 6; n = 7; rs = 1; cs = m; + + // Create an object a1 using bli_obj_create(). + bli_obj_create( dt, m, n, rs, cs, &a1 ); + + // Initialize a1 to contain known values. + init_dobj_by_cols( &a1 ); + + bli_printm( "matrix 'a1' (initial state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at (i,j) offsets (1,2). + i = 1; j = 2; mv = 4; nv = 3; + bli_acquire_mpart( i, j, mv, nv, &a1, &v1 ); + + bli_printm( "4x3 submatrix 'v1' at offsets (1,2)", &v1, "%5.1f", "" ); + + // NOTE: Submatrix views should never be passed to bli_obj_free(). It + // will not cause an immediate error, but it is bad practice. Instead, + // you should only release the objects that were created directly via + // bli_obj_create(). In the above example, that means only object a1 + // would be passed to bli_obj_free(). + + // + // Example 2: Modify the contents of a submatrix view. + // + + printf( "\n#\n# -- Example 2 --\n#\n\n" ); + + // Modify the first three elements of the first column. + bli_setijm( -3.0, 0.0, 0, 0, &v1 ); + bli_setijm( -4.0, 0.0, 1, 0, &v1 ); + bli_setijm( -5.0, 0.0, 2, 0, &v1 ); + + // Modify the first three elements of the second column. 
+ bli_setijm( -6.0, 0.0, 0, 1, &v1 ); + bli_setijm( -7.0, 0.0, 1, 1, &v1 ); + bli_setijm( -8.0, 0.0, 2, 1, &v1 ); + + // Print the matrix again so we can see the updated elements. + bli_printm( "submatrix view 'v1' (modified state)", &v1, "%5.1f", "" ); + bli_printm( "matrix 'a1' (indirectly modified due to changes to 'v1')", &a1, "%5.1f", "" ); + + // + // Example 3: Create a submatrix view that is "too big". + // + + printf( "\n#\n# -- Example 3 --\n#\n\n" ); + + // bli_acquire_mpart() will safely truncate your requested submatrix + // view dimensions (or even the offsets) if they extend beyond the + // bounds of the parent object. + + bli_printm( "matrix 'a1' (current state)", &a1, "%5.1f", "" ); + + // Acquire a 4x3 submatrix view into a1 at offsets (4,2). Notice how + // the requested view contains four rows, but the view is created with + // only two rows because the starting m offset of 4 leaves only two rows + // left in the parent matrix. + bli_acquire_mpart( 4, 2, 4, 3, &a1, &v2 ); + + bli_printm( "4x3 submatrix 'v2' at offsets (4,2) -- two rows truncated for safety", &v2, "%5.1f", "" ); + + // + // Example 4: Create a bufferless object, attach an external buffer, and + // then create a submatrix view. + // + + printf( "\n#\n# -- Example 4 --\n#\n\n" ); + + // Create an object with known elements using the same approach as the + // previous example file. + double* p1 = malloc( m * n * sizeof( double ) ); + init_dmatrix_by_cols( m, n, p1, rs, cs ); + bli_obj_create_with_attached_buffer( dt, m, n, p1, rs, cs, &a2 ); + + bli_printm( "matrix 'a2' (initial state)", &a2, "%5.1f", "" ); + + // Acquire a 3x4 submatrix view at offset (2,3). + bli_acquire_mpart( 2, 3, 3, 4, &a2, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + // + // Example 5: Use a submatrix view to set a region of a larger matrix to + // zero. 
+ // + + printf( "\n#\n# -- Example 5 --\n#\n\n" ); + + bli_printm( "3x4 submatrix view 'v3' at offsets (2,3)", &v3, "%5.1f", "" ); + + bli_setm( &BLIS_ZERO, &v3 ); + + bli_printm( "3x4 submatrix view 'v3' (zeroed out)", &v3, "%5.1f", "" ); + + bli_printm( "matrix 'a2' (modified state)", &a2, "%5.1f", "" ); + + // + // Example 6: Obtain a submatrix view into a submatrix view. + // + + printf( "\n#\n# -- Example 6 --\n#\n\n" ); + + bli_acquire_mpart( 1, 1, 5, 6, &a2, &v4 ); + + bli_printm( "5x6 submatrix view 'v4' at offsets (1,1) of 'a2'", &v4, "%5.1f", "" ); + + bli_acquire_mpart( 1, 0, 4, 5, &v4, &v5 ); + + bli_printm( "4x5 submatrix view 'v5' at offsets (1,0) of 'v4'", &v5, "%5.1f", "" ); + + + // Free the memory arrays we allocated. + free( p1 ); + + // Free the objects we created. + bli_obj_free( &a1 ); + + return 0; +} + +// ----------------------------------------------------------------------------- + +void init_dmatrix_by_rows( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by rows, assigning each element a unique + // value, starting at 0. + for ( i = 0; i < m; ++i ) + { + for ( j = 0; j < n; ++j ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dmatrix_by_cols( dim_t m, dim_t n, double* a, inc_t rs, inc_t cs ) +{ + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + double* a_ij = a + i*rs + j*cs; + + *a_ij = alpha; + + alpha += 1.0; + } + } +} + +void init_dobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each element a unique + // value, starting at 0. 
+ for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, 0.0, i, j, a ); + + alpha += 1.0; + } + } +} + +void init_zobj_by_cols( obj_t* a ) +{ + dim_t m = bli_obj_length( a ); + dim_t n = bli_obj_width( a ); + dim_t i, j; + + double alpha = 0.0; + + // Step through a matrix by columns, assigning each real and imaginary + // element a unique value, starting at 0. + for ( j = 0; j < n; ++j ) + { + for ( i = 0; i < m; ++i ) + { + bli_setijm( alpha, alpha + 1.0, i, j, a ); + + alpha += 2.0; + } + } +} + diff --git a/examples/oapi/3level0.c b/examples/oapi/04level0.c similarity index 100% rename from examples/oapi/3level0.c rename to examples/oapi/04level0.c diff --git a/examples/oapi/4level1v.c b/examples/oapi/05level1v.c similarity index 100% rename from examples/oapi/4level1v.c rename to examples/oapi/05level1v.c diff --git a/examples/oapi/5level1m.c b/examples/oapi/06level1m.c similarity index 100% rename from examples/oapi/5level1m.c rename to examples/oapi/06level1m.c diff --git a/examples/oapi/6level1m_diag.c b/examples/oapi/07level1m_diag.c similarity index 98% rename from examples/oapi/6level1m_diag.c rename to examples/oapi/07level1m_diag.c index 130311a6c..14ee8d902 100644 --- a/examples/oapi/6level1m_diag.c +++ b/examples/oapi/07level1m_diag.c @@ -59,7 +59,7 @@ int main( int argc, char** argv ) bli_obj_create( dt, m, n, rs, cs, &a ); // First, we mark the matrix structure as triangular. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ) + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); // Next, we specify whether the lower part or the upper part is to be // recognized as the "stored" region (which we call the uplo field). The @@ -89,7 +89,7 @@ int main( int argc, char** argv ) bli_obj_create( dt, m, n, rs, cs, &b ); // Set structure and uplo. - bli_obj_set_struc( BLIS_TRIANGULAR, &b ) + bli_obj_set_struc( BLIS_TRIANGULAR, &b ); bli_obj_set_uplo( BLIS_UPPER, &b ); // Create an alias, 'bl', of the original object 'b'. 
Both objects will @@ -245,7 +245,7 @@ int main( int argc, char** argv ) bli_obj_alias_to( &e, &el ); // Set structure and uplo of 'el'. - bli_obj_set_struc( BLIS_TRIANGULAR, &el ) + bli_obj_set_struc( BLIS_TRIANGULAR, &el ); bli_obj_set_uplo( BLIS_LOWER, &el ); // Digression: Notice that "triangular" structure does not require that @@ -290,7 +290,7 @@ int main( int argc, char** argv ) bli_obj_set_diag_offset( -1, &h ); // Set the structure and uplo of 'h'. - bli_obj_set_struc( BLIS_TRIANGULAR, &h ) + bli_obj_set_struc( BLIS_TRIANGULAR, &h ); bli_obj_set_uplo( BLIS_UPPER, &h ); // Randomize the elements on and above the first subdiagonal. diff --git a/examples/oapi/7level2.c b/examples/oapi/08level2.c similarity index 98% rename from examples/oapi/7level2.c rename to examples/oapi/08level2.c index 6008a3a9b..1954f42e0 100644 --- a/examples/oapi/7level2.c +++ b/examples/oapi/08level2.c @@ -157,7 +157,7 @@ int main( int argc, char** argv ) // Mark matrix 'a' as symmetric and stored in the lower triangle, and // then randomize that lower triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &a ) + bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); @@ -200,7 +200,7 @@ int main( int argc, char** argv ) // Mark matrix 'a' as symmetric and stored in the upper triangle, and // then randomize that upper triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &a ) + bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( BLIS_UPPER, &a ); bli_randm( &a ); @@ -242,7 +242,7 @@ int main( int argc, char** argv ) // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ) + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); @@ -283,7 +283,7 @@ int main( int argc, char** argv ) // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. 
- bli_obj_set_struc( BLIS_TRIANGULAR, &a ) + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); diff --git a/examples/oapi/8level3.c b/examples/oapi/09level3.c similarity index 98% rename from examples/oapi/8level3.c rename to examples/oapi/09level3.c index a1fd55bce..ff850e2a4 100644 --- a/examples/oapi/8level3.c +++ b/examples/oapi/09level3.c @@ -148,7 +148,7 @@ int main( int argc, char** argv ) // Mark matrix 'c' as symmetric and stored in the lower triangle, and // then randomize that lower triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &c ) + bli_obj_set_struc( BLIS_SYMMETRIC, &c ); bli_obj_set_uplo( BLIS_LOWER, &c ); bli_randm( &c ); @@ -194,7 +194,7 @@ int main( int argc, char** argv ) // Mark matrix 'a' as symmetric and stored in the upper triangle, and // then randomize that upper triangle. - bli_obj_set_struc( BLIS_SYMMETRIC, &a ) + bli_obj_set_struc( BLIS_SYMMETRIC, &a ); bli_obj_set_uplo( BLIS_UPPER, &a ); bli_randm( &a ); @@ -241,7 +241,7 @@ int main( int argc, char** argv ) // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ) + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); @@ -286,7 +286,7 @@ int main( int argc, char** argv ) // Mark matrix 'a' as triangular and stored in the lower triangle, and // then randomize that lower triangle. - bli_obj_set_struc( BLIS_TRIANGULAR, &a ) + bli_obj_set_struc( BLIS_TRIANGULAR, &a ); bli_obj_set_uplo( BLIS_LOWER, &a ); bli_randm( &a ); diff --git a/examples/oapi/9util.c b/examples/oapi/10util.c similarity index 97% rename from examples/oapi/9util.c rename to examples/oapi/10util.c index 61042c39f..55366b0f6 100644 --- a/examples/oapi/9util.c +++ b/examples/oapi/10util.c @@ -147,7 +147,7 @@ int main( int argc, char** argv ) bli_setm( &BLIS_MINUS_ONE, &c ); // Set the structure and uplo of 'c'. 
- bli_obj_set_struc( BLIS_SYMMETRIC, &c ) + bli_obj_set_struc( BLIS_SYMMETRIC, &c ); bli_obj_set_uplo( BLIS_LOWER, &c ); // Randomize the lower triangle of 'c'. @@ -170,7 +170,7 @@ int main( int argc, char** argv ) // Initialize all of 'd' to -1.0 to simulate junk values. bli_setm( &BLIS_MINUS_ONE, &d ); - bli_obj_set_struc( BLIS_HERMITIAN, &d ) + bli_obj_set_struc( BLIS_HERMITIAN, &d ); bli_obj_set_uplo( BLIS_LOWER, &d ); // Randomize the lower triangle of 'd'. @@ -185,7 +185,7 @@ int main( int argc, char** argv ) bli_printm( "d (after mkherm):", &d, "%4.1f", "" ); // Set the structure and uplo of 'd'. - bli_obj_set_struc( BLIS_HERMITIAN, &d ) + bli_obj_set_struc( BLIS_HERMITIAN, &d ); bli_obj_set_uplo( BLIS_LOWER, &d ); // @@ -203,7 +203,7 @@ int main( int argc, char** argv ) bli_setm( &BLIS_MINUS_ONE, &e ); // Set the structure and uplo of 'e'. - bli_obj_set_struc( BLIS_SYMMETRIC, &e ) + bli_obj_set_struc( BLIS_SYMMETRIC, &e ); bli_obj_set_uplo( BLIS_UPPER, &e ); // Randomize the upper triangle of 'e'. @@ -221,7 +221,7 @@ int main( int argc, char** argv ) bli_setm( &BLIS_MINUS_ONE, &f ); // Set the structure and uplo of 'f'. - bli_obj_set_struc( BLIS_HERMITIAN, &f ) + bli_obj_set_struc( BLIS_HERMITIAN, &f ); bli_obj_set_uplo( BLIS_UPPER, &f ); // Randomize the upper triangle of 'f'. @@ -249,7 +249,7 @@ int main( int argc, char** argv ) bli_setm( &BLIS_MINUS_ONE, &g ); // Set the structure and uplo of 'g'. - bli_obj_set_struc( BLIS_TRIANGULAR, &g ) + bli_obj_set_struc( BLIS_TRIANGULAR, &g ); bli_obj_set_uplo( BLIS_LOWER, &g ); // Randomize the lower triangle of 'g'. diff --git a/examples/oapi/Makefile b/examples/oapi/Makefile index 905ef6727..08964e479 100644 --- a/examples/oapi/Makefile +++ b/examples/oapi/Makefile @@ -105,16 +105,17 @@ CFLAGS += -I$(TEST_SRC_PATH) LIBBLIS_LINK := $(BUILD_PATH)/$(LIBBLIS_LINK) # Binary executable name. 
-TEST_BINS := 0obj_basic.x \ - 1obj_attach.x \ - 2obj_ij.x \ - 3level0.x \ - 4level1v.x \ - 5level1m.x \ - 6level1m_diag.x \ - 7level2.x \ - 8level3.x \ - 9util.x +TEST_BINS := 00obj_basic.x \ + 01obj_attach.x \ + 02obj_ij.x \ + 03obj_view.x \ + 04level0.x \ + 05level1v.x \ + 06level1m.x \ + 07level1m_diag.x \ + 08level2.x \ + 09level3.x \ + 10util.x diff --git a/examples/oapi/README b/examples/oapi/README index 28cc6d84e..adf7ded9d 100644 --- a/examples/oapi/README +++ b/examples/oapi/README @@ -6,7 +6,7 @@ This directory contains several files, each containing various pieces of example code that demonstrate core functionality of the object API in BLIS. These example files should be thought of collectively like a tutorial, and therefore it is recommended to start from the beginning (the file that -starts in '0'). +starts in '00'). You can build all of the examples by simply running 'make' from this directory. (You can also run 'make clean'.) The makefile assumes that diff --git a/frame/0/copysc/bli_copysc.h b/frame/0/copysc/bli_copysc.h index dbddb5079..8022f4889 100644 --- a/frame/0/copysc/bli_copysc.h +++ b/frame/0/copysc/bli_copysc.h @@ -49,7 +49,7 @@ GENFRONT( copysc ) // -// Define BLAS-like interfaces with heterogeneous-typed operands. +// Prototype BLAS-like interfaces with heterogeneous-typed operands. // #undef GENTPROT2 diff --git a/frame/1/bli_l1v_check.c b/frame/1/bli_l1v_check.c index 54c856b45..7ca5e1291 100644 --- a/frame/1/bli_l1v_check.c +++ b/frame/1/bli_l1v_check.c @@ -203,6 +203,11 @@ void bli_l1v_xy_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_vector_object( x ); @@ -243,6 +248,11 @@ void bli_l1v_axy_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. 
+ + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alpha ); @@ -289,6 +299,11 @@ void bli_l1v_xby_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( beta ); @@ -339,6 +354,11 @@ void bli_l1v_axby_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alpha ); @@ -402,6 +422,11 @@ void bli_l1v_dot_check e_val = bli_check_nonconstant_object( rho ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alpha ); diff --git a/frame/1d/bli_l1d_check.c b/frame/1d/bli_l1d_check.c index 3846d99ef..118908657 100644 --- a/frame/1d/bli_l1d_check.c +++ b/frame/1d/bli_l1d_check.c @@ -121,6 +121,11 @@ void bli_l1d_xy_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_matrix_object( x ); @@ -161,6 +166,11 @@ void bli_l1d_axy_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. 
e_val = bli_check_scalar_object( alpha ); diff --git a/frame/1f/bli_l1f_check.c b/frame/1f/bli_l1f_check.c index a64b9c7db..4ba9dc034 100644 --- a/frame/1f/bli_l1f_check.c +++ b/frame/1f/bli_l1f_check.c @@ -66,6 +66,14 @@ void bli_axpy2v_check e_val = bli_check_floating_object( z ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( x, z ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alphax ); @@ -132,6 +140,14 @@ void bli_axpyf_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alpha ); @@ -203,6 +219,17 @@ void bli_dotaxpyv_check e_val = bli_check_floating_object( z ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, xt ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( x, z ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alpha ); @@ -299,6 +326,23 @@ void bli_dotxaxpyf_check e_val = bli_check_floating_object( z ); bli_check_error_code( e_val ); + // Check for consistent datatypes. 
+ + e_val = bli_check_consistent_object_datatypes( a, at ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, w ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, z ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alpha ); @@ -407,6 +451,14 @@ void bli_dotxf_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_scalar_object( alpha ); diff --git a/frame/1m/bli_l1m_check.c b/frame/1m/bli_l1m_check.c index d2ae6c5c4..ad8c409a7 100644 --- a/frame/1m/bli_l1m_check.c +++ b/frame/1m/bli_l1m_check.c @@ -106,6 +106,11 @@ void bli_l1m_xy_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. e_val = bli_check_matrix_object( x ); @@ -146,6 +151,11 @@ void bli_l1m_axy_check e_val = bli_check_floating_object( y ); bli_check_error_code( e_val ); + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( x, y ); + bli_check_error_code( e_val ); + // Check object dimensions. 
e_val = bli_check_scalar_object( alpha ); diff --git a/frame/1m/packm/bli_packm_cntl.h b/frame/1m/packm/bli_packm_cntl.h index ab22e8621..c982aa0b6 100644 --- a/frame/1m/packm/bli_packm_cntl.h +++ b/frame/1m/packm/bli_packm_cntl.h @@ -46,37 +46,45 @@ struct packm_params_s }; typedef struct packm_params_s packm_params_t; -#define bli_cntl_packm_params_var_func( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->var_func ) +static packm_voft bli_cntl_packm_params_var_func( cntl_t* cntl ) +{ + packm_params_t* ppp = cntl->params; return ppp->var_func; +} -#define bli_cntl_packm_params_bmid_m( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->bmid_m ) +static bszid_t bli_cntl_packm_params_bmid_m( cntl_t* cntl ) +{ + packm_params_t* ppp = cntl->params; return ppp->bmid_m; +} -#define bli_cntl_packm_params_bmid_n( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->bmid_n ) +static bszid_t bli_cntl_packm_params_bmid_n( cntl_t* cntl ) +{ + packm_params_t* ppp = cntl->params; return ppp->bmid_n; +} -#define bli_cntl_packm_params_does_invert_diag( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->does_invert_diag ) +static bool_t bli_cntl_packm_params_does_invert_diag( cntl_t* cntl ) +{ + packm_params_t* ppp = cntl->params; return ppp->does_invert_diag; +} -#define bli_cntl_packm_params_rev_iter_if_upper( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->rev_iter_if_upper ) +static bool_t bli_cntl_packm_params_rev_iter_if_upper( cntl_t* cntl ) +{ + packm_params_t* ppp = cntl->params; return ppp->rev_iter_if_upper; +} -#define bli_cntl_packm_params_rev_iter_if_lower( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->rev_iter_if_lower ) +static bool_t bli_cntl_packm_params_rev_iter_if_lower( cntl_t* cntl ) +{ + packm_params_t* ppp = cntl->params; return ppp->rev_iter_if_lower; +} -#define bli_cntl_packm_params_pack_schema( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->pack_schema ) +static pack_t bli_cntl_packm_params_pack_schema( cntl_t* cntl ) +{ + 
packm_params_t* ppp = cntl->params; return ppp->pack_schema; +} -#define bli_cntl_packm_params_pack_buf_type( cntl ) \ -\ - ( ( (packm_params_t*)(cntl)->params )->pack_buf_type ) +static packbuf_t bli_cntl_packm_params_pack_buf_type( cntl_t* cntl ) +{ + packm_params_t* ppp = cntl->params; return ppp->pack_buf_type; +} // ----------------------------------------------------------------------------- diff --git a/frame/1m/packm/bli_packm_init.c b/frame/1m/packm/bli_packm_init.c index 404498d60..0437b722a 100644 --- a/frame/1m/packm/bli_packm_init.c +++ b/frame/1m/packm/bli_packm_init.c @@ -56,8 +56,8 @@ siz_t bli_packm_init bool_t does_invert_diag; bool_t rev_iter_if_upper; bool_t rev_iter_if_lower; - //pack_t pack_schema; - packbuf_t pack_buf_type; + pack_t schema; + //packbuf_t pack_buf_type; siz_t size_needed; // Check parameters. @@ -70,8 +70,8 @@ siz_t bli_packm_init does_invert_diag = bli_cntl_packm_params_does_invert_diag( cntl ); rev_iter_if_upper = bli_cntl_packm_params_rev_iter_if_upper( cntl ); rev_iter_if_lower = bli_cntl_packm_params_rev_iter_if_lower( cntl ); - //pack_schema = bli_cntl_packm_params_pack_schema( cntl ); - pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl ); + schema = bli_cntl_packm_params_pack_schema( cntl ); + //pack_buf_type = bli_cntl_packm_params_pack_buf_type( cntl ); #if 0 // Let us now check to see if the object has already been packed. First @@ -112,30 +112,51 @@ siz_t bli_packm_init return 0; } - // We now ignore the pack_schema field in the control tree and - // extract the schema from the context, depending on whether we are - // preparing to pack a block of A or panel of B. For A and B, we must - // obtain the schema from the context since the induced methods reuse - // the same control trees used by native execution, and those induced - // methods specify the schema used by the current execution phase - // within the context (whereas the control tree does not change). 
+#if 0 pack_t schema; - if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK ) + if ( bli_cntx_method( cntx ) != BLIS_NAT ) { - schema = bli_cntx_schema_a_block( cntx ); + // We now ignore the pack_schema field in the control tree and + // extract the schema from the context, depending on whether we are + // preparing to pack a block of A or panel of B. For A and B, we must + // obtain the schema from the context since the induced methods reuse + // the same control trees used by native execution, and those induced + // methods specify the schema used by the current execution phase + // within the context (whereas the control tree does not change). + + if ( pack_buf_type == BLIS_BUFFER_FOR_A_BLOCK ) + { + schema = bli_cntx_schema_a_block( cntx ); + } + else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL ) + { + schema = bli_cntx_schema_b_panel( cntx ); + } + else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) + { + schema = bli_cntl_packm_params_pack_schema( cntl ); + } } - else if ( pack_buf_type == BLIS_BUFFER_FOR_B_PANEL ) + else // ( bli_cntx_method( cntx ) == BLIS_NAT ) { - schema = bli_cntx_schema_b_panel( cntx ); + // For native execution, we obtain the schema from the control tree + // node. (Notice that it doesn't matter if the pack_buf_type is for + // A or B.) + schema = bli_cntl_packm_params_pack_schema( cntl ); } - else // if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) + // This is no longer needed now that we branch between native and + // non-native cases above. +#if 0 + if ( pack_buf_type == BLIS_BUFFER_FOR_C_PANEL ) { // If we get a request to pack C for some reason, it is likely // not part of an induced method, and so it would be safe (and // necessary) to read the pack schema from the control tree. schema = bli_cntl_packm_params_pack_schema( cntl ); } +#endif +#endif // Prepare a few other variables based on properties of the control // tree. 
diff --git a/frame/2/bli_l2_check.c b/frame/2/bli_l2_check.c index 84dda521f..d8f66ff87 100644 --- a/frame/2/bli_l2_check.c +++ b/frame/2/bli_l2_check.c @@ -53,6 +53,14 @@ void bli_gemv_check e_val = bli_check_general_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); } @@ -80,6 +88,14 @@ void bli_hemv_check e_val = bli_check_hermitian_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); } @@ -107,6 +123,14 @@ void bli_symv_check e_val = bli_check_symmetric_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); } @@ -132,6 +156,11 @@ void bli_trmv_check e_val = bli_check_triangular_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); } @@ -157,6 +186,11 @@ void bli_trsv_check e_val = bli_check_triangular_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); } @@ -178,6 +212,14 @@ void bli_ger_check e_val = bli_check_general_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. 
+ + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); } @@ -203,6 +245,11 @@ void bli_her_check e_val = bli_check_hermitian_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); } @@ -229,6 +276,14 @@ void bli_her2_check e_val = bli_check_hermitian_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); } @@ -254,6 +309,11 @@ void bli_syr_check e_val = bli_check_symmetric_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); } @@ -280,6 +340,14 @@ void bli_syr2_check e_val = bli_check_symmetric_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( a, x ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( a, y ); + bli_check_error_code( e_val ); } diff --git a/frame/3/bli_l3_check.c b/frame/3/bli_l3_check.c index 3dfd9bbf4..1a14ba93f 100644 --- a/frame/3/bli_l3_check.c +++ b/frame/3/bli_l3_check.c @@ -294,6 +294,14 @@ void bli_gemm_basic_check e_val = bli_check_level3_dims( a, b, c ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. 
+ + e_val = bli_check_consistent_object_datatypes( c, a ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( c, b ); + bli_check_error_code( e_val ); } void bli_hemm_basic_check @@ -330,6 +338,14 @@ void bli_hemm_basic_check e_val = bli_check_square_object( a ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( c, a ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( c, b ); + bli_check_error_code( e_val ); } void bli_herk_basic_check @@ -365,6 +381,14 @@ void bli_herk_basic_check e_val = bli_check_general_object( ah ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( c, a ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( c, ah ); + bli_check_error_code( e_val ); } void bli_her2k_basic_check @@ -412,6 +436,20 @@ void bli_her2k_basic_check e_val = bli_check_general_object( ah ); bli_check_error_code( e_val ); + + // Check for consistent datatypes. + + e_val = bli_check_consistent_object_datatypes( c, a ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( c, ah ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( c, b ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_datatypes( c, bh ); + bli_check_error_code( e_val ); } void bli_l3_basic_check diff --git a/frame/3/bli_l3_cntl.c b/frame/3/bli_l3_cntl.c index 0ea06715a..33c64edcb 100644 --- a/frame/3/bli_l3_cntl.c +++ b/frame/3/bli_l3_cntl.c @@ -45,6 +45,21 @@ void bli_l3_cntl_create_if cntl_t** cntl_use ) { + // This is part of a hack to support mixed domain in bli_gemm_front(). + // Sometimes we need to specify a non-standard schema for A and B, and + // we decided to transmit them via the schema field in the obj_t's + // rather than pass them in as function parameters. 
Once the values + // have been read, we immediately reset them back to their expected + // values for unpacked objects. Notice that we do this even if the + // caller passed in a custom control tree; that's because we still need + // to reset the pack schema of a and b, which were modified by the + // operation's _front() function. + pack_t schema_a = bli_obj_pack_schema( a ); + pack_t schema_b = bli_obj_pack_schema( b ); + + bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); + bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); + // If the control tree pointer is NULL, we construct a default // tree as a function of the operation family. if ( cntl_orig == NULL ) @@ -53,7 +68,7 @@ void bli_l3_cntl_create_if family == BLIS_HERK || family == BLIS_TRMM ) { - *cntl_use = bli_gemm_cntl_create( family ); + *cntl_use = bli_gemm_cntl_create( family, schema_a, schema_b ); } else // if ( family == BLIS_TRSM ) { @@ -62,7 +77,7 @@ void bli_l3_cntl_create_if if ( bli_obj_is_triangular( a ) ) side = BLIS_LEFT; else side = BLIS_RIGHT; - *cntl_use = bli_trsm_cntl_create( side ); + *cntl_use = bli_trsm_cntl_create( side, schema_a, schema_b ); } } else diff --git a/frame/3/bli_l3_oapi.c b/frame/3/bli_l3_oapi.c index 5f4bc9932..94e563c24 100644 --- a/frame/3/bli_l3_oapi.c +++ b/frame/3/bli_l3_oapi.c @@ -57,20 +57,25 @@ void PASTEMAC(opname,EX_SUF) \ \ BLIS_OAPI_CNTX_DECL \ \ - /* Invoke the operation's "ind" function--its induced method front-end. - This function will call native execution for real domain problems. - For complex problems, it calls the highest priority induced method - that is available (ie: implemented and enabled), and if none are - enabled, it calls native execution. */ \ - PASTEMAC(opname,ind) \ - ( \ - alpha, \ - a, \ - b, \ - beta, \ - c, \ - cntx \ - ); \ + /* Only proceed with an induced method if all operands have the same + (complex) datatype. 
If any datatypes differ, skip the induced method + chooser function and proceed directly with native execution, which is + where mixed datatype support will be implemented (if at all). */ \ + if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \ + bli_obj_dt( b ) == bli_obj_dt( c ) && \ + bli_obj_is_complex( c ) ) \ + { \ + /* Invoke the operation's "ind" function--its induced method front-end. + For complex problems, it calls the highest priority induced method + that is available (ie: implemented and enabled), and if none are + enabled, it calls native execution. (For real problems, it calls + the operation's native execution interface.) */ \ + PASTEMAC(opname,ind)( alpha, a, b, beta, c, cntx ); \ + } \ + else \ + { \ + PASTEMAC(opname,nat)( alpha, a, b, beta, c, cntx ); \ + } \ } GENFRONT( gemm ) @@ -96,16 +101,25 @@ void PASTEMAC(opname,EX_SUF) \ \ BLIS_OAPI_CNTX_DECL \ \ - PASTEMAC(opname,ind) \ - ( \ - side, \ - alpha, \ - a, \ - b, \ - beta, \ - c, \ - cntx \ - ); \ + /* Only proceed with an induced method if all operands have the same + (complex) datatype. If any datatypes differ, skip the induced method + chooser function and proceed directly with native execution, which is + where mixed datatype support will be implemented (if at all). */ \ + if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \ + bli_obj_dt( b ) == bli_obj_dt( c ) && \ + bli_obj_is_complex( c ) ) \ + { \ + /* Invoke the operation's "ind" function--its induced method front-end. + For complex problems, it calls the highest priority induced method + that is available (ie: implemented and enabled), and if none are + enabled, it calls native execution. (For real problems, it calls + the operation's native execution interface.) 
*/ \ + PASTEMAC(opname,ind)( side, alpha, a, b, beta, c, cntx ); \ + } \ + else \ + { \ + PASTEMAC(opname,nat)( side, alpha, a, b, beta, c, cntx ); \ + } \ } GENFRONT( hemm ) @@ -129,14 +143,24 @@ void PASTEMAC(opname,EX_SUF) \ \ BLIS_OAPI_CNTX_DECL \ \ - PASTEMAC(opname,ind) \ - ( \ - alpha, \ - a, \ - beta, \ - c, \ - cntx \ - ); \ + /* Only proceed with an induced method if all operands have the same + (complex) datatype. If any datatypes differ, skip the induced method + chooser function and proceed directly with native execution, which is + where mixed datatype support will be implemented (if at all). */ \ + if ( bli_obj_dt( a ) == bli_obj_dt( c ) && \ + bli_obj_is_complex( c ) ) \ + { \ + /* Invoke the operation's "ind" function--its induced method front-end. + For complex problems, it calls the highest priority induced method + that is available (ie: implemented and enabled), and if none are + enabled, it calls native execution. (For real problems, it calls + the operation's native execution interface.) */ \ + PASTEMAC(opname,ind)( alpha, a, beta, c, cntx ); \ + } \ + else \ + { \ + PASTEMAC(opname,nat)( alpha, a, beta, c, cntx ); \ + } \ } GENFRONT( herk ) @@ -159,14 +183,24 @@ void PASTEMAC(opname,EX_SUF) \ \ BLIS_OAPI_CNTX_DECL \ \ - PASTEMAC(opname,ind) \ - ( \ - side, \ - alpha, \ - a, \ - b, \ - cntx \ - ); \ + /* Only proceed with an induced method if all operands have the same + (complex) datatype. If any datatypes differ, skip the induced method + chooser function and proceed directly with native execution, which is + where mixed datatype support will be implemented (if at all). */ \ + if ( bli_obj_dt( a ) == bli_obj_dt( b ) && \ + bli_obj_is_complex( b ) ) \ + { \ + /* Invoke the operation's "ind" function--its induced method front-end. + For complex problems, it calls the highest priority induced method + that is available (ie: implemented and enabled), and if none are + enabled, it calls native execution. 
(For real problems, it calls + the operation's native execution interface.) */ \ + PASTEMAC(opname,ind)( side, alpha, a, b, cntx ); \ + } \ + else \ + { \ + PASTEMAC(opname,nat)( side, alpha, a, b, cntx ); \ + } \ } GENFRONT( trmm ) diff --git a/frame/3/bli_l3_thrinfo.h b/frame/3/bli_l3_thrinfo.h index fcf1f507d..58733bcf5 100644 --- a/frame/3/bli_l3_thrinfo.h +++ b/frame/3/bli_l3_thrinfo.h @@ -38,24 +38,24 @@ // gemm -#define gemm_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way ) -#define gemm_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way ) +#define bli_gemm_get_next_a_upanel( thread, a1, step ) ( a1 + step * thread->n_way ) +#define bli_gemm_get_next_b_upanel( thread, b1, step ) ( b1 + step * thread->n_way ) // herk -#define herk_get_next_a_micropanel( thread, a1, step ) ( a1 + step * thread->n_way ) -#define herk_get_next_b_micropanel( thread, b1, step ) ( b1 + step * thread->n_way ) +#define bli_herk_get_next_a_upanel( thread, a1, step ) ( a1 + step * thread->n_way ) +#define bli_herk_get_next_b_upanel( thread, b1, step ) ( b1 + step * thread->n_way ) // trmm -#define trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) -#define trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) -#define trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) -#define trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) +#define bli_trmm_r_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) +#define bli_trmm_r_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) +#define bli_trmm_l_ir_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) +#define bli_trmm_l_jr_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) // trsm -#define 
trsm_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) +#define bli_trsm_my_iter( index, thread ) ( index % thread->n_way == thread->work_id % thread->n_way ) // // thrinfo_t APIs specific to level-3 operations. diff --git a/frame/3/bli_l3_ukr_tapi.c b/frame/3/bli_l3_ukr_tapi.c index 1836e2f6a..44f557029 100644 --- a/frame/3/bli_l3_ukr_tapi.c +++ b/frame/3/bli_l3_ukr_tapi.c @@ -55,7 +55,7 @@ void PASTEMAC(ch,opname) \ \ /* Query the context for the function address of the current datatype's micro-kernel. */ \ - PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \ + PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \ \ /* Invoke the typed function for the given datatype. */ \ f( \ @@ -91,7 +91,7 @@ void PASTEMAC(ch,opname) \ \ /* Query the context for the function address of the current datatype's micro-kernel. */ \ - PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \ + PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \ \ /* Invoke the typed function for the given datatype. */ \ f( \ @@ -129,7 +129,7 @@ void PASTEMAC(ch,opname) \ \ /* Query the context for the function address of the current datatype's micro-kernel. */ \ - PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_ukr_dt( dt, kerid, cntx ); \ + PASTECH2(ch,tname,_ft) f = bli_cntx_get_l3_vir_ukr_dt( dt, kerid, cntx ); \ \ /* Invoke the typed function for the given datatype. 
*/ \ f( \ diff --git a/frame/3/gemm/bli_gemm_cntl.c b/frame/3/gemm/bli_gemm_cntl.c index b17ce10ac..3e13f23fa 100644 --- a/frame/3/gemm/bli_gemm_cntl.c +++ b/frame/3/gemm/bli_gemm_cntl.c @@ -36,17 +36,21 @@ cntl_t* bli_gemm_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ) { - return bli_gemmbp_cntl_create( family ); + return bli_gemmbp_cntl_create( family, schema_a, schema_b ); } // ----------------------------------------------------------------------------- cntl_t* bli_gemmbp_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ) { void* macro_kernel_p = bli_gemm_ker_var2; @@ -82,7 +86,7 @@ cntl_t* bli_gemmbp_cntl_create FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS, + schema_a, // normally BLIS_PACKED_ROW_PANELS BLIS_BUFFER_FOR_A_BLOCK, gemm_cntl_bp_bu ); @@ -106,7 +110,7 @@ cntl_t* bli_gemmbp_cntl_create FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS, + schema_b, // normally BLIS_PACKED_COL_PANELS BLIS_BUFFER_FOR_B_PANEL, gemm_cntl_op_bp ); @@ -134,6 +138,10 @@ cntl_t* bli_gemmbp_cntl_create // ----------------------------------------------------------------------------- +// This control tree creation function is disabled because it is no longer used. +// (It was originally created in the run up to publishing the 1m journal article, +// but was disabled to reduce complexity.) 
+#if 0 cntl_t* bli_gemmpb_cntl_create ( opid_t family @@ -223,6 +231,7 @@ cntl_t* bli_gemmpb_cntl_create return gemm_cntl_vl_mm; } +#endif // ----------------------------------------------------------------------------- diff --git a/frame/3/gemm/bli_gemm_cntl.h b/frame/3/gemm/bli_gemm_cntl.h index 3b643e1fc..3b3cb1cf2 100644 --- a/frame/3/gemm/bli_gemm_cntl.h +++ b/frame/3/gemm/bli_gemm_cntl.h @@ -34,20 +34,26 @@ cntl_t* bli_gemm_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ); // ----------------------------------------------------------------------------- cntl_t* bli_gemmbp_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ); +#if 0 cntl_t* bli_gemmpb_cntl_create ( - opid_t family + opid_t family, ); +#endif // ----------------------------------------------------------------------------- diff --git a/frame/3/gemm/bli_gemm_front.c b/frame/3/gemm/bli_gemm_front.c index f2600d791..841c4a1c2 100644 --- a/frame/3/gemm/bli_gemm_front.c +++ b/frame/3/gemm/bli_gemm_front.c @@ -77,7 +77,7 @@ void bli_gemm_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_eff_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_swap( &a_local, &b_local ); @@ -87,10 +87,34 @@ void bli_gemm_front } // Record the threading for each level within the context. 
- bli_cntx_set_thrloop_from_env( BLIS_GEMM, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_GEMM, + BLIS_LEFT, // ignored for gemm/hemm/symm + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &b_local ); + } // Invoke the internal back-end via the thread handler. bli_l3_thread_decorator diff --git a/frame/3/gemm/bli_gemm_ker_var2.c b/frame/3/gemm/bli_gemm_ker_var2.c index 3f54ef031..db74118bc 100644 --- a/frame/3/gemm/bli_gemm_ker_var2.c +++ b/frame/3/gemm/bli_gemm_ker_var2.c @@ -183,7 +183,7 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases.
Note that the strides of this temporary buffer are set so that they match the storage of the @@ -192,7 +192,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -295,11 +295,11 @@ void PASTEMAC(ch,varname) \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ - a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \ + a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ - b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \ + b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ diff --git a/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c b/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c index c57144392..8d927e295 100644 --- a/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c +++ b/frame/3/gemm/ind/bli_gemm4mb_ker_var2.c @@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. 
*/ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -291,11 +291,11 @@ void PASTEMAC(ch,varname) \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ - a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \ + a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ - b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \ + b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ diff --git a/frame/3/gemm/ind/old/bli_gemm3m2_ker_var2.c b/frame/3/gemm/ind/old/bli_gemm3m2_ker_var2.c index 9ecb4cf5a..4045fa74c 100644 --- a/frame/3/gemm/ind/old/bli_gemm3m2_ker_var2.c +++ b/frame/3/gemm/ind/old/bli_gemm3m2_ker_var2.c @@ -163,13 +163,13 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. */ \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? 
MR : 1 ); \ \ @@ -300,11 +300,11 @@ void PASTEMAC(ch,varname) \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ - a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \ + a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ - b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \ + b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ diff --git a/frame/3/hemm/bli_hemm_front.c b/frame/3/hemm/bli_hemm_front.c index 2406ee1d5..f53fb888c 100644 --- a/frame/3/hemm/bli_hemm_front.c +++ b/frame/3/hemm/bli_hemm_front.c @@ -72,7 +72,7 @@ void bli_hemm_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); bli_obj_toggle_conj( &a_local ); @@ -88,10 +88,34 @@ void bli_hemm_front } // Record the threading for each level within the context. - bli_cntx_set_thrloop_from_env( BLIS_HEMM, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_HEMM, + BLIS_LEFT, // ignored for gemm/hemm/symm + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()).
This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &b_local ); + } // Invoke the internal back-end. bli_l3_thread_decorator diff --git a/frame/3/her2k/bli_her2k_front.c b/frame/3/her2k/bli_her2k_front.c index 9448b881e..e8eadc8e2 100644 --- a/frame/3/her2k/bli_her2k_front.c +++ b/frame/3/her2k/bli_her2k_front.c @@ -92,7 +92,7 @@ void bli_her2k_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_swap( &a_local, &bh_local ); bli_obj_swap( &b_local, &ah_local ); @@ -106,10 +106,38 @@ void bli_her2k_front } // Record the threading for each level within the context. - bli_cntx_set_thrloop_from_env( BLIS_HER2K, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_HER2K, + BLIS_LEFT, // ignored for her[2]k/syr[2]k + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()).
This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bh_local ); + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &bh_local ); + bli_obj_set_pack_schema( schema_a, &b_local ); + bli_obj_set_pack_schema( schema_b, &ah_local ); + } // Invoke herk twice, using beta only the first time. diff --git a/frame/3/herk/bli_herk_front.c b/frame/3/herk/bli_herk_front.c index 8b2379a66..50ea17b8f 100644 --- a/frame/3/herk/bli_herk_front.c +++ b/frame/3/herk/bli_herk_front.c @@ -77,7 +77,7 @@ void bli_herk_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_toggle_conj( &a_local ); bli_obj_toggle_conj( &ah_local ); @@ -86,10 +86,34 @@ void bli_herk_front } // Record the threading for each level within the context. 
- bli_cntx_set_thrloop_from_env( BLIS_HERK, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_HERK, + BLIS_LEFT, // ignored for her[2]k/syr[2]k + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &ah_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &ah_local ); + } // Invoke the internal back-end. bli_l3_thread_decorator diff --git a/frame/3/herk/bli_herk_l_ker_var2.c b/frame/3/herk/bli_herk_l_ker_var2.c index 05a0e300e..ebc3be486 100644 --- a/frame/3/herk/bli_herk_l_ker_var2.c +++ b/frame/3/herk/bli_herk_l_ker_var2.c @@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases.
Note that the strides of this temporary buffer are set so that they match the storage of the @@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ - a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \ + a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ - b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \ + b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ diff --git a/frame/3/herk/bli_herk_u_ker_var2.c b/frame/3/herk/bli_herk_u_ker_var2.c index 51600d839..3d74a0543 100644 --- a/frame/3/herk/bli_herk_u_ker_var2.c +++ b/frame/3/herk/bli_herk_u_ker_var2.c @@ -168,7 +168,7 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. 
Note that the strides of this temporary buffer are set so that they match the storage of the @@ -177,7 +177,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -312,11 +312,11 @@ void PASTEMAC(ch,varname) \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. */ \ - a2 = herk_get_next_a_micropanel( caucus, a1, rstep_a ); \ + a2 = bli_herk_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ - b2 = herk_get_next_b_micropanel( thread, b1, cstep_b ); \ + b2 = bli_herk_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ diff --git a/frame/3/symm/bli_symm_front.c b/frame/3/symm/bli_symm_front.c index 0c229ef9b..ba646ce92 100644 --- a/frame/3/symm/bli_symm_front.c +++ b/frame/3/symm/bli_symm_front.c @@ -72,7 +72,7 @@ void bli_symm_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); bli_obj_induce_trans( &b_local ); @@ -87,10 +87,34 @@ void bli_symm_front } // Record the threading for each level within the context. 
- bli_cntx_set_thrloop_from_env( BLIS_SYMM, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_SYMM, + BLIS_LEFT, // ignored for gemm/hemm/symm + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &b_local ); + } // Invoke the internal back-end. bli_l3_thread_decorator diff --git a/frame/3/syr2k/bli_syr2k_front.c b/frame/3/syr2k/bli_syr2k_front.c index 32981cb89..35231980d 100644 --- a/frame/3/syr2k/bli_syr2k_front.c +++ b/frame/3/syr2k/bli_syr2k_front.c @@ -81,16 +81,44 @@ void bli_syr2k_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_induce_trans( &c_local ); } // Record the threading for each level within the context.
- bli_cntx_set_thrloop_from_env( BLIS_SYR2K, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_SYR2K, + BLIS_LEFT, // ignored for her[2]k/syr[2]k + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &bt_local ); + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &b_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &bt_local ); + bli_obj_set_pack_schema( schema_a, &b_local ); + bli_obj_set_pack_schema( schema_b, &at_local ); + } // Invoke herk twice, using beta only the first time. diff --git a/frame/3/syrk/bli_syrk_front.c b/frame/3/syrk/bli_syrk_front.c index eed5f0ebc..819214dfe 100644 --- a/frame/3/syrk/bli_syrk_front.c +++ b/frame/3/syrk/bli_syrk_front.c @@ -74,16 +74,40 @@ void bli_syrk_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner.
- if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_induce_trans( &c_local ); } // Record the threading for each level within the context. - bli_cntx_set_thrloop_from_env( BLIS_SYRK, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_SYRK, + BLIS_LEFT, // ignored for her[2]k/syr[2]k + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &at_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &at_local ); + } // Invoke the internal back-end. bli_l3_thread_decorator diff --git a/frame/3/trmm/bli_trmm_front.c b/frame/3/trmm/bli_trmm_front.c index afdabbbd2..d6c692126 100644 --- a/frame/3/trmm/bli_trmm_front.c +++ b/frame/3/trmm/bli_trmm_front.c @@ -105,7 +105,7 @@ void bli_trmm_front // NOTE: We disable the optimization for 1x1 matrices since the concept // of row- vs. column storage breaks down.
if ( !bli_obj_is_1x1( &c_local ) ) - if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); bli_obj_induce_trans( &a_local ); @@ -130,10 +130,34 @@ void bli_trmm_front bli_obj_set_as_root( &c_local ); // Record the threading for each level within the context. - bli_cntx_set_thrloop_from_env( BLIS_TRMM, side, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_TRMM, + side, + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &b_local ); + } // Invoke the internal back-end. bli_l3_thread_decorator diff --git a/frame/3/trmm/bli_trmm_ll_ker_var2.c b/frame/3/trmm/bli_trmm_ll_ker_var2.c index 94dd233b0..854b9ce5f 100644 --- a/frame/3/trmm/bli_trmm_ll_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ll_ker_var2.c @@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type.
*/ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -322,7 +322,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ - if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \ + if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \ \ ctype* restrict a1; \ ctype* restrict c11; \ @@ -364,7 +364,7 @@ void PASTEMAC(ch,varname) \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ - if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \ \ b1_i = b1 + ( off_a1011 * PACKNR ) / off_scl; \ \ @@ -434,7 +434,7 @@ void PASTEMAC(ch,varname) \ } \ else if ( bli_is_strictly_below_diag_n( diagoffa_i, MR, k ) ) \ { \ - if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a2; \ \ diff --git a/frame/3/trmm/bli_trmm_lu_ker_var2.c b/frame/3/trmm/bli_trmm_lu_ker_var2.c index 924e75d4f..9f9036129 100644 --- a/frame/3/trmm/bli_trmm_lu_ker_var2.c +++ b/frame/3/trmm/bli_trmm_lu_ker_var2.c @@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. 
*/ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -329,7 +329,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ - if ( trmm_l_jr_my_iter( j, jr_thread ) ) { \ + if ( bli_trmm_l_jr_my_iter( j, jr_thread ) ) { \ \ ctype* restrict a1; \ ctype* restrict c11; \ @@ -371,7 +371,7 @@ void PASTEMAC(ch,varname) \ is_a_cur += ( bli_is_odd( is_a_cur ) ? 1 : 0 ); \ ps_a_cur = ( is_a_cur * ss_a_num ) / ss_a_den; \ \ - if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \ \ b1_i = b1 + ( off_a1112 * PACKNR ) / off_scl; \ \ @@ -441,7 +441,7 @@ void PASTEMAC(ch,varname) \ } \ else if ( bli_is_strictly_above_diag_n( diagoffa_i, MR, k ) ) \ { \ - if ( trmm_l_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_l_ir_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a2; \ \ diff --git a/frame/3/trmm/bli_trmm_rl_ker_var2.c b/frame/3/trmm/bli_trmm_rl_ker_var2.c index 26ef9a13e..75d2a346e 100644 --- a/frame/3/trmm/bli_trmm_rl_ker_var2.c +++ b/frame/3/trmm/bli_trmm_rl_ker_var2.c @@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. 
*/ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ - if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ + if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ @@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ @@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \ } \ else if ( bli_is_strictly_below_diag_n( diagoffb_j, k, NR ) ) \ { \ - if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ + if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ @@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). 
*/ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a2; \ \ diff --git a/frame/3/trmm/bli_trmm_ru_ker_var2.c b/frame/3/trmm/bli_trmm_ru_ker_var2.c index 4c9af0757..203432b13 100644 --- a/frame/3/trmm/bli_trmm_ru_ker_var2.c +++ b/frame/3/trmm/bli_trmm_ru_ker_var2.c @@ -160,7 +160,7 @@ void PASTEMAC(ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -169,7 +169,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -361,7 +361,7 @@ void PASTEMAC(ch,varname) \ is_b_cur += ( bli_is_odd( is_b_cur ) ? 1 : 0 ); \ ps_b_cur = ( is_b_cur * ss_b_num ) / ss_b_den; \ \ - if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ + if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ @@ -370,7 +370,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). 
*/ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a1_i; \ ctype* restrict a2; \ @@ -446,7 +446,7 @@ void PASTEMAC(ch,varname) \ } \ else if ( bli_is_strictly_above_diag_n( diagoffb_j, k, NR ) ) \ { \ - if ( trmm_r_jr_my_iter( j, jr_thread ) ) { \ + if ( bli_trmm_r_jr_my_iter( j, jr_thread ) ) { \ \ /* Save the 4m1/3m1 imaginary stride of B to the auxinfo_t object. */ \ @@ -455,7 +455,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if ( trmm_r_ir_my_iter( i, ir_thread ) ) { \ + if ( bli_trmm_r_ir_my_iter( i, ir_thread ) ) { \ \ ctype* restrict a2; \ \ diff --git a/frame/3/trmm3/bli_trmm3_front.c b/frame/3/trmm3/bli_trmm3_front.c index 664a7fd51..c5e561a0d 100644 --- a/frame/3/trmm3/bli_trmm3_front.c +++ b/frame/3/trmm3/bli_trmm3_front.c @@ -104,7 +104,7 @@ void bli_trmm3_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_toggle_side( &side ); bli_obj_induce_trans( &a_local ); @@ -129,10 +129,34 @@ void bli_trmm3_front bli_obj_set_as_root( &c_local ); // Record the threading for each level within the context. 
- bli_cntx_set_thrloop_from_env( BLIS_TRMM3, side, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_TRMM3, + side, + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &b_local ); + } // Invoke the internal back-end. bli_l3_thread_decorator diff --git a/frame/3/trsm/bli_trsm_cntl.c b/frame/3/trsm/bli_trsm_cntl.c index e05fc3d20..df9b831a3 100644 --- a/frame/3/trsm/bli_trsm_cntl.c +++ b/frame/3/trsm/bli_trsm_cntl.c @@ -36,16 +36,21 @@ cntl_t* bli_trsm_cntl_create ( - side_t side + side_t side, + pack_t schema_a, + pack_t schema_b ) { - if ( bli_is_left( side ) ) return bli_trsm_l_cntl_create(); - else return bli_trsm_r_cntl_create(); + if ( bli_is_left( side ) ) + return bli_trsm_l_cntl_create( schema_a, schema_b ); + else + return bli_trsm_r_cntl_create( schema_a, schema_b ); } cntl_t* bli_trsm_l_cntl_create ( - void + pack_t schema_a, + pack_t schema_b ) { void* macro_kernel_p = bli_trsm_xx_ker_var2; @@ -79,7 +84,7 @@ cntl_t* bli_trsm_l_cntl_create TRUE, // do NOT invert diagonal TRUE, // reverse iteration if upper? FALSE, // reverse iteration if lower? 
- BLIS_PACKED_ROW_PANELS, + schema_a, // normally BLIS_PACKED_ROW_PANELS BLIS_BUFFER_FOR_A_BLOCK, trsm_cntl_bp_bu ); @@ -103,7 +108,7 @@ cntl_t* bli_trsm_l_cntl_create FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS, + schema_b, // normally BLIS_PACKED_COL_PANELS BLIS_BUFFER_FOR_B_PANEL, trsm_cntl_op_bp ); @@ -131,7 +136,8 @@ cntl_t* bli_trsm_l_cntl_create cntl_t* bli_trsm_r_cntl_create ( - void + pack_t schema_a, + pack_t schema_b ) { void* macro_kernel_p = bli_trsm_xx_ker_var2; @@ -165,7 +171,7 @@ cntl_t* bli_trsm_r_cntl_create FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS, + schema_a, // normally BLIS_PACKED_ROW_PANELS BLIS_BUFFER_FOR_A_BLOCK, trsm_cntl_bp_bu ); @@ -189,7 +195,7 @@ cntl_t* bli_trsm_r_cntl_create TRUE, // do NOT invert diagonal FALSE, // reverse iteration if upper? TRUE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS, + schema_b, // normally BLIS_PACKED_COL_PANELS BLIS_BUFFER_FOR_B_PANEL, trsm_cntl_op_bp ); diff --git a/frame/3/trsm/bli_trsm_cntl.h b/frame/3/trsm/bli_trsm_cntl.h index cfd20cad3..77c36aec2 100644 --- a/frame/3/trsm/bli_trsm_cntl.h +++ b/frame/3/trsm/bli_trsm_cntl.h @@ -34,17 +34,21 @@ cntl_t* bli_trsm_cntl_create ( - side_t side + side_t side, + pack_t schema_a, + pack_t schema_b ); cntl_t* bli_trsm_l_cntl_create ( - void + pack_t schema_a, + pack_t schema_b ); cntl_t* bli_trsm_r_cntl_create ( - void + pack_t schema_a, + pack_t schema_b ); void bli_trsm_cntl_free diff --git a/frame/3/trsm/bli_trsm_front.c b/frame/3/trsm/bli_trsm_front.c index 2bc6d0186..081a2c284 100644 --- a/frame/3/trsm/bli_trsm_front.c +++ b/frame/3/trsm/bli_trsm_front.c @@ -121,10 +121,34 @@ void bli_trsm_front bli_obj_set_as_root( &c_local ); // Record the threading for each level within the context. 
- bli_cntx_set_thrloop_from_env( BLIS_TRSM, side, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_TRSM, + side, + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); + + // A sort of hack for communicating the desired pack schemas for A and B + // to bli_trsm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, particularly + // in bli_packm_init(). + if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &b_local ); + } // Invoke the internal back-end. bli_l3_thread_decorator diff --git a/frame/3/trsm/bli_trsm_ll_ker_var2.c b/frame/3/trsm/bli_trsm_ll_ker_var2.c index 17c4b6d7a..693a79006 100644 --- a/frame/3/trsm/bli_trsm_ll_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ll_ker_var2.c @@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \ \ /* Cast the micro-kernel address to its function pointer type. */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ - gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ + gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. 
Note that the strides of this temporary buffer are set so that they match the storage of the @@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -340,7 +340,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ - if( trsm_my_iter( j, thread ) ) { \ + if( bli_trsm_my_iter( j, thread ) ) { \ \ ctype* restrict a1; \ ctype* restrict c11; \ diff --git a/frame/3/trsm/bli_trsm_lu_ker_var2.c b/frame/3/trsm/bli_trsm_lu_ker_var2.c index f12bbb194..0daa91639 100644 --- a/frame/3/trsm/bli_trsm_lu_ker_var2.c +++ b/frame/3/trsm/bli_trsm_lu_ker_var2.c @@ -162,9 +162,9 @@ void PASTEMAC(ch,varname) \ \ /* Cast the micro-kernel address to its function pointer type. */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ - gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \ + gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -173,7 +173,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 
1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -348,7 +348,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the n dimension (NR columns at a time). */ \ for ( j = 0; j < n_iter; ++j ) \ { \ - if( trsm_my_iter( j, thread ) ) { \ + if( bli_trsm_my_iter( j, thread ) ) { \ \ ctype* restrict a1; \ ctype* restrict c11; \ diff --git a/frame/3/trsm/bli_trsm_rl_ker_var2.c b/frame/3/trsm/bli_trsm_rl_ker_var2.c index be7904936..820142f27 100644 --- a/frame/3/trsm/bli_trsm_rl_ker_var2.c +++ b/frame/3/trsm/bli_trsm_rl_ker_var2.c @@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \ is transposed so that all kernel instances are of the "left" variety (since those are the only trsm ukernels that exist). */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ - gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \ + gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_U_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -422,7 +422,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if( trsm_my_iter( i, thread ) ){ \ + if( bli_trsm_my_iter( i, thread ) ){ \ \ ctype* restrict a11; \ ctype* restrict a12; \ @@ -508,7 +508,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). 
*/ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if( trsm_my_iter( i, thread ) ){ \ + if( bli_trsm_my_iter( i, thread ) ){ \ \ ctype* restrict a2; \ \ diff --git a/frame/3/trsm/bli_trsm_ru_ker_var2.c b/frame/3/trsm/bli_trsm_ru_ker_var2.c index e1f2694b0..9d9e3a040 100644 --- a/frame/3/trsm/bli_trsm_ru_ker_var2.c +++ b/frame/3/trsm/bli_trsm_ru_ker_var2.c @@ -167,9 +167,9 @@ void PASTEMAC(ch,varname) \ is transposed so that all kernel instances are of the "left" variety (since those are the only trsm ukernels that exist). */ \ PASTECH(ch,gemmtrsm_ukr_ft) \ - gemmtrsm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ + gemmtrsm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMMTRSM_L_UKR, cntx ); \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -178,7 +178,7 @@ void PASTEMAC(ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -415,7 +415,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). */ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if( trsm_my_iter( i, thread ) ){ \ + if( bli_trsm_my_iter( i, thread ) ){ \ \ ctype* restrict a10; \ ctype* restrict a11; \ @@ -501,7 +501,7 @@ void PASTEMAC(ch,varname) \ /* Loop over the m dimension (MR rows at a time). 
*/ \ for ( i = 0; i < m_iter; ++i ) \ { \ - if( trsm_my_iter( i, thread ) ){ \ + if( bli_trsm_my_iter( i, thread ) ){ \ \ ctype* restrict a2; \ \ diff --git a/frame/base/bli_auxinfo.h b/frame/base/bli_auxinfo.h index 29facd2ae..3fbc8dbdb 100644 --- a/frame/base/bli_auxinfo.h +++ b/frame/base/bli_auxinfo.h @@ -53,7 +53,7 @@ static void* bli_auxinfo_next_a( auxinfo_t* ai ) } static void* bli_auxinfo_next_b( auxinfo_t* ai ) { - return ai->a_next; + return ai->b_next; } static inc_t bli_auxinfo_is_a( auxinfo_t* ai ) diff --git a/frame/base/bli_blksz.h b/frame/base/bli_blksz.h index 5be816775..31c24d93b 100644 --- a/frame/base/bli_blksz.h +++ b/frame/base/bli_blksz.h @@ -172,6 +172,18 @@ static void bli_blksz_scale_max bli_blksz_set_max( ( val * num ) / den, dt, b ); } +static void bli_blksz_scale_def_max + ( + dim_t num, + dim_t den, + num_t dt, + blksz_t* b + ) +{ + bli_blksz_scale_def( num, den, dt, b ); + bli_blksz_scale_max( num, den, dt, b ); +} + // ----------------------------------------------------------------------------- blksz_t* bli_blksz_create_ed diff --git a/frame/base/bli_check.c b/frame/base/bli_check.c index 4c9993b94..3e42758ba 100644 --- a/frame/base/bli_check.c +++ b/frame/base/bli_check.c @@ -342,6 +342,40 @@ err_t bli_check_real_valued_object( obj_t* a ) return e_val; } +err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b ) +{ + err_t e_val = BLIS_SUCCESS; + + if ( dt_a == BLIS_FLOAT ) + { + if ( dt_b != BLIS_FLOAT && + dt_b != BLIS_SCOMPLEX ) + e_val = BLIS_INCONSISTENT_PRECISIONS; + } + else if ( dt_a == BLIS_DOUBLE ) + { + if ( dt_b != BLIS_DOUBLE && + dt_b != BLIS_DCOMPLEX ) + e_val = BLIS_INCONSISTENT_PRECISIONS; + } + + return e_val; +} + +err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b ) +{ + err_t e_val; + num_t dt_a; + num_t dt_b; + + dt_a = bli_obj_dt( a ); + dt_b = bli_obj_dt( b ); + + e_val = bli_check_consistent_precisions( dt_a, dt_b ); + + return e_val; +} + // -- Dimension-related checks 
------------------------------------------------- err_t bli_check_conformal_dims( obj_t* a, obj_t* b ) diff --git a/frame/base/bli_check.h b/frame/base/bli_check.h index bd5cd064f..dd76054e7 100644 --- a/frame/base/bli_check.h +++ b/frame/base/bli_check.h @@ -62,6 +62,8 @@ err_t bli_check_consistent_object_datatypes( obj_t* a, obj_t* b ); err_t bli_check_datatype_real_proj_of( num_t dt_c, num_t dt_r ); err_t bli_check_object_real_proj_of( obj_t* c, obj_t* r ); err_t bli_check_real_valued_object( obj_t* a ); +err_t bli_check_consistent_precisions( num_t dt_a, num_t dt_b ); +err_t bli_check_consistent_object_precisions( obj_t* a, obj_t* b ); err_t bli_check_conformal_dims( obj_t* a, obj_t* b ); err_t bli_check_level3_dims( obj_t* a, obj_t* b, obj_t* c ); diff --git a/frame/base/bli_cntx.c b/frame/base/bli_cntx.c index db2d73953..662e4585b 100644 --- a/frame/base/bli_cntx.c +++ b/frame/base/bli_cntx.c @@ -544,8 +544,10 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... ) // -- End variable argument section -- // Query the context for the addresses of: + // - the l3 virtual ukernel func_t array // - the l3 native ukernel func_t array // - the l3 native ukernel preferences array + func_t* cntx_l3_vir_ukrs = bli_cntx_l3_vir_ukrs_buf( cntx ); func_t* cntx_l3_nat_ukrs = bli_cntx_l3_nat_ukrs_buf( cntx ); mbool_t* cntx_l3_nat_ukrs_prefs = bli_cntx_l3_nat_ukrs_prefs_buf( cntx ); @@ -565,11 +567,18 @@ void bli_cntx_set_l3_nat_ukrs( dim_t n_ukrs, ... ) // Index into the func_t and mbool_t for the current kernel id // being processed. + func_t* vukrs = &cntx_l3_vir_ukrs[ ukr_id ]; func_t* ukrs = &cntx_l3_nat_ukrs[ ukr_id ]; mbool_t* prefs = &cntx_l3_nat_ukrs_prefs[ ukr_id ]; // Store the ukernel function pointer and preference values into - // the context. + // the context. Notice that we redundantly store the native + // ukernel address in both the native and virtual ukernel slots + // in the context. This is standard practice when creating a + // native context. 
(Induced method contexts will overwrite the + // virtual function pointer with the address of the appropriate + // virtual ukernel.) + bli_func_set_dt( ukr_fp, ukr_dt, vukrs ); bli_func_set_dt( ukr_fp, ukr_dt, ukrs ); bli_mbool_set_dt( ukr_pref, ukr_dt, prefs ); } @@ -869,10 +878,10 @@ void bli_cntx_set_thrloop_from_env ( opid_t l3_op, side_t side, - cntx_t* cntx, dim_t m, dim_t n, - dim_t k + dim_t k, + cntx_t* cntx ) { dim_t jc, pc, ic, jr, ir; @@ -934,8 +943,8 @@ void bli_cntx_set_thrloop_from_env if ( l3_op == BLIS_TRMM ) { - // We reconfigure the paralelism from trmm_r due to a dependency in - // the jc loop. (NOTE: This dependency does not exist for trmm3 ) + // We reconfigure the parallelism from trmm_r due to a dependency in + // the jc loop. (NOTE: This dependency does not exist for trmm3.) if ( bli_is_right( side ) ) { bli_cntx_set_thrloop @@ -988,7 +997,7 @@ void bli_cntx_set_thrloop_from_env ); } } - else // if ( l3_op == BLIS_TRSM ) + else // any other level-3 operation besides trmm/trsm { bli_cntx_set_thrloop ( diff --git a/frame/base/bli_cntx.h b/frame/base/bli_cntx.h index ac43312bc..14963ba67 100644 --- a/frame/base/bli_cntx.h +++ b/frame/base/bli_cntx.h @@ -60,8 +60,6 @@ typedef struct cntx_s pack_t schema_b; pack_t schema_c; - bool_t anti_pref; - dim_t* thrloop; membrk_t* membrk; @@ -126,10 +124,6 @@ static pack_t bli_cntx_schema_c_panel( cntx_t* cntx ) { return cntx->schema_c_panel; } -static bool_t bli_cntx_anti_pref( cntx_t* cntx ) -{ - return cntx->anti_pref; -} static dim_t* bli_cntx_thrloop( cntx_t* cntx ) { return cntx->thrloop; @@ -166,10 +160,6 @@ static void bli_cntx_set_schema_ab_blockpanel( pack_t sa, pack_t sb, cntx_t* cnt bli_cntx_set_schema_a_block( sa, cntx ); bli_cntx_set_schema_b_panel( sb, cntx ); } -static void bli_cntx_set_anti_pref( bool_t anti_pref, cntx_t* cntx ) -{ - cntx->anti_pref = anti_pref; -} static void bli_cntx_set_membrk( membrk_t* membrk, cntx_t* cntx ) { cntx->membrk = membrk; @@ -234,27 +224,6 @@ static dim_t 
bli_cntx_get_bmult_dt( num_t dt, bszid_t bs_id, cntx_t* cntx ) // ----------------------------------------------------------------------------- -static func_t* bli_cntx_get_l3_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) -{ - func_t* funcs; - - if ( bli_cntx_method( (cntx) ) != BLIS_NAT ) - funcs = bli_cntx_l3_vir_ukrs_buf( cntx ); - else - funcs = bli_cntx_l3_nat_ukrs_buf( cntx ); - - func_t* func = &funcs[ ukr_id ]; - - return func; -} - -static void* bli_cntx_get_l3_ukr_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) -{ - func_t* func = bli_cntx_get_l3_ukrs( ukr_id, cntx ); - - return bli_func_get_dt( dt, func ); -} - static func_t* bli_cntx_get_l3_vir_ukrs( l3ukr_t ukr_id, cntx_t* cntx ) { func_t* funcs = bli_cntx_l3_vir_ukrs_buf( cntx ); @@ -487,55 +456,43 @@ static bool_t bli_cntx_l3_nat_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_i return !bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx ); } -static bool_t bli_cntx_l3_nat_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) -{ - bool_t r_val = bli_cntx_l3_nat_ukr_prefers_storage_of( obj, ukr_id, cntx ); - - // If the anti-preference is set, negate the result. - if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val; - - return r_val; -} - -static bool_t bli_cntx_l3_nat_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) -{ - bool_t r_val = bli_cntx_l3_nat_ukr_dislikes_storage_of( obj, ukr_id, cntx ); - - // If the anti-preference is set, negate the result. - if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val; - - return r_val; -} - // ----------------------------------------------------------------------------- -static bool_t bli_cntx_l3_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) +static bool_t bli_cntx_l3_vir_ukr_prefers_rows_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { // For induced methods, return the ukernel storage preferences of the // corresponding real micro-kernel. 
+ // NOTE: This projection to real domain becomes unnecessary if you + // set the exec_dt for 1m to the real projection of the storage + // datatype. if ( bli_cntx_method( cntx ) != BLIS_NAT ) dt = bli_dt_proj_to_real( dt ); return bli_cntx_l3_nat_ukr_prefers_rows_dt( dt, ukr_id, cntx ); } -static bool_t bli_cntx_l3_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) +static bool_t bli_cntx_l3_vir_ukr_prefers_cols_dt( num_t dt, l3ukr_t ukr_id, cntx_t* cntx ) { // For induced methods, return the ukernel storage preferences of the // corresponding real micro-kernel. + // NOTE: This projection to real domain becomes unnecessary if you + // set the exec_dt for 1m to the real projection of the storage + // datatype. if ( bli_cntx_method( cntx ) != BLIS_NAT ) dt = bli_dt_proj_to_real( dt ); return bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, ukr_id, cntx ); } -static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) +static bool_t bli_cntx_l3_vir_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) { - const num_t dt = bli_obj_dt( obj ); + // Note that we use the execution datatype, which may differ from the + // storage datatype of C (though this would happen in very few situations). 
+ const num_t dt = bli_obj_exec_dt( obj ); const bool_t ukr_prefers_rows - = bli_cntx_l3_ukr_prefers_rows_dt( dt, ukr_id, cntx ); + = bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, ukr_id, cntx ); const bool_t ukr_prefers_cols - = bli_cntx_l3_ukr_prefers_cols_dt( dt, ukr_id, cntx ); + = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, ukr_id, cntx ); bool_t r_val = FALSE; if ( bli_obj_is_row_stored( obj ) && ukr_prefers_rows ) r_val = TRUE; @@ -544,29 +501,9 @@ static bool_t bli_cntx_l3_ukr_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cn return r_val; } -static bool_t bli_cntx_l3_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) +static bool_t bli_cntx_l3_vir_ukr_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) { - return !bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx ); -} - -static bool_t bli_cntx_l3_ukr_eff_prefers_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) -{ - bool_t r_val = bli_cntx_l3_ukr_prefers_storage_of( obj, ukr_id, cntx ); - - // If the anti-preference is set, negate the result. - if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val; - - return r_val; -} - -static bool_t bli_cntx_l3_ukr_eff_dislikes_storage_of( obj_t* obj, l3ukr_t ukr_id, cntx_t* cntx ) -{ - bool_t r_val = bli_cntx_l3_ukr_dislikes_storage_of( obj, ukr_id, cntx ); - - // If the anti-preference is set, negate the result. - if ( bli_cntx_anti_pref( cntx ) ) r_val = !r_val; - - return r_val; + return !bli_cntx_l3_vir_ukr_prefers_storage_of( obj, ukr_id, cntx ); } // ----------------------------------------------------------------------------- @@ -674,12 +611,15 @@ void bli_cntx_set_l1f_kers( dim_t n_kers, ... ); void bli_cntx_set_l1v_kers( dim_t n_kers, ... ); void bli_cntx_set_packm_kers( dim_t n_kers, ... 
); -void bli_cntx_set_thrloop_from_env( opid_t l3_op, - side_t side, - cntx_t* cntx, - dim_t m, - dim_t n, - dim_t k ); +void bli_cntx_set_thrloop_from_env + ( + opid_t l3_op, + side_t side, + dim_t m, + dim_t n, + dim_t k, + cntx_t* cntx + ); void bli_cntx_print( cntx_t* cntx ); diff --git a/frame/base/bli_error.c b/frame/base/bli_error.c index d78c48387..710e34028 100644 --- a/frame/base/bli_error.c +++ b/frame/base/bli_error.c @@ -90,6 +90,8 @@ void bli_error_init_msgs( void ) "Expected second datatype to be real projection of first." ); sprintf( bli_error_string_for_code(BLIS_EXPECTED_REAL_VALUED_OBJECT), "Expected real-valued object (ie: if complex, imaginary component equals zero)." ); + sprintf( bli_error_string_for_code(BLIS_INCONSISTENT_PRECISIONS), + "Expected consistent precisions (both single or both double)." ); sprintf( bli_error_string_for_code(BLIS_NONCONFORMAL_DIMENSIONS), "Encountered non-conformal dimensions between objects." ); diff --git a/frame/base/bli_gks.c b/frame/base/bli_gks.c index 884d655b4..02b20fb32 100644 --- a/frame/base/bli_gks.c +++ b/frame/base/bli_gks.c @@ -584,7 +584,7 @@ char* bli_gks_l3_ukr_impl_string( l3ukr_t ukr, ind_t method, num_t dt ) // then query the ukernel function pointer for the given datatype from // that context. cntx_t* cntx = bli_gks_query_ind_cntx( method, dt ); - void* fp = bli_cntx_get_l3_ukr_dt( dt, ukr, cntx ); + void* fp = bli_cntx_get_l3_vir_ukr_dt( dt, ukr, cntx ); // Check whether the ukernel function pointer is NULL for the given // datatype. If it is NULL, return the string for not applicable. 
diff --git a/frame/base/bli_param_map.c b/frame/base/bli_param_map.c index b50f5010b..a2d90011e 100644 --- a/frame/base/bli_param_map.c +++ b/frame/base/bli_param_map.c @@ -210,6 +210,19 @@ void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag ) } } +void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt ) +{ + if ( dt == 's' ) *blis_dt = BLIS_FLOAT; + else if ( dt == 'd' ) *blis_dt = BLIS_DOUBLE; + else if ( dt == 'c' ) *blis_dt = BLIS_SCOMPLEX; + else if ( dt == 'z' ) *blis_dt = BLIS_DCOMPLEX; + else if ( dt == 'i' ) *blis_dt = BLIS_INT; + else + { + bli_check_error_code( BLIS_INVALID_DATATYPE ); + } +} + // --- BLIS to BLIS char mappings ---------------------------------------------- @@ -265,3 +278,16 @@ void bli_param_map_blis_to_char_diag( diag_t blis_diag, char* diag ) } } +void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt ) +{ + if ( blis_dt == BLIS_FLOAT ) *dt = 's'; + else if ( blis_dt == BLIS_DOUBLE ) *dt = 'd'; + else if ( blis_dt == BLIS_SCOMPLEX ) *dt = 'c'; + else if ( blis_dt == BLIS_DCOMPLEX ) *dt = 'z'; + else if ( blis_dt == BLIS_INT ) *dt = 'i'; + else + { + bli_check_error_code( BLIS_INVALID_DATATYPE ); + } +} + diff --git a/frame/base/bli_param_map.h b/frame/base/bli_param_map.h index 75738cd62..6cae9ee7b 100644 --- a/frame/base/bli_param_map.h +++ b/frame/base/bli_param_map.h @@ -57,6 +57,7 @@ void bli_param_map_char_to_blis_uplo( char uplo, uplo_t* blis_uplo ); void bli_param_map_char_to_blis_trans( char trans, trans_t* blis_trans ); void bli_param_map_char_to_blis_conj( char conj, conj_t* blis_conj ); void bli_param_map_char_to_blis_diag( char diag, diag_t* blis_diag ); +void bli_param_map_char_to_blis_dt( char dt, num_t* blis_dt ); // --- BLIS to BLIS char mappings ---------------------------------------------- @@ -66,4 +67,5 @@ void bli_param_map_blis_to_char_uplo( uplo_t blis_uplo, char* uplo ); void bli_param_map_blis_to_char_trans( trans_t blis_trans, char* trans ); void bli_param_map_blis_to_char_conj( 
conj_t blis_conj, char* conj ); void bli_param_map_blis_to_char_diag( diag_t blis_diag, char* diag ); +void bli_param_map_blis_to_char_dt( num_t blis_dt, char* dt ); diff --git a/frame/base/bli_part.c b/frame/base/bli_part.c index 47fa4fdfd..d05eabb79 100644 --- a/frame/base/bli_part.c +++ b/frame/base/bli_part.c @@ -38,6 +38,49 @@ // -- Matrix partitioning ------------------------------------------------------ +void bli_acquire_mpart + ( + dim_t i, + dim_t j, + dim_t bm, + dim_t bn, + obj_t* parent, + obj_t* child + ) +{ + // Query the dimensions of the parent object. + const dim_t m_par = bli_obj_length( parent ); + const dim_t n_par = bli_obj_width( parent ); + + // If either i or j is already beyond what exists of the parent matrix, + // slide them back to the outer dimensions. (What will happen in this + // scenario is that bm and/or bn will be reduced to zero so that the + // child matrix does not refer to anything beyond the bounds of the + // parent.) (Note: This is a safety measure and generally should never + // be needed if the caller is passing in sane arguments.) + if ( i > m_par ) i = m_par; + if ( j > n_par ) j = n_par; + + // If either bm or bn spills out over the edge of the parent matrix, + // reduce them so that the child matrix fits within the bounds of the + // parent. (Note: This is a safety measure and generally should never + // be needed if the caller is passing in sane arguments, though this + // code is somewhat more likely to be needed than the code above.) + if ( bm > m_par - i ) bm = m_par - i; + if ( bn > n_par - j ) bn = n_par - j; + + // Alias the parent object's contents into the child object. + bli_obj_alias_to( parent, child ); + + // Set the offsets and dimensions of the child object. Note that we + // increment, rather than overwrite, the offsets of the child object + // in case the parent object already had non-zero offsets (usually + // because the parent was itself a child of a larger grandparent object). 
+ bli_obj_inc_offs( i, j, child ); + bli_obj_set_dims( bm, bn, child ); +} + + void bli_acquire_mpart_mdim ( dir_t direct, diff --git a/frame/base/bli_part.h b/frame/base/bli_part.h index fd24f1d82..284a87ffa 100644 --- a/frame/base/bli_part.h +++ b/frame/base/bli_part.h @@ -36,6 +36,16 @@ // -- Matrix partitioning ------------------------------------------------------ +void bli_acquire_mpart + ( + dim_t i, + dim_t j, + dim_t m, + dim_t n, + obj_t* obj, + obj_t* sub_obj + ); + #undef GENPROT #define GENPROT( opname ) \ \ diff --git a/frame/base/bli_setri.c b/frame/base/bli_setri.c new file mode 100644 index 000000000..054ea3d9b --- /dev/null +++ b/frame/base/bli_setri.c @@ -0,0 +1,162 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +// -- setr --------------------------------------------------------------------- + +void bli_setrm + ( + obj_t* alpha, + obj_t* b + ) +{ + obj_t alpha_real; + obj_t br; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_setm_check( alpha, b ); + + // Initialize a local scalar, alpha_real, using the real projection + // of the datatype of b. + bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ), + &alpha_real ); + + // Copy/typecast alpha to alpha_real. This discards the imaginary + // part of alpha (if it is complex). + bli_copysc( alpha, &alpha_real ); + + // Acquire an alias to the real part of b. + bli_obj_real_part( b, &br ); + + // Use setm to set the real part of b to alpha_real. + bli_setm( &alpha_real, &br ); +} + +void bli_setrv + ( + obj_t* alpha, + obj_t* x + ) +{ + obj_t alpha_real; + obj_t xr; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_setv_check( alpha, x ); + + // Initialize a local scalar, alpha_real, using the real projection + // of the datatype of x. + bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ), + &alpha_real ); + + // Copy/typecast alpha to alpha_real. This discards the imaginary + // part of alpha (if it is complex). + bli_copysc( alpha, &alpha_real ); + + // Acquire an alias to the real part of x. + bli_obj_real_part( x, &xr ); + + // Use setv to set the real part of x to alpha_real. 
+ bli_setv( &alpha_real, &xr ); +} + +// -- seti --------------------------------------------------------------------- + +void bli_setim + ( + obj_t* alpha, + obj_t* b + ) +{ + obj_t alpha_real; + obj_t bi; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_setm_check( alpha, b ); + + // If the object is real, return early. + if ( bli_obj_is_real( b ) ) return; + + // Initialize a local scalar, alpha_real, using the real projection + // of the datatype of b. + bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( b ), + &alpha_real ); + + // Copy/typecast alpha to alpha_real. This discards the imaginary + // part of alpha (if it is complex). + bli_copysc( alpha, &alpha_real ); + + // Acquire an alias to the imaginary part of b. + bli_obj_imag_part( b, &bi ); + + // Use setm to set the imaginary part of b to alpha_real. + bli_setm( &alpha_real, &bi ); +} + +void bli_setiv + ( + obj_t* alpha, + obj_t* x + ) +{ + obj_t alpha_real; + obj_t xi; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_setv_check( alpha, x ); + + // If the object is real, return early. + if ( bli_obj_is_real( x ) ) return; + + // Initialize a local scalar, alpha_real, using the real projection + // of the datatype of x. + bli_obj_scalar_init_detached( bli_obj_dt_proj_to_real( x ), + &alpha_real ); + + // Copy/typecast alpha to alpha_real. This discards the imaginary + // part of alpha (if it is complex). + bli_copysc( alpha, &alpha_real ); + + // Acquire an alias to the imaginary part of x. + bli_obj_imag_part( x, &xi ); + + // Use setm to set the imaginary part of x to alpha_real. + bli_setm( &alpha_real, &xi ); +} + diff --git a/frame/base/bli_setri.h b/frame/base/bli_setri.h new file mode 100644 index 000000000..a08eeaad1 --- /dev/null +++ b/frame/base/bli_setri.h @@ -0,0 +1,62 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +// -- setr --------------------------------------------------------------------- + +void bli_setrm + ( + obj_t* alpha, + obj_t* b + ); + +void bli_setrv + ( + obj_t* alpha, + obj_t* x + ); + +// -- seti --------------------------------------------------------------------- + +void bli_setim + ( + obj_t* alpha, + obj_t* b + ); + +void bli_setiv + ( + obj_t* alpha, + obj_t* x + ); + diff --git a/frame/base/cast/bli_castm.c b/frame/base/cast/bli_castm.c new file mode 100644 index 000000000..84d4c8ca6 --- /dev/null +++ b/frame/base/cast/bli_castm.c @@ -0,0 +1,267 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +// NOTE: This is one of the few functions in BLIS that is defined +// with heterogeneous type support. This is done so that we have +// an operation that can be used to typecast (copy-cast) a matrix +// of one datatype to a matrix of another datatype. + +typedef void (*FUNCPTR_T) + ( + trans_t transa, + dim_t m, + dim_t n, + void* restrict a, inc_t rs_a, inc_t cs_a, + void* restrict b, inc_t rs_b, inc_t cs_b + ); + +static FUNCPTR_T GENARRAY2_ALL(ftypes,castm); + +// +// Define object-based interface. +// + +void bli_castm + ( + obj_t* a, + obj_t* b + ) +{ + num_t dt_a = bli_obj_dt( a ); + num_t dt_b = bli_obj_dt( b ); + + trans_t transa = bli_obj_conjtrans_status( a ); + + dim_t m = bli_obj_length( b ); + dim_t n = bli_obj_width( b ); + + void* buf_a = bli_obj_buffer_at_off( a ); + inc_t rs_a = bli_obj_row_stride( a ); + inc_t cs_a = bli_obj_col_stride( a ); + + void* buf_b = bli_obj_buffer_at_off( b ); + inc_t rs_b = bli_obj_row_stride( b ); + inc_t cs_b = bli_obj_col_stride( b ); + + FUNCPTR_T f; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_castm_check( a, b ); + +#if 0 + if ( bli_obj_dt( a ) == bli_obj_dt( b ) ) + { + // If a and b share the same datatype, we can simply use copym. + bli_copym( a, b ); + return; + } +#endif + + // Index into the type combination array to extract the correct + // function pointer. + f = ftypes[dt_a][dt_b]; + + // Invoke the void pointer-based function. 
+ f + ( + transa, + m, + n, + buf_a, rs_a, cs_a, + buf_b, rs_b, cs_b + ); +} + +// ----------------------------------------------------------------------------- + +// +// Define BLAS-like interfaces with typed operands. +// + +#undef GENTFUNC2 +#define GENTFUNC2( ctype_a, ctype_b, cha, chb, opname ) \ +\ +void PASTEMAC2(cha,chb,opname) \ + ( \ + trans_t transa, \ + dim_t m, \ + dim_t n, \ + void* restrict a, inc_t rs_a, inc_t cs_a, \ + void* restrict b, inc_t rs_b, inc_t cs_b \ + ) \ +{ \ + ctype_a* restrict a_cast = a; \ + ctype_b* restrict b_cast = b; \ + conj_t conja; \ + dim_t n_iter; \ + dim_t n_elem; \ + inc_t lda, inca; \ + inc_t ldb, incb; \ + dim_t j, i; \ +\ + /* Set various loop parameters. */ \ + bli_set_dims_incs_2m \ + ( \ + transa, \ + m, n, rs_a, cs_a, rs_b, cs_b, \ + &n_elem, &n_iter, &inca, &lda, &incb, &ldb \ + ); \ +\ + /* Extract the conjugation component from the transa parameter. */ \ + conja = bli_extract_conj( transa ); \ +\ + if ( bli_is_conj( conja ) ) \ + { \ + if ( inca == 1 && incb == 1 ) \ + { \ + for ( j = 0; j < n_iter; ++j ) \ + { \ + ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ + ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ +\ + for ( i = 0; i < n_elem; ++i ) \ + { \ + PASTEMAC2(cha,chb,copyjs)( a1[i], b1[i] ); \ + } \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n_iter; ++j ) \ + { \ + ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ + ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ +\ + for ( i = 0; i < n_elem; ++i ) \ + { \ + PASTEMAC2(cha,chb,copyjs)( *a1, *b1 ); \ +\ + a1 += inca; \ + b1 += incb; \ + } \ + } \ + } \ + } \ + else \ + { \ + if ( inca == 1 && incb == 1 ) \ + { \ + for ( j = 0; j < n_iter; ++j ) \ + { \ + ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ + ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ +\ + for ( i = 0; i < n_elem; ++i ) \ + { \ + PASTEMAC2(cha,chb,copys)( a1[i], b1[i] ); \ + } \ + } \ + } \ + else \ + { \ + for ( j = 0; j < n_iter; ++j ) \ + { 
\ + ctype_a* restrict a1 = a_cast + (j )*lda + (0 )*inca; \ + ctype_b* restrict b1 = b_cast + (j )*ldb + (0 )*incb; \ +\ + for ( i = 0; i < n_elem; ++i ) \ + { \ + PASTEMAC2(cha,chb,copys)( *a1, *b1 ); \ +\ + a1 += inca; \ + b1 += incb; \ + } \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC2_BASIC0( castm ) +INSERT_GENTFUNC2_MIXDP0( castm ) + +// ----------------------------------------------------------------------------- + +// +// Define object-based _check() function. +// + +void bli_castm_check + ( + obj_t* a, + obj_t* b + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( b ); + bli_check_error_code( e_val ); + + // Check structure. + // NOTE: We enforce general structure for now in order to simplify the + // implementation. + + e_val = bli_check_general_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_general_object( b ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_matrix_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_matrix_object( b ); + bli_check_error_code( e_val ); + + e_val = bli_check_conformal_dims( a, b ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). + + e_val = bli_check_object_buffer( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( b ); + bli_check_error_code( e_val ); +} + diff --git a/frame/base/cast/bli_castm.h b/frame/base/cast/bli_castm.h new file mode 100644 index 000000000..353f25f33 --- /dev/null +++ b/frame/base/cast/bli_castm.h @@ -0,0 +1,73 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. 
+ + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +// +// Prototype object-based interface. +// + +void bli_castm + ( + obj_t* a, + obj_t* b + ); + +// +// Prototype BLAS-like interfaces with heterogeneous-typed operands. 
+// + +#undef GENTPROT2 +#define GENTPROT2( ctype_a, ctype_b, cha, chb, opname ) \ +\ +void PASTEMAC2(cha,chb,opname) \ + ( \ + trans_t transa, \ + dim_t m, \ + dim_t n, \ + void* a, inc_t rs_a, inc_t cs_a, \ + void* b, inc_t rs_b, inc_t cs_b \ + ); + +INSERT_GENTPROT2_BASIC0( castm ) +INSERT_GENTPROT2_MIXDP0( castm ) + +// +// Prototype object-based _check() function. +// + +void bli_castm_check + ( + obj_t* a, + obj_t* b + ); + diff --git a/frame/base/cast/bli_castv.c b/frame/base/cast/bli_castv.c new file mode 100644 index 000000000..e6af84f3b --- /dev/null +++ b/frame/base/cast/bli_castv.c @@ -0,0 +1,211 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +// NOTE: This is one of the few functions in BLIS that is defined +// with heterogeneous type support. This is done so that we have +// an operation that can be used to typecast (copy-cast) a vector +// of one datatype to a vector of another datatype. + +typedef void (*FUNCPTR_T) + ( + conj_t conjx, + dim_t n, + void* restrict x, inc_t inc_x, + void* restrict y, inc_t inc_y + ); + +static FUNCPTR_T GENARRAY2_ALL(ftypes,castv); + +// +// Define object-based interface. +// + +void bli_castv + ( + obj_t* x, + obj_t* y + ) +{ + num_t dt_x = bli_obj_dt( x ); + num_t dt_y = bli_obj_dt( y ); + + conj_t conjx = bli_obj_conj_status( x ); + + dim_t n = bli_obj_vector_dim( x ); + + void* buf_x = bli_obj_buffer_at_off( x ); + inc_t inc_x = bli_obj_vector_inc( x ); + + void* buf_y = bli_obj_buffer_at_off( y ); + inc_t inc_y = bli_obj_vector_inc( y ); + + FUNCPTR_T f; + + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_castv_check( x, y ); + +#if 0 + if ( bli_obj_dt( x ) == bli_obj_dt( y ) ) + { + // If x and y share the same datatype, we can simply use copyv. + bli_copyv( x, y ); + return; + } +#endif + + // Index into the type combination array to extract the correct + // function pointer. + f = ftypes[dt_x][dt_y]; + + // Invoke the void pointer-based function. 
+ f + ( + conjx, + n, + buf_x, inc_x, + buf_y, inc_y + ); +} + +// ----------------------------------------------------------------------------- + +// +// Define BLAS-like interfaces with typed operands. +// + +#undef GENTFUNC2 +#define GENTFUNC2( ctype_x, ctype_y, chx, chy, opname ) \ +\ +void PASTEMAC2(chx,chy,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + void* restrict x, inc_t incx, \ + void* restrict y, inc_t incy \ + ) \ +{ \ + ctype_x* restrict x1 = x; \ + ctype_y* restrict y1 = y; \ + dim_t i; \ +\ + if ( bli_is_conj( conjx ) ) \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC2(chx,chy,copyjs)( x1[i], y1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC2(chx,chy,copyjs)( *x1, *y1 ); \ +\ + x1 += incx; \ + y1 += incy; \ + } \ + } \ + } \ + else \ + { \ + if ( incx == 1 && incy == 1 ) \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC2(chx,chy,copys)( x1[i], y1[i] ); \ + } \ + } \ + else \ + { \ + for ( i = 0; i < n; ++i ) \ + { \ + PASTEMAC2(chx,chy,copys)( *x1, *y1 ); \ +\ + x1 += incx; \ + y1 += incy; \ + } \ + } \ + } \ +} + +INSERT_GENTFUNC2_BASIC0( castv ) +INSERT_GENTFUNC2_MIXDP0( castv ) + +// ----------------------------------------------------------------------------- + +// +// Define object-based _check() function. +// + +void bli_castv_check + ( + obj_t* x, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). 
+ + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + diff --git a/frame/base/cast/bli_castv.h b/frame/base/cast/bli_castv.h new file mode 100644 index 000000000..1e1175184 --- /dev/null +++ b/frame/base/cast/bli_castv.h @@ -0,0 +1,72 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +*/ + +// +// Prototype object-based interface. +// + +void bli_castv + ( + obj_t* x, + obj_t* y + ); + +// +// Prototype BLAS-like interfaces with heterogeneous-typed operands. +// + +#undef GENTPROT2 +#define GENTPROT2( ctype_x, ctype_y, chx, chy, opname ) \ +\ +void PASTEMAC2(chx,chy,opname) \ + ( \ + conj_t conjx, \ + dim_t n, \ + void* x, inc_t incx, \ + void* y, inc_t incy \ + ); + +INSERT_GENTPROT2_BASIC0( castv ) +INSERT_GENTPROT2_MIXDP0( castv ) + +// +// Prototype object-based _check() function. +// + +void bli_castv_check + ( + obj_t* x, + obj_t* y + ); + diff --git a/frame/base/cast/old/bli_cast_check.c b/frame/base/cast/old/bli_cast_check.c new file mode 100644 index 000000000..3d2ea0b6f --- /dev/null +++ b/frame/base/cast/old/bli_cast_check.c @@ -0,0 +1,118 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_castm_check + ( + obj_t* a, + obj_t* b + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( b ); + bli_check_error_code( e_val ); + + // Check structure. + // NOTE: We enforce general structure for now in order to simplify the + // implementation. + + e_val = bli_check_general_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_general_object( b ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_matrix_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_matrix_object( b ); + bli_check_error_code( e_val ); + + e_val = bli_check_conformal_dims( a, b ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). + + e_val = bli_check_object_buffer( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( b ); + bli_check_error_code( e_val ); +} + +void bli_castv_check + ( + obj_t* x, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + // Check object dimensions. 
+ + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). + + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + diff --git a/frame/base/cast/old/bli_cast_check.h b/frame/base/cast/old/bli_cast_check.h new file mode 100644 index 000000000..eb3356b8e --- /dev/null +++ b/frame/base/cast/old/bli_cast_check.h @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_castm_check + ( + obj_t* a, + obj_t* b + ); + +void bli_castv_check + ( + obj_t* x, + obj_t* y + ); diff --git a/frame/base/proj/bli_projm.c b/frame/base/proj/bli_projm.c new file mode 100644 index 000000000..e208a79f3 --- /dev/null +++ b/frame/base/proj/bli_projm.c @@ -0,0 +1,127 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_projm + ( + obj_t* a, + obj_t* b + ) +{ + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_projm_check( a, b ); + + if ( ( bli_obj_is_real( a ) && bli_obj_is_real( b ) ) || + ( bli_obj_is_complex( a ) && bli_obj_is_complex( b ) ) ) + { + // If a and b are both real or both complex, we can simply use + // copym. + bli_copym( a, b ); + } + else + { + // This branch handles the case where one operand is real and + // the other is complex. + + if ( bli_obj_is_real( a ) /* && bli_obj_is_complex( b ) */ ) + { + // If a is real and b is complex, we must obtain the real part + // of b so that we can copy a into the real part (after + // initializing all of b, including imaginary components, to + // zero). + + obj_t br; + + bli_obj_real_part( b, &br ); + + bli_setm( &BLIS_ZERO, b ); + bli_copym( a, &br ); + } + else // bli_obj_is_complex( a ) && bli_obj_is_real( b ) + { + // If a is complex and b is real, we can simply copy the + // real part of a into b. + + obj_t ar; + + bli_obj_real_part( a, &ar ); + + bli_copym( &ar, b ); + } + } +} + +// ----------------------------------------------------------------------------- + +void bli_projm_check + ( + obj_t* a, + obj_t* b + ) +{ + err_t e_val; + + // Check object datatypes. 
+ + e_val = bli_check_floating_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( b ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_precisions( a, b ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_matrix_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_matrix_object( b ); + bli_check_error_code( e_val ); + + e_val = bli_check_conformal_dims( a, b ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). + + e_val = bli_check_object_buffer( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( b ); + bli_check_error_code( e_val ); +} + diff --git a/frame/base/proj/bli_projm.h b/frame/base/proj/bli_projm.h new file mode 100644 index 000000000..154b67ed2 --- /dev/null +++ b/frame/base/proj/bli_projm.h @@ -0,0 +1,46 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_projm + ( + obj_t* a, + obj_t* b + ); + +void bli_projm_check + ( + obj_t* a, + obj_t* b + ); + diff --git a/frame/base/proj/bli_projv.c b/frame/base/proj/bli_projv.c new file mode 100644 index 000000000..fcad4d890 --- /dev/null +++ b/frame/base/proj/bli_projv.c @@ -0,0 +1,127 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_projv + ( + obj_t* x, + obj_t* y + ) +{ + // Check parameters. + if ( bli_error_checking_is_enabled() ) + bli_projv_check( x, y ); + + if ( ( bli_obj_is_real( x ) && bli_obj_is_real( y ) ) || + ( bli_obj_is_complex( x ) && bli_obj_is_complex( y ) ) ) + { + // If x and y are both real or both complex, we can simply use + // copyv. + bli_copyv( x, y ); + } + else + { + // This branch handles the case where one operand is real and + // the other is complex. + + if ( bli_obj_is_real( x ) /* && bli_obj_is_complex( y ) */ ) + { + // If x is real and y is complex, we must obtain the real part + // of y so that we can copy x into the real part (after + // initializing all of y, including imaginary components, to + // zero). + + obj_t yr; + + bli_obj_real_part( y, &yr ); + + bli_setv( &BLIS_ZERO, y ); + bli_copyv( x, &yr ); + } + else // bli_obj_is_complex( x ) && bli_obj_is_real( y ) + { + // If x is complex and y is real, we can simply copy the + // real part of x into y. 
+ + obj_t xr; + + bli_obj_real_part( x, &xr ); + + bli_copyv( &xr, y ); + } + } +} + +// ----------------------------------------------------------------------------- + +void bli_projv_check + ( + obj_t* x, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_precisions( x, y ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). + + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + diff --git a/frame/base/proj/bli_projv.h b/frame/base/proj/bli_projv.h new file mode 100644 index 000000000..8b504a685 --- /dev/null +++ b/frame/base/proj/bli_projv.h @@ -0,0 +1,46 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_projv + ( + obj_t* x, + obj_t* y + ); + +void bli_projv_check + ( + obj_t* x, + obj_t* y + ); + diff --git a/frame/base/proj/old/bli_proj_check.c b/frame/base/proj/old/bli_proj_check.c new file mode 100644 index 000000000..f030cc497 --- /dev/null +++ b/frame/base/proj/old/bli_proj_check.c @@ -0,0 +1,114 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +#include "blis.h" + +void bli_projm_check + ( + obj_t* a, + obj_t* b + ) +{ + err_t e_val; + + // Check object datatypes. + + e_val = bli_check_floating_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( b ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_precisions( a, b ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_matrix_object( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_matrix_object( b ); + bli_check_error_code( e_val ); + + e_val = bli_check_conformal_dims( a, b ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). + + e_val = bli_check_object_buffer( a ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( b ); + bli_check_error_code( e_val ); +} + +void bli_projv_check + ( + obj_t* x, + obj_t* y + ) +{ + err_t e_val; + + // Check object datatypes. 
+ + e_val = bli_check_floating_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_floating_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_consistent_object_precisions( x, y ); + bli_check_error_code( e_val ); + + // Check object dimensions. + + e_val = bli_check_vector_object( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_vector_object( y ); + bli_check_error_code( e_val ); + + e_val = bli_check_equal_vector_lengths( x, y ); + bli_check_error_code( e_val ); + + // Check object buffers (for non-NULLness). + + e_val = bli_check_object_buffer( x ); + bli_check_error_code( e_val ); + + e_val = bli_check_object_buffer( y ); + bli_check_error_code( e_val ); +} + diff --git a/frame/base/proj/old/bli_proj_check.h b/frame/base/proj/old/bli_proj_check.h new file mode 100644 index 000000000..d20aad9d6 --- /dev/null +++ b/frame/base/proj/old/bli_proj_check.h @@ -0,0 +1,45 @@ +/* + + BLIS + An object-based framework for developing high-performance BLAS-like + libraries. + + Copyright (C) 2014, The University of Texas at Austin + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + - Neither the name of The University of Texas at Austin nor the names + of its contributors may be used to endorse or promote products + derived from this software without specific prior written permission. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +*/ + +void bli_projm_check + ( + obj_t* a, + obj_t* b + ); + +void bli_projv_check + ( + obj_t* x, + obj_t* y + ); diff --git a/frame/include/bli_gentfunc_macro_defs.h b/frame/include/bli_gentfunc_macro_defs.h index 6d980b1fa..a52904cc4 100644 --- a/frame/include/bli_gentfunc_macro_defs.h +++ b/frame/include/bli_gentfunc_macro_defs.h @@ -402,6 +402,51 @@ GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) \ +// -- Mixed domain/precision (all) two-operand macro -- + +// -- (no auxiliary arguments) -- + +#define INSERT_GENTFUNC2_MIXDP0( tfuncname ) \ +\ +GENTFUNC2( float, double, s, d, tfuncname ) \ +GENTFUNC2( float, scomplex, s, c, tfuncname ) \ +GENTFUNC2( float, dcomplex, s, z, tfuncname ) \ +\ +GENTFUNC2( double, float, d, s, tfuncname ) \ +GENTFUNC2( double, scomplex, d, c, tfuncname ) \ +GENTFUNC2( double, dcomplex, d, z, tfuncname ) \ +\ +GENTFUNC2( scomplex, float, c, s, tfuncname ) \ +GENTFUNC2( scomplex, double, c, d, tfuncname ) \ +GENTFUNC2( scomplex, dcomplex, c, z, tfuncname ) \ +\ +GENTFUNC2( dcomplex, float, z, s, tfuncname ) \ +GENTFUNC2( dcomplex, double, z, d, tfuncname ) \ +GENTFUNC2( dcomplex, scomplex, z, c, tfuncname ) + + +// -- (one auxiliary argument) -- + +#define 
INSERT_GENTFUNC2_MIX_DP( tfuncname, varname ) \ +\ +GENTFUNC2( float, double, s, d, tfuncname, varname ) \ +GENTFUNC2( float, scomplex, s, c, tfuncname, varname ) \ +GENTFUNC2( float, dcomplex, s, z, tfuncname, varname ) \ +\ +GENTFUNC2( double, float, d, s, tfuncname, varname ) \ +GENTFUNC2( double, scomplex, d, c, tfuncname, varname ) \ +GENTFUNC2( double, dcomplex, d, z, tfuncname, varname ) \ +\ +GENTFUNC2( scomplex, float, c, s, tfuncname, varname ) \ +GENTFUNC2( scomplex, double, c, d, tfuncname, varname ) \ +GENTFUNC2( scomplex, dcomplex, c, z, tfuncname, varname ) \ +\ +GENTFUNC2( dcomplex, float, z, s, tfuncname, varname ) \ +GENTFUNC2( dcomplex, double, z, d, tfuncname, varname ) \ +GENTFUNC2( dcomplex, scomplex, z, c, tfuncname, varname ) + + + // -- Basic two-operand with real projection of first operand -- // -- (no auxiliary arguments) -- diff --git a/frame/include/bli_gentprot_macro_defs.h b/frame/include/bli_gentprot_macro_defs.h index 5d685cc56..e0ba84ff9 100644 --- a/frame/include/bli_gentprot_macro_defs.h +++ b/frame/include/bli_gentprot_macro_defs.h @@ -395,6 +395,50 @@ GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) \ +// -- Mixed domain/precision (all) two-operand macro -- + +// -- (no auxiliary arguments) -- + +#define INSERT_GENTPROT2_MIXDP0( funcname ) \ +\ +GENTPROT2( float, double, s, d, funcname ) \ +GENTPROT2( float, scomplex, s, c, funcname ) \ +GENTPROT2( float, dcomplex, s, z, funcname ) \ +\ +GENTPROT2( double, float, d, s, funcname ) \ +GENTPROT2( double, scomplex, d, c, funcname ) \ +GENTPROT2( double, dcomplex, d, z, funcname ) \ +\ +GENTPROT2( scomplex, float, c, s, funcname ) \ +GENTPROT2( scomplex, double, c, d, funcname ) \ +GENTPROT2( scomplex, dcomplex, c, z, funcname ) \ +\ +GENTPROT2( dcomplex, float, z, s, funcname ) \ +GENTPROT2( dcomplex, double, z, d, funcname ) \ +GENTPROT2( dcomplex, scomplex, z, c, funcname ) + +// -- (one auxiliary argument) -- + +#define INSERT_GENTPROT2_MIX_DP( tfuncname, varname ) \ 
+\ +GENTPROT2( float, double, s, d, tfuncname, varname ) \ +GENTPROT2( float, scomplex, s, c, tfuncname, varname ) \ +GENTPROT2( float, dcomplex, s, z, tfuncname, varname ) \ +\ +GENTPROT2( double, float, d, s, tfuncname, varname ) \ +GENTPROT2( double, scomplex, d, c, tfuncname, varname ) \ +GENTPROT2( double, dcomplex, d, z, tfuncname, varname ) \ +\ +GENTPROT2( scomplex, float, c, s, tfuncname, varname ) \ +GENTPROT2( scomplex, double, c, d, tfuncname, varname ) \ +GENTPROT2( scomplex, dcomplex, c, z, tfuncname, varname ) \ +\ +GENTPROT2( dcomplex, float, z, s, tfuncname, varname ) \ +GENTPROT2( dcomplex, double, z, d, tfuncname, varname ) \ +GENTPROT2( dcomplex, scomplex, z, c, tfuncname, varname ) + + + // -- Basic two-operand with real projection of first operand -- // -- (no auxiliary arguments) -- diff --git a/frame/include/bli_obj_macro_defs.h b/frame/include/bli_obj_macro_defs.h index cf5aa550a..a09fdfaae 100644 --- a/frame/include/bli_obj_macro_defs.h +++ b/frame/include/bli_obj_macro_defs.h @@ -76,11 +76,36 @@ static bool_t bli_obj_is_const( obj_t* obj ) return ( bli_obj_dt( obj ) == BLIS_BITVAL_CONST_TYPE ); } -static objbits_t bli_obj_domain( obj_t* obj ) +static dom_t bli_obj_domain( obj_t* obj ) { return ( obj->info & BLIS_DOMAIN_BIT ); } +static prec_t bli_obj_prec( obj_t* obj ) +{ + return ( obj->info & BLIS_PRECISION_BIT ); +} + +static bool_t bli_obj_is_single_prec( obj_t* obj ) +{ + return ( bli_obj_prec( obj ) == BLIS_BITVAL_SINGLE_PREC ); +} + +static bool_t bli_obj_is_double_prec( obj_t* obj ) +{ + return ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC ); +} + +static num_t bli_obj_dt_proj_to_single_prec( obj_t* obj ) +{ + return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_SINGLE_PREC ); +} + +static num_t bli_obj_dt_proj_to_double_prec( obj_t* obj ) +{ + return ( bli_obj_dt( obj ) | BLIS_BITVAL_DOUBLE_PREC ); +} + static bool_t bli_obj_is_real( obj_t* obj ) { return ( bli_obj_domain( obj ) == BLIS_BITVAL_REAL ); @@ -91,16 +116,6 @@ static bool_t 
bli_obj_is_complex( obj_t* obj ) return ( bli_obj_domain( obj ) == BLIS_BITVAL_COMPLEX ); } -static objbits_t bli_obj_prec( obj_t* obj ) -{ - return ( obj->info & BLIS_PRECISION_BIT ); -} - -static bool_t bli_obj_is_double_prec( obj_t* obj ) -{ - return ( bli_obj_prec( obj ) == BLIS_BITVAL_DOUBLE_PREC ); -} - static num_t bli_obj_dt_proj_to_real( obj_t* obj ) { return ( bli_obj_dt( obj ) & ~BLIS_BITVAL_COMPLEX ); @@ -108,7 +123,7 @@ static num_t bli_obj_dt_proj_to_real( obj_t* obj ) static num_t bli_obj_dt_proj_to_complex( obj_t* obj ) { - return ( bli_obj_dt( obj ) & BLIS_BITVAL_COMPLEX ); + return ( bli_obj_dt( obj ) | BLIS_BITVAL_COMPLEX ); } static num_t bli_obj_target_dt( obj_t* obj ) @@ -116,9 +131,29 @@ static num_t bli_obj_target_dt( obj_t* obj ) return ( ( obj->info & BLIS_TARGET_DT_BITS ) >> BLIS_TARGET_DT_SHIFT ); } +static dom_t bli_obj_target_domain( obj_t* obj ) +{ + return ( ( obj->info & BLIS_TARGET_DOMAIN_BIT ) >> BLIS_TARGET_DT_SHIFT ); +} + +static prec_t bli_obj_target_prec( obj_t* obj ) +{ + return ( ( obj->info & BLIS_TARGET_PREC_BIT ) >> BLIS_TARGET_DT_SHIFT ); +} + static num_t bli_obj_exec_dt( obj_t* obj ) { - return ( ( obj->info & BLIS_EXECUTION_DT_BITS ) >> BLIS_EXECUTION_DT_SHIFT ); + return ( ( obj->info & BLIS_EXEC_DT_BITS ) >> BLIS_EXEC_DT_SHIFT ); +} + +static dom_t bli_obj_exec_domain( obj_t* obj ) +{ + return ( ( obj->info & BLIS_EXEC_DOMAIN_BIT ) >> BLIS_EXEC_DT_SHIFT ); +} + +static prec_t bli_obj_exec_prec( obj_t* obj ) +{ + return ( ( obj->info & BLIS_EXEC_PREC_BIT ) >> BLIS_EXEC_DT_SHIFT ); } static trans_t bli_obj_conjtrans_status( obj_t* obj ) @@ -326,9 +361,29 @@ static void bli_obj_set_target_dt( num_t dt, obj_t* obj ) obj->info = ( obj->info & ~BLIS_TARGET_DT_BITS ) | ( dt << BLIS_TARGET_DT_SHIFT ); } +static void bli_obj_set_target_domain( dom_t dt, obj_t* obj ) +{ + obj->info = ( obj->info & ~BLIS_TARGET_DOMAIN_BIT ) | ( dt << BLIS_TARGET_DOMAIN_SHIFT ); +} + +static void bli_obj_set_target_prec( prec_t dt, obj_t* obj 
) +{ + obj->info = ( obj->info & ~BLIS_TARGET_PREC_BIT ) | ( dt << BLIS_TARGET_PREC_SHIFT ); +} + static void bli_obj_set_exec_dt( num_t dt, obj_t* obj ) { - obj->info = ( obj->info & ~BLIS_EXECUTION_DT_BITS ) | ( dt << BLIS_EXECUTION_DT_SHIFT ); + obj->info = ( obj->info & ~BLIS_EXEC_DT_BITS ) | ( dt << BLIS_EXEC_DT_SHIFT ); +} + +static void bli_obj_set_exec_domain( dom_t dt, obj_t* obj ) +{ + obj->info = ( obj->info & ~BLIS_EXEC_DOMAIN_BIT ) | ( dt << BLIS_EXEC_DOMAIN_SHIFT ); +} + +static void bli_obj_set_exec_prec( prec_t dt, obj_t* obj ) +{ + obj->info = ( obj->info & ~BLIS_EXEC_PREC_BIT ) | ( dt << BLIS_EXEC_PREC_SHIFT ); } static void bli_obj_set_pack_schema( pack_t schema, obj_t* obj ) @@ -909,39 +964,7 @@ static void bli_obj_toggle_uplo_if_trans( trans_t trans, obj_t* obj ) } } -// Make a full alias (shallow copy) - -static void bli_obj_alias_to( obj_t* a, obj_t* b ) -{ - bli_obj_init_full_shallow_copy_of( a, b ); -} - -// Check if two objects are aliases of one another - -static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b ) -{ - return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) ); -} - - -// Create an alias with a trans value applied. -// (Note: trans may include a conj component.) - -static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b ) -{ - bli_obj_alias_to( a, b ); - bli_obj_apply_trans( trans, b ); -} - -// Create an alias with a conj value applied. - -static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b ) -{ - bli_obj_alias_to( a, b ); - bli_obj_apply_conj( conja, b ); -} - -// Initialize object with default properties (info field) +// Initialize object with default properties (info field). static void bli_obj_set_defaults( obj_t* obj ) { @@ -1021,6 +1044,91 @@ static void* bli_obj_buffer_for_1x1( num_t dt, obj_t* obj ) ); } +// Make a full alias (shallow copy). 
+ +static void bli_obj_alias_to( obj_t* a, obj_t* b ) +{ + bli_obj_init_full_shallow_copy_of( a, b ); +} + +// Check if two objects are aliases of one another. + +static bool_t bli_obj_is_alias_of( obj_t* a, obj_t* b ) +{ + return ( bli_obj_buffer( a ) == bli_obj_buffer( b ) ); +} + + +// Create an alias with a trans value applied. +// (Note: trans may include a conj component.) + +static void bli_obj_alias_with_trans( trans_t trans, obj_t* a, obj_t* b ) +{ + bli_obj_alias_to( a, b ); + bli_obj_apply_trans( trans, b ); +} + +// Create an alias with a conj value applied. + +static void bli_obj_alias_with_conj( conj_t conja, obj_t* a, obj_t* b ) +{ + bli_obj_alias_to( a, b ); + bli_obj_apply_conj( conja, b ); +} + +// Alias only the real part. + +static void bli_obj_real_part( obj_t* c, obj_t* r ) +{ + bli_obj_alias_to( c, r ); + + if ( bli_obj_is_complex( c ) ) + { + // Change the datatype. + num_t dt_r = bli_obj_dt_proj_to_real( c ); + bli_obj_set_dt( dt_r, r ); + + // Update the element size. + siz_t es_c = bli_obj_elem_size( c ); + bli_obj_set_elem_size( es_c/2, r ); + + // Update the strides. + inc_t rs_c = bli_obj_row_stride( c ); + inc_t cs_c = bli_obj_col_stride( c ); + bli_obj_set_strides( 2*rs_c, 2*cs_c, r ); + + // Buffer is left unchanged. + } +} + +// Alias only the imaginary part. + +static void bli_obj_imag_part( obj_t* c, obj_t* i ) +{ + if ( bli_obj_is_complex( c ) ) + { + bli_obj_alias_to( c, i ); + + // Change the datatype. + num_t dt_r = bli_obj_dt_proj_to_real( c ); + bli_obj_set_dt( dt_r, i ); + + // Update the element size. + siz_t es_c = bli_obj_elem_size( c ); + bli_obj_set_elem_size( es_c/2, i ); + + // Update the strides. + inc_t rs_c = bli_obj_row_stride( c ); + inc_t cs_c = bli_obj_col_stride( c ); + bli_obj_set_strides( 2*rs_c, 2*cs_c, i ); + + // Update the buffer. 
+ inc_t is_c = bli_obj_imag_stride( c ); + char* p = bli_obj_buffer_at_off( c ); + bli_obj_set_buffer( p + is_c * es_c/2, i ); + } +} + // Given a 1x1 object, acquire an address to the buffer depending on whether // the object is a BLIS_CONSTANT, and also set a datatype associated with the // chosen buffer (possibly using an auxiliary datatype if the object is diff --git a/frame/include/bli_param_macro_defs.h b/frame/include/bli_param_macro_defs.h index 09cd90772..b49f17c6a 100644 --- a/frame/include/bli_param_macro_defs.h +++ b/frame/include/bli_param_macro_defs.h @@ -112,6 +112,16 @@ static bool_t bli_is_double_prec( num_t dt ) bli_is_dcomplex( dt ) ); } +static dom_t bli_dt_domain( num_t dt ) +{ + return ( dt & BLIS_DOMAIN_BIT ); +} + +static prec_t bli_dt_prec( num_t dt ) +{ + return ( dt & BLIS_PRECISION_BIT ); +} + static num_t bli_dt_proj_to_real( num_t dt ) { return ( dt & ~BLIS_BITVAL_COMPLEX ); @@ -119,7 +129,17 @@ static num_t bli_dt_proj_to_real( num_t dt ) static num_t bli_dt_proj_to_complex( num_t dt ) { - return ( dt & BLIS_BITVAL_COMPLEX ); + return ( dt | BLIS_BITVAL_COMPLEX ); +} + +static num_t bli_dt_proj_to_single_prec( num_t dt ) +{ + return ( dt & ~BLIS_BITVAL_SINGLE_PREC ); +} + +static num_t bli_dt_proj_to_double_prec( num_t dt ) +{ + return ( dt | BLIS_BITVAL_DOUBLE_PREC ); } @@ -990,6 +1010,41 @@ void bli_set_dims_incs_uplo_1m_noswap } } +// Set dimensions and increments for TWO matrix arguments. 
+ +static +void bli_set_dims_incs_2m + ( + trans_t transa, + dim_t m, dim_t n, inc_t rs_a, inc_t cs_a, + inc_t rs_b, inc_t cs_b, + dim_t* n_elem, dim_t* n_iter, inc_t* inca, inc_t* lda, + inc_t* incb, inc_t* ldb + ) +{ + { + *n_iter = n; + *n_elem = m; + *inca = rs_a; + *lda = cs_a; + *incb = rs_b; + *ldb = cs_b; + + if ( bli_does_trans( transa ) ) + { + bli_swap_incs( inca, lda ); + } + + if ( bli_is_row_tilted( *n_elem, *n_iter, *incb, *ldb ) && + bli_is_row_tilted( *n_elem, *n_iter, *inca, *lda ) ) + { + bli_swap_dims( n_iter, n_elem ); + bli_swap_incs( inca, lda ); + bli_swap_incs( incb, ldb ); + } + } +} + // Set dimensions, increments, effective uplo/diagoff, etc for TWO matrix // arguments. @@ -1033,7 +1088,7 @@ void bli_set_dims_incs_uplo_2m if ( bli_is_stored_subpart( diagoffa_use_, transa, uploa, m, n ) ) uploa = BLIS_DENSE; - n_iter_max_ = n; + n_iter_max_ = n; *n_elem_max = m; *inca = rs_a; *lda = cs_a; diff --git a/frame/include/bli_type_defs.h b/frame/include/bli_type_defs.h index b1a1f55b6..2d400518d 100644 --- a/frame/include/bli_type_defs.h +++ b/frame/include/bli_type_defs.h @@ -210,11 +210,11 @@ typedef dcomplex f77_dcomplex; 12 ~ 10 Target numerical datatype - 10: domain (0 == real, 1 == complex) - 11: precision (0 == single, 1 == double) - - 12: unused + - 12: used to encode integer, constant types 15 ~ 13 Execution numerical datatype - 13: domain (0 == real, 1 == complex) - 14: precision (0 == single, 1 == double) - - 15: unused + - 15: used to encode integer, constant types 22 ~ 16 Packed type/status - 0 0000 00: not packed - 1 0000 00: packed (unspecified; by rows, columns, or vector) @@ -271,7 +271,11 @@ typedef dcomplex f77_dcomplex; #define BLIS_UNIT_DIAG_SHIFT 8 #define BLIS_INVERT_DIAG_SHIFT 9 #define BLIS_TARGET_DT_SHIFT 10 -#define BLIS_EXECUTION_DT_SHIFT 13 +#define BLIS_TARGET_DOMAIN_SHIFT 10 +#define BLIS_TARGET_PREC_SHIFT 11 +#define BLIS_EXEC_DT_SHIFT 13 +#define BLIS_EXEC_DOMAIN_SHIFT 13 +#define BLIS_EXEC_PREC_SHIFT 14 #define 
BLIS_PACK_SCHEMA_SHIFT 16 #define BLIS_PACK_RC_SHIFT 16 #define BLIS_PACK_PANEL_SHIFT 17 @@ -299,7 +303,11 @@ typedef dcomplex f77_dcomplex; #define BLIS_UNIT_DIAG_BIT ( 0x1 << BLIS_UNIT_DIAG_SHIFT ) #define BLIS_INVERT_DIAG_BIT ( 0x1 << BLIS_INVERT_DIAG_SHIFT ) #define BLIS_TARGET_DT_BITS ( 0x7 << BLIS_TARGET_DT_SHIFT ) -#define BLIS_EXECUTION_DT_BITS ( 0x7 << BLIS_EXECUTION_DT_SHIFT ) +#define BLIS_TARGET_DOMAIN_BIT ( 0x1 << BLIS_TARGET_DOMAIN_SHIFT ) +#define BLIS_TARGET_PREC_BIT ( 0x1 << BLIS_TARGET_PREC_SHIFT ) +#define BLIS_EXEC_DT_BITS ( 0x7 << BLIS_EXEC_DT_SHIFT ) +#define BLIS_EXEC_DOMAIN_BIT ( 0x1 << BLIS_EXEC_DOMAIN_SHIFT ) +#define BLIS_EXEC_PREC_BIT ( 0x1 << BLIS_EXEC_PREC_SHIFT ) #define BLIS_PACK_SCHEMA_BITS ( 0x7F << BLIS_PACK_SCHEMA_SHIFT ) #define BLIS_PACK_RC_BIT ( 0x1 << BLIS_PACK_RC_SHIFT ) #define BLIS_PACK_PANEL_BIT ( 0x1 << BLIS_PACK_PANEL_SHIFT ) @@ -1128,8 +1136,6 @@ typedef struct cntx_s pack_t schema_b_panel; pack_t schema_c_panel; - bool_t anti_pref; - dim_t thrloop[ BLIS_NUM_LOOPS ]; membrk_t* membrk; @@ -1177,6 +1183,7 @@ typedef enum BLIS_INCONSISTENT_DATATYPES = ( -36), BLIS_EXPECTED_REAL_PROJ_OF = ( -37), BLIS_EXPECTED_REAL_VALUED_OBJECT = ( -38), + BLIS_INCONSISTENT_PRECISIONS = ( -39), // Dimension-specific errors BLIS_NONCONFORMAL_DIMENSIONS = ( -40), diff --git a/frame/include/blis.h b/frame/include/blis.h index 55ab9316a..8e1803f4b 100644 --- a/frame/include/blis.h +++ b/frame/include/blis.h @@ -122,6 +122,12 @@ extern "C" { #include "bli_cpuid.h" #include "bli_string.h" #include "bli_setgetij.h" +#include "bli_setri.h" + +#include "bli_castm.h" +#include "bli_castv.h" +#include "bli_projm.h" +#include "bli_projv.h" // -- Level-0 operations -- diff --git a/frame/ind/oapi/bli_l3_nat_oapi.c b/frame/ind/oapi/bli_l3_nat_oapi.c index 2c8219d2b..c6d3bdb21 100644 --- a/frame/ind/oapi/bli_l3_nat_oapi.c +++ b/frame/ind/oapi/bli_l3_nat_oapi.c @@ -60,8 +60,7 @@ void PASTEMAC(opname,imeth) \ /* Obtain a valid (native) context from the gks 
if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ - /* Invoke the operation's front end with the appropriate control - tree. */ \ + /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ alpha, a, b, beta, c, cntx, NULL \ @@ -98,8 +97,7 @@ void PASTEMAC(opname,imeth) \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ - /* Invoke the operation's front end with the appropriate control - tree. */ \ + /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ side, alpha, a, b, beta, c, cntx, NULL \ @@ -130,8 +128,7 @@ void PASTEMAC(opname,imeth) \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ - /* Invoke the operation's front end with the appropriate control - tree. */ \ + /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ alpha, a, beta, c, cntx, NULL \ @@ -161,8 +158,7 @@ void PASTEMAC(opname,imeth) \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ - /* Invoke the operation's front end with the appropriate control - tree. */ \ + /* Invoke the operation's front end. */ \ PASTEMAC(opname,_front) \ ( \ side, alpha, a, b, cntx, NULL \ @@ -191,8 +187,7 @@ void PASTEMAC(opname,imeth) \ /* Obtain a valid (native) context from the gks if necessary. */ \ if ( cntx == NULL ) cntx = bli_gks_query_cntx(); \ \ - /* Invoke the operation's front end with the appropriate control - tree. */ \ + /* Invoke the operation's front end. 
*/ \ PASTEMAC(opname,_front) \ ( \ side, alpha, a, b, cntx, NULL \ diff --git a/frame/thread/bli_thrcomm_openmp.c b/frame/thread/bli_thrcomm_openmp.c index 131f70973..f2197597f 100644 --- a/frame/thread/bli_thrcomm_openmp.c +++ b/frame/thread/bli_thrcomm_openmp.c @@ -231,11 +231,18 @@ void bli_l3_thread_decorator { dim_t id = omp_get_thread_num(); + obj_t a_t, b_t, c_t; cntl_t* cntl_use; thrinfo_t* thread; + // Alias thread-local copies of A, B, and C. These will be the objects + // we pass into the thread functions. + bli_obj_alias_to( a, &a_t ); + bli_obj_alias_to( b, &b_t ); + bli_obj_alias_to( c, &c_t ); + // Create a default control tree for the operation, if needed. - bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use ); + bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use ); // Create the root node of the current thread's thrinfo_t structure. bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread ); @@ -243,17 +250,17 @@ void bli_l3_thread_decorator func ( alpha, - a, - b, + &a_t, + &b_t, beta, - c, + &c_t, cntx, cntl_use, thread ); // Free the control tree, if one was created locally. - bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread ); + bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread ); #ifdef PRINT_THRINFO threads[id] = thread; diff --git a/frame/thread/bli_thrcomm_pthreads.c b/frame/thread/bli_thrcomm_pthreads.c index e2fa35c35..132fb6740 100644 --- a/frame/thread/bli_thrcomm_pthreads.c +++ b/frame/thread/bli_thrcomm_pthreads.c @@ -161,11 +161,18 @@ void* bli_l3_thread_entry( void* data_void ) dim_t id = data->id; thrcomm_t* gl_comm = data->gl_comm; + obj_t a_t, b_t, c_t; cntl_t* cntl_use; thrinfo_t* thread; + // Alias thread-local copies of A, B, and C. These will be the objects + // we pass into the thread function. + bli_obj_alias_to( a, &a_t ); + bli_obj_alias_to( b, &b_t ); + bli_obj_alias_to( c, &c_t ); + // Create a default control tree for the operation, if needed. 
- bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use ); + bli_l3_cntl_create_if( family, &a_t, &b_t, &c_t, cntl, &cntl_use ); // Create the root node of the current thread's thrinfo_t structure. bli_l3_thrinfo_create_root( id, gl_comm, cntx, cntl_use, &thread ); @@ -173,17 +180,17 @@ void* bli_l3_thread_entry( void* data_void ) func ( alpha, - a, - b, + &a_t, + &b_t, beta, - c, + &c_t, cntx, cntl_use, thread ); // Free the control tree, if one was created locally. - bli_l3_cntl_free_if( a, b, c, cntl, cntl_use, thread ); + bli_l3_cntl_free_if( &a_t, &b_t, &c_t, cntl, cntl_use, thread ); // Free the current thread's thrinfo_t structure. bli_l3_thrinfo_free( thread ); diff --git a/frame/thread/bli_thrcomm_single.c b/frame/thread/bli_thrcomm_single.c index cb0bc2ae4..068b7eda5 100644 --- a/frame/thread/bli_thrcomm_single.c +++ b/frame/thread/bli_thrcomm_single.c @@ -94,6 +94,12 @@ void bli_l3_thread_decorator cntl_t* cntl_use; thrinfo_t* thread; + // NOTE: Unlike with the _openmp.c and _pthreads.c variants, we don't + // need to alias objects for A, B, and C since they were already aliased + // in bli_*_front(). (We only needed thread-local copies so each could + // safely reset their internal (beta) scalars on c after the first + // iteration of the pc (kc) loop.) + // Create a default control tree for the operation, if needed. 
bli_l3_cntl_create_if( family, a, b, c, cntl, &cntl_use ); diff --git a/kernels/zen/bli_kernels_zen.h b/kernels/zen/bli_kernels_zen.h index 2c829127f..6c3fdefb2 100644 --- a/kernels/zen/bli_kernels_zen.h +++ b/kernels/zen/bli_kernels_zen.h @@ -84,6 +84,12 @@ GEMM_UKR_PROT( double, d, gemm_zen_asm_6x8 ) GEMM_UKR_PROT( scomplex, c, gemm_zen_asm_3x8 ) GEMM_UKR_PROT( dcomplex, z, gemm_zen_asm_3x4 ) +// gemm (asm d8x6) +GEMM_UKR_PROT( float, s, gemm_zen_asm_16x6 ) +GEMM_UKR_PROT( double, d, gemm_zen_asm_8x6 ) +GEMM_UKR_PROT( scomplex, c, gemm_zen_asm_8x3 ) +GEMM_UKR_PROT( dcomplex, z, gemm_zen_asm_4x3 ) + // gemmtrsm_l (asm d6x8) GEMMTRSM_UKR_PROT( float, s, gemmtrsm_l_zen_asm_6x16 ) GEMMTRSM_UKR_PROT( double, d, gemmtrsm_l_zen_asm_6x8 ) diff --git a/ref_kernels/1m/bli_packm_cxk_1er_ref.c b/ref_kernels/1m/bli_packm_cxk_1er_ref.c index 3c526506d..693fd3c47 100644 --- a/ref_kernels/1m/bli_packm_cxk_1er_ref.c +++ b/ref_kernels/1m/bli_packm_cxk_1er_ref.c @@ -189,7 +189,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_2xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_2xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -364,7 +364,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_4xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_4xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -555,7 +555,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_6xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_6xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -762,7 +762,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_8xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_8xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -985,7 +985,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_10xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( 
packm_10xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1224,7 +1224,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_12xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_12xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1479,7 +1479,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_14xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_14xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -1750,7 +1750,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_16xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_16xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) @@ -2133,5 +2133,5 @@ void PASTEMAC3(ch,opname,arch,suf) \ } \ } -INSERT_GENTFUNCCO_BASIC2( packm_30xk_1e, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) +INSERT_GENTFUNCCO_BASIC2( packm_30xk_1er, BLIS_CNAME_INFIX, BLIS_REF_SUFFIX ) diff --git a/ref_kernels/3/bli_gemmtrsm_ref.c b/ref_kernels/3/bli_gemmtrsm_ref.c index 3657a2092..9ce0ead42 100644 --- a/ref_kernels/3/bli_gemmtrsm_ref.c +++ b/ref_kernels/3/bli_gemmtrsm_ref.c @@ -60,9 +60,9 @@ void PASTEMAC3(ch,opname,arch,suf) \ ctype* minus_one = PASTEMAC(ch,m1); \ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ PASTECH(ch,trsm_ukr_ft) \ - trsm_ukr = bli_cntx_get_l3_ukr_dt( dt, trsmkerid, cntx ); \ + trsm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt, trsmkerid, cntx ); \ \ /* lower: b11 = alpha * b11 - a10 * b01; */ \ /* upper: b11 = alpha * b11 - a12 * b21; */ \ diff --git a/ref_kernels/bli_cntx_ref.c b/ref_kernels/bli_cntx_ref.c index cc5828f4d..f8b72fc15 100644 --- a/ref_kernels/bli_cntx_ref.c +++ b/ref_kernels/bli_cntx_ref.c @@ -221,23 +221,23 @@ #define packm_30xk_rih_ker_name GENARNAME(packm_30xk_rih) #undef packm_2xk_1er_ker_name -#define packm_2xk_1er_ker_name GENARNAME(packm_2xk_1e) +#define 
packm_2xk_1er_ker_name GENARNAME(packm_2xk_1er) #undef packm_4xk_1er_ker_name -#define packm_4xk_1er_ker_name GENARNAME(packm_4xk_1e) +#define packm_4xk_1er_ker_name GENARNAME(packm_4xk_1er) #undef packm_6xk_1er_ker_name -#define packm_6xk_1er_ker_name GENARNAME(packm_6xk_1e) +#define packm_6xk_1er_ker_name GENARNAME(packm_6xk_1er) #undef packm_8xk_1er_ker_name -#define packm_8xk_1er_ker_name GENARNAME(packm_8xk_1e) +#define packm_8xk_1er_ker_name GENARNAME(packm_8xk_1er) #undef packm_10xk_1er_ker_name -#define packm_10xk_1er_ker_name GENARNAME(packm_10xk_1e) +#define packm_10xk_1er_ker_name GENARNAME(packm_10xk_1er) #undef packm_12xk_1er_ker_name -#define packm_12xk_1er_ker_name GENARNAME(packm_12xk_1e) +#define packm_12xk_1er_ker_name GENARNAME(packm_12xk_1er) #undef packm_14xk_1er_ker_name -#define packm_14xk_1er_ker_name GENARNAME(packm_14xk_1e) +#define packm_14xk_1er_ker_name GENARNAME(packm_14xk_1er) #undef packm_16xk_1er_ker_name -#define packm_16xk_1er_ker_name GENARNAME(packm_16xk_1e) +#define packm_16xk_1er_ker_name GENARNAME(packm_16xk_1er) #undef packm_30xk_1er_ker_name -#define packm_30xk_1er_ker_name GENARNAME(packm_30xk_1e) +#define packm_30xk_1er_ker_name GENARNAME(packm_30xk_1er) // Include the level-1m kernel API template. 
#include "bli_l1m_ker.h" @@ -363,11 +363,11 @@ void GENBARNAME(cntx_init) funcs = bli_cntx_l3_vir_ukrs_buf( cntx ); - gen_func_init_co( &funcs[ BLIS_GEMM_UKR ], gemm1m_ukr_name ); - gen_func_init_co( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm1m_l_ukr_name ); - gen_func_init_co( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm1m_u_ukr_name ); - gen_func_init_co( &funcs[ BLIS_TRSM_L_UKR ], trsm1m_l_ukr_name ); - gen_func_init_co( &funcs[ BLIS_TRSM_U_UKR ], trsm1m_u_ukr_name ); + gen_func_init( &funcs[ BLIS_GEMM_UKR ], gemm_ukr_name ); + gen_func_init( &funcs[ BLIS_GEMMTRSM_L_UKR ], gemmtrsm_l_ukr_name ); + gen_func_init( &funcs[ BLIS_GEMMTRSM_U_UKR ], gemmtrsm_u_ukr_name ); + gen_func_init( &funcs[ BLIS_TRSM_L_UKR ], trsm_l_ukr_name ); + gen_func_init( &funcs[ BLIS_TRSM_U_UKR ], trsm_u_ukr_name ); // -- Set level-3 native micro-kernels and preferences --------------------- @@ -467,7 +467,7 @@ void GENBARNAME(cntx_init) bli_cntx_set_schema_b_panel( BLIS_PACKED_COL_PANELS, cntx ); bli_cntx_set_schema_c_panel( BLIS_NOT_PACKED, cntx ); - bli_cntx_set_anti_pref( FALSE, cntx ); + //bli_cntx_set_anti_pref( FALSE, cntx ); bli_cntx_set_thrloop( 1, 1, 1, 1, 1, cntx ); @@ -726,7 +726,7 @@ void GENBAINAME(cntx_init) // Initialize the blocksizes according to the micro-kernel preference as // well as the algorithm. - if ( bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ) ) { // This branch is used for algorithms 1m_c_bp, 1m_r_pb. @@ -754,7 +754,7 @@ void GENBAINAME(cntx_init) cntx ); } - else // if ( bli_cntx_l3_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, cntx ) ) + else // if ( bli_cntx_l3_vir_ukr_prefers_rows_dt( dt, BLIS_GEMM_UKR, cntx ) ) { // This branch is used for algorithms 1m_r_bp, 1m_c_pb. 
@@ -811,7 +811,7 @@ void GENBAINAME(cntx_init) } else if ( method == BLIS_1M ) { - const bool_t is_pb = FALSE; + //const bool_t is_pb = FALSE; // Set the anti-preference field to TRUE when executing a panel-block // algorithm, and FALSE otherwise. This will cause higher-level generic @@ -819,7 +819,7 @@ void GENBAINAME(cntx_init) // the micro-kernel output preference so that the two will come back into // agreement in the panel-block macro-kernel (which implemented in terms // of the block-panel macro-kernel with some induced transpositions). - bli_cntx_set_anti_pref( is_pb, cntx ); + //bli_cntx_set_anti_pref( is_pb, cntx ); } else // if ( method == BLIS_NAT ) { diff --git a/ref_kernels/ind/bli_gemm1m_ref.c b/ref_kernels/ind/bli_gemm1m_ref.c index 53b51f303..e8cd01175 100644 --- a/ref_kernels/ind/bli_gemm1m_ref.c +++ b/ref_kernels/ind/bli_gemm1m_ref.c @@ -54,7 +54,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ \ PASTECH(chr,gemm_ukr_ft) \ rgemm_ukr = bli_cntx_get_l3_nat_ukr_dt( dt_r, BLIS_GEMM_UKR, cntx ); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const bool_t row_pref = !col_pref; \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ diff --git a/ref_kernels/ind/bli_gemmtrsm1m_ref.c b/ref_kernels/ind/bli_gemmtrsm1m_ref.c index 5782d79aa..1a88f7eec 100644 --- a/ref_kernels/ind/bli_gemmtrsm1m_ref.c +++ b/ref_kernels/ind/bli_gemmtrsm1m_ref.c @@ -59,7 +59,7 @@ void PASTEMAC3(ch,opname,arch,suf) \ PASTECH(ch,trsm_ukr_ft) \ ctrsm_vir_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, trsmkerid, cntx ); \ \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_nat_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ const dim_t mr = bli_cntx_get_blksz_def_dt( dt, BLIS_MR, cntx ); \ const dim_t nr = bli_cntx_get_blksz_def_dt( dt, BLIS_NR, cntx ); \ diff --git 
a/sandbox/ref99/README.md b/sandbox/ref99/README.md index 63ff433c0..fd1f3ae75 100644 --- a/sandbox/ref99/README.md +++ b/sandbox/ref99/README.md @@ -12,9 +12,9 @@ when you think of implementing the gemm operation: a series of loops around an optimized (usually assembly-based) microkernel with some packing functions thrown in at various levels.) -Why sandboxes? Sometimes, you just want to experiment with tweaks or changes -to the gemm operation, but you want to do so in a simple environment rather -than the somewhat obfuscated and highly macroized and refactored code of the +Why sandboxes? Sometimes you want to experiment with tweaks or changes to +the gemm operation, but you want to do so in a simple environment rather than +the highly macroized and refactored (and somewhat obfuscated) code of the core framework (which, I will remind everyone, is highly macroized and refactored mostly so that all floating-point datatypes and all level-3 operations are supported with minimal source code). By building a BLIS sandbox, @@ -56,16 +56,16 @@ implementation. Like any decent sandbox, there are rules for playing here. Please follow these guidelines for the best sandbox developer experience. -0. Don't bother worrying about makefiles. We've already taken care of the +1. Don't bother worrying about makefiles. We've already taken care of the boring/annoying/headache-inducing build system stuff for you. :) By configuring -BLIS with a sandbox enabled, `make` will scan your directory and compile all -of its source code using similar compilation rules as were used for the rest +BLIS with a sandbox enabled, `make` will scan your sandbox directory and compile +all of its source code using similar compilation rules as were used for the rest of the framework. In addition, the compilation command line will automatically contain one `-I` option for every subdirectory in your sandbox, so it doesn't matter where in your sandbox you place your header files. They will be found! -1. 
Your sandbox must be written in C99 or C++11. If you write your sandbox in +2. Your sandbox must be written in C99 or C++11. If you write your sandbox in C++11, you must use one of the BLIS-approved file extensions for your source files (`.cc`, `.cpp`, `.cxx`) and your header files (`.hh`, `.hpp`, `.hxx`). Note that `blis.h` @@ -73,13 +73,13 @@ already contains all of its definitions inside of an `extern "C"` block, so you should be able to `#include "blis.h"` from your C++11 source code without any issues. -2. All of your code to replace BLIS's default implementation of `bli_gemmnat()` +3. All of your code to replace BLIS's default implementation of `bli_gemmnat()` should reside in the named sandbox directory, or some directory therein. (Obviously.) For example, this `README.md` file is located in the `ref99` sandbox, located in `sandbox/ref99`. All of the code associated with this sandbox will be contained within `sandbox/ref99`. -3. The *only* header file that is required of your sandbox is `bli_sandbox.h`. +4. The *only* header file that is required of your sandbox is `bli_sandbox.h`. It must be named `bli_sandbox.h` because `blis.h` will `#include` this file when the sandbox is enabled at configure-time. That said, you will probably want to keep the file empty. Why require a file that is supposed to be empty? @@ -93,7 +93,7 @@ Usually, neither of these situations will require any of your local definitions since those definitions are only needed to define your sandbox implementation of `bli_gemmnat()`, and this function is already prototyped by BLIS. -4. Your definition of `bli_gemmnat()` should be the *only* function you define +5. Your definition of `bli_gemmnat()` should be the *only* function you define in your sandbox that begins with `bli_`. If you define other functions that begin with `bli_`, you risk a namespace collision with existing framework functions. 
To guarantee safety, please prefix your locally-defined sandbox @@ -112,33 +112,54 @@ working with the existing BLIS infrastructure. For example, with a BLIS sandbox you **can** do the following kinds of things: - use a different gemm algorithmic partitioning path than the default Goto-like -algorithm; -- experiment with different implementations of `packm` kernels; + algorithm; +- experiment with different implementations of `packm` (not just `packm` + kernels, which can already be customized within each sub-configuration); - try inlining your functions manually; - pivot away from using `obj_t` objects at higher algorithmic level (such as -immediately after calling `bli_gemmnat()`) to try to avoid some overhead; + immediately after calling `bli_gemmnat()`) to try to avoid some overhead; +- create experimental implementations of new BLAS-like operations (provided + that you also provide an implementation of `bli_gemmnat()`). -You **cannot**, however, do the following kinds of things: +You **cannot**, however, use a sandbox to do the following kinds of things: - define new datatypes (half-precision, quad-precision, short integer, etc.) -and expect the rest of BLIS to "know" how to handle them; -- use a sandbox to implement a different level-3 operation, such as Hermitian -rank-k update; -- define a new BLAS-like operation. + and expect the rest of BLIS to "know" how to handle them; +- use a sandbox to replace the default implementation of a different level-3 + operation, such as Hermitian rank-k update; +- change the existing BLIS APIs; +- remove support for one or more BLIS datatypes (to cut down on library size, + for example). Another important limitation is the fact that the build system currently uses "framework `CFLAGS`" when compiling the sandbox source files. These are the same -`CFLAGS` used when compiling general framework source code, which are likely -more general-purpose than the `CFLAGS` used for, say, optimized kernels or even -reference kernels. 
(To see precisely which flags are being employed for any -given file, enable verbosity at compile-time via `make V=1`.) Compiling -sandboxes with these more versatile `CFLAGS` compiler options means that we -only need to compile one instance of each sandbox source file, even when -targeting multiple configurations (for example, via `./configure x86_64`). -However, it also means that sandboxes are not ideal for microkernels, as they -usually need additional compiler flags not included in the set used for -framework `CFLAGS` in order to yield the highest performance. If you have a -new microkernel you would like to use within a sandbox, it's best to formally -register it along with a new configuration, which will allow you to specify +`CFLAGS` used when compiling general framework source code, +``` +# Example framework CFLAGS used by 'haswell' sub-configuration +-O3 -Wall -Wno-unused-function -Wfatal-errors -fPIC -std=c99 +-D_POSIX_C_SOURCE=200112L -I./include/haswell -I./frame/3/ +-I./frame/ind/ukernels/ -I./frame/1m/ -I./frame/1f/ -I./frame/1/ +-I./frame/include -DBLIS_VERSION_STRING=\"0.3.2-51\" +``` +which are likely more general-purpose than the `CFLAGS` used for, say, +optimized kernels or even reference kernels. +``` +# Example optimized kernel CFLAGS used by 'haswell' sub-configuration +-O3 -mavx2 -mfma -mfpmath=sse -march=core-avx2 -Wall -Wno-unused-function +-Wfatal-errors -fPIC -std=c99 -D_POSIX_C_SOURCE=200112L -I./include/haswell +-I./frame/3/ -I./frame/ind/ukernels/ -I./frame/1m/ -I./frame/1f/ -I./frame/1/ +-I./frame/include -DBLIS_VERSION_STRING=\"0.3.2-51\" +``` +(To see precisely which flags are being employed for any given file, enable +verbosity at compile-time via `make V=1`.) Compiling sandboxes with these more +versatile `CFLAGS` compiler options means that we only need to compile one +instance of each sandbox source file, even when targeting multiple +configurations (for example, via `./configure x86_64`). 
However, it also means +that sandboxes are not ideal for microkernels, as they sometimes need additional +compiler flags not included in the set used for framework `CFLAGS` in order to +yield the highest performance. If you have a new microkernel you would like to +use within a sandbox, you can always prototype it within a sandbox. However, +once it is stable and ready for use by others, it's best to formally register +the kernel(s) along with a new configuration, which will allow you to specify kernel-specific compiler flags to be used when compiling your microkernel. Please see the [Configuration wiki](https://github.com/flame/blis/wiki/ConfigurationHowTo) diff --git a/sandbox/ref99/blx_gemm_front.c b/sandbox/ref99/blx_gemm_front.c index 2010011d4..c2ac1ccf7 100644 --- a/sandbox/ref99/blx_gemm_front.c +++ b/sandbox/ref99/blx_gemm_front.c @@ -87,7 +87,7 @@ void blx_gemm_front // contiguous columns, or if C is stored by columns and the micro-kernel // prefers contiguous rows, transpose the entire operation to allow the // micro-kernel to access elements of C in its preferred manner. - if ( bli_cntx_l3_ukr_eff_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) + if ( bli_cntx_l3_vir_ukr_dislikes_storage_of( &c_local, BLIS_GEMM_UKR, cntx ) ) { bli_obj_swap( &a_local, &b_local ); @@ -96,11 +96,37 @@ void blx_gemm_front bli_obj_induce_trans( &c_local ); } + { + // A sort of hack for communicating the desired pack schemas for A and + // B to bli_gemm_cntl_create() (via bli_l3_thread_decorator() and + // bli_l3_cntl_create_if()). This allows us to access the schemas from + // the control tree, which hopefully reduces some confusion, + // particularly in bli_packm_init(). 
+ if ( bli_cntx_method( cntx ) == BLIS_NAT ) + { + bli_obj_set_pack_schema( BLIS_PACKED_ROW_PANELS, &a_local ); + bli_obj_set_pack_schema( BLIS_PACKED_COL_PANELS, &b_local ); + } + else // if ( bli_cntx_method( cntx ) != BLIS_NAT ) + { + pack_t schema_a = bli_cntx_schema_a_block( cntx ); + pack_t schema_b = bli_cntx_schema_b_panel( cntx ); + + bli_obj_set_pack_schema( schema_a, &a_local ); + bli_obj_set_pack_schema( schema_b, &b_local ); + } + } + // Record the threading for each level within the context. - bli_cntx_set_thrloop_from_env( BLIS_GEMM, BLIS_LEFT, cntx, - bli_obj_length( &c_local ), - bli_obj_width( &c_local ), - bli_obj_width( &a_local ) ); + bli_cntx_set_thrloop_from_env + ( + BLIS_GEMM, + BLIS_LEFT, // ignored for gemm + bli_obj_length( &c_local ), + bli_obj_width( &c_local ), + bli_obj_width( &a_local ), + cntx + ); // Invoke the internal back-end via the thread handler. blx_gemm_thread diff --git a/sandbox/ref99/cntl/blx_gemm_cntl.c b/sandbox/ref99/cntl/blx_gemm_cntl.c index 169161d54..4f499e614 100644 --- a/sandbox/ref99/cntl/blx_gemm_cntl.c +++ b/sandbox/ref99/cntl/blx_gemm_cntl.c @@ -37,17 +37,21 @@ cntl_t* blx_gemm_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ) { - return blx_gemmbp_cntl_create( family ); + return blx_gemmbp_cntl_create( family, schema_a, schema_b ); } // ----------------------------------------------------------------------------- cntl_t* blx_gemmbp_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ) { void* macro_kernel_p = blx_gemm_ker_var2; @@ -79,7 +83,7 @@ cntl_t* blx_gemmbp_cntl_create FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? FALSE, // reverse iteration if lower? - BLIS_PACKED_ROW_PANELS, + schema_a, // normally BLIS_PACKED_ROW_PANELS BLIS_BUFFER_FOR_A_BLOCK, gemm_cntl_bp_bu ); @@ -103,7 +107,7 @@ cntl_t* blx_gemmbp_cntl_create FALSE, // do NOT invert diagonal FALSE, // reverse iteration if upper? 
FALSE, // reverse iteration if lower? - BLIS_PACKED_COL_PANELS, + schema_b, // normally BLIS_PACKED_COL_PANELS BLIS_BUFFER_FOR_B_PANEL, gemm_cntl_op_bp ); diff --git a/sandbox/ref99/cntl/blx_gemm_cntl.h b/sandbox/ref99/cntl/blx_gemm_cntl.h index 637ead73e..59d7589a4 100644 --- a/sandbox/ref99/cntl/blx_gemm_cntl.h +++ b/sandbox/ref99/cntl/blx_gemm_cntl.h @@ -34,14 +34,18 @@ cntl_t* blx_gemm_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ); // ----------------------------------------------------------------------------- cntl_t* blx_gemmbp_cntl_create ( - opid_t family + opid_t family, + pack_t schema_a, + pack_t schema_b ); // ----------------------------------------------------------------------------- diff --git a/sandbox/ref99/cntl/blx_l3_cntl_if.c b/sandbox/ref99/cntl/blx_l3_cntl_if.c index 264bfb930..2eddb4360 100644 --- a/sandbox/ref99/cntl/blx_l3_cntl_if.c +++ b/sandbox/ref99/cntl/blx_l3_cntl_if.c @@ -46,11 +46,26 @@ void blx_l3_cntl_create_if cntl_t** cntl_use ) { + // This is part of a hack to support mixed domain in bli_gemm_front(). + // Sometimes we need to specify a non-standard schema for A and B, and + // we decided to transmit them via the schema field in the obj_t's + // rather than pass them in as function parameters. Once the values + // have been read, we immediately reset them back to their expected + // values for unpacked objects. Notice that we do this even if the + // caller passed in a custom control tree; that's because we still need + // to reset the pack schema of a and b, which were modified by the + // operation's _front() function. + pack_t schema_a = bli_obj_pack_schema( a ); + pack_t schema_b = bli_obj_pack_schema( b ); + + bli_obj_set_pack_schema( BLIS_NOT_PACKED, a ); + bli_obj_set_pack_schema( BLIS_NOT_PACKED, b ); + // If the control tree pointer is NULL, we construct a default // tree as a function of the operation family. 
if ( cntl_orig == NULL ) { - *cntl_use = blx_gemm_cntl_create( family ); + *cntl_use = blx_gemm_cntl_create( family, schema_a, schema_b ); } else { diff --git a/sandbox/ref99/thread/blx_gemm_thread.h b/sandbox/ref99/thread/blx_gemm_thread.h index 265d53e1f..903f590f1 100644 --- a/sandbox/ref99/thread/blx_gemm_thread.h +++ b/sandbox/ref99/thread/blx_gemm_thread.h @@ -32,9 +32,6 @@ */ -#ifndef BLIS_GEMM_THREAD_H -#define BLIS_GEMM_THREAD_H - // gemm internal function type typedef void (*gemmint_t) ( @@ -57,4 +54,3 @@ void blx_gemm_thread cntl_t* cntl ); -#endif diff --git a/sandbox/ref99/vars/blx_gemm_ker_var2.c b/sandbox/ref99/vars/blx_gemm_ker_var2.c index bfc248d12..6a291c8c7 100644 --- a/sandbox/ref99/vars/blx_gemm_ker_var2.c +++ b/sandbox/ref99/vars/blx_gemm_ker_var2.c @@ -171,7 +171,7 @@ void PASTECH2(blx_,ch,varname) \ /* Query the context for the micro-kernel address and cast it to its function pointer type. */ \ PASTECH(ch,gemm_ukr_ft) \ - gemm_ukr = bli_cntx_get_l3_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ + gemm_ukr = bli_cntx_get_l3_vir_ukr_dt( dt, BLIS_GEMM_UKR, cntx ); \ \ /* Temporary C buffer for edge cases. Note that the strides of this temporary buffer are set so that they match the storage of the @@ -180,7 +180,7 @@ void PASTECH2(blx_,ch,varname) \ ctype ct[ BLIS_STACK_BUF_MAX_SIZE \ / sizeof( ctype ) ] \ __attribute__((aligned(BLIS_STACK_BUF_ALIGN_SIZE))); \ - const bool_t col_pref = bli_cntx_l3_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ + const bool_t col_pref = bli_cntx_l3_vir_ukr_prefers_cols_dt( dt, BLIS_GEMM_UKR, cntx ); \ const inc_t rs_ct = ( col_pref ? 1 : NR ); \ const inc_t cs_ct = ( col_pref ? MR : 1 ); \ \ @@ -283,11 +283,11 @@ void PASTECH2(blx_,ch,varname) \ m_cur = ( bli_is_not_edge_f( i, m_iter, m_left ) ? MR : m_left ); \ \ /* Compute the addresses of the next panels of A and B. 
*/ \ - a2 = gemm_get_next_a_micropanel( caucus, a1, rstep_a ); \ + a2 = bli_gemm_get_next_a_upanel( caucus, a1, rstep_a ); \ if ( bli_is_last_iter( i, m_iter, ir_thread_id, ir_num_threads ) ) \ { \ a2 = a_cast; \ - b2 = gemm_get_next_b_micropanel( thread, b1, cstep_b ); \ + b2 = bli_gemm_get_next_b_upanel( thread, b1, cstep_b ); \ if ( bli_is_last_iter( j, n_iter, jr_thread_id, jr_num_threads ) ) \ b2 = b_cast; \ } \ diff --git a/test/3m4m/Makefile b/test/3m4m/Makefile index e5aa84dfa..ca0ac7721 100644 --- a/test/3m4m/Makefile +++ b/test/3m4m/Makefile @@ -214,22 +214,45 @@ PDEF_MT := -DP_BEGIN=200 \ # --- Targets/rules ------------------------------------------------------------ # -all-st: blis-st openblas-st mkl-st -all-mt: blis-mt openblas-mt mkl-mt +all: all-st all-mt +blis: blis-st blis-mt +blis-nat: blis-nat-st blis-nat-mt +openblas: openblas-st openblas-mt +mkl: mkl-st mkl-mt -blis-st: blis-gemm-st -blis-mt: blis-gemm-mt +all-st: blis-st openblas-st mkl-st +all-mt: blis-mt openblas-mt mkl-mt -openblas-st: openblas-gemm-st -openblas-mt: openblas-gemm-mt +blis-st: blis-gemm-st +blis-mt: blis-gemm-mt -mkl-st: mkl-gemm-st -mkl-mt: mkl-gemm-mt +blis-nat-st: blis-gemm-nat-st +blis-nat-mt: blis-gemm-nat-mt -blis-gemm-st: \ +openblas-st: openblas-gemm-st +openblas-mt: openblas-gemm-mt + +mkl-st: mkl-gemm-st +mkl-mt: mkl-gemm-mt + +blis-gemm-st: blis-gemm-nat-st \ + blis-gemm-ind-st +blis-gemm-mt: blis-gemm-nat-mt \ + blis-gemm-ind-mt + +blis-gemm-nat-st: \ test_sgemm_asm_blis_st.x \ test_dgemm_asm_blis_st.x \ - \ + test_cgemm_asm_blis_st.x \ + test_zgemm_asm_blis_st.x + +blis-gemm-nat-mt: \ + test_sgemm_asm_blis_mt.x \ + test_dgemm_asm_blis_mt.x \ + test_cgemm_asm_blis_mt.x \ + test_zgemm_asm_blis_mt.x + +blis-gemm-ind-st: \ test_cgemm_3mhw_blis_st.x \ test_zgemm_3mhw_blis_st.x \ test_cgemm_3m1_blis_st.x \ @@ -241,14 +264,9 @@ blis-gemm-st: \ test_cgemm_4m1a_blis_st.x \ test_zgemm_4m1a_blis_st.x \ test_cgemm_1m_blis_st.x \ - test_zgemm_1m_blis_st.x \ - 
test_cgemm_asm_blis_st.x \ - test_zgemm_asm_blis_st.x + test_zgemm_1m_blis_st.x -blis-gemm-mt: \ - test_sgemm_asm_blis_mt.x \ - test_dgemm_asm_blis_mt.x \ - \ +blis-gemm-ind-mt: \ test_cgemm_3mhw_blis_mt.x \ test_zgemm_3mhw_blis_mt.x \ test_cgemm_3m1_blis_mt.x \ @@ -260,9 +278,7 @@ blis-gemm-mt: \ test_cgemm_4m1a_blis_mt.x \ test_zgemm_4m1a_blis_mt.x \ test_cgemm_1m_blis_mt.x \ - test_zgemm_1m_blis_mt.x \ - test_cgemm_asm_blis_mt.x \ - test_zgemm_asm_blis_mt.x + test_zgemm_1m_blis_mt.x openblas-gemm-st: \ test_sgemm_openblas_st.x \ diff --git a/testsuite/input.operations b/testsuite/input.operations index e3cd20503..c3e6d6f16 100644 --- a/testsuite/input.operations +++ b/testsuite/input.operations @@ -21,12 +21,7 @@ # determined by its local switch. For example, if the level-1v section # override is set to 1, and there is a 1 on the line marked "addv", # then the addv operation will be tested. Similarly, a 0 would cause -# addv to not be tested. NOTE: You may ignore the lines marked "test -# sequential front-end." These lines are for future use, to -# distinguish tests of the sequential implementation from tests of -# the multithreaded implementation. For now, BLIS does not contain -# separate APIs for multithreaded execution, even though -# multithreading is supported. So, these should be left set to 1. +# addv to not be tested. # # ENABLING ONLY SELECT OPERATIONS # If you would like to enable just a few (or even just one) operation @@ -105,75 +100,60 @@ # --- Utility -------------------------------------------------------------- 1 # randv -1 # test sequential front-end -1 # dimensions: m 1 # randm -1 # test sequential front-end -1 -1 # dimensions: m n # --- Level-1v ------------------------------------------------------------- 1 # addv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # amaxv -1 # test sequential front-end -1 # dimensions: m 1 # axpbyv -1 # test sequential front-end -1 # dimensions: m ? 
# parameters: conjx 1 # axpyv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # copyv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # dotv -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotxv -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjx conjy 1 # normfv -1 # test sequential front-end -1 # dimensions: m 1 # scalv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjbeta 1 # scal2v -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # setv -1 # test sequential front-end -1 # dimensions: m 1 # subv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # xpbyv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx @@ -181,40 +161,32 @@ # --- Level-1m ------------------------------------------------------------- 1 # addm -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa 1 # axpym -1 # test sequential front-end -1 -1 # dimensions: m n ? # parameters: transa 1 # copym -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa 1 # normfm -1 # test sequential front-end -1 -2 # dimensions: m n 1 # scalm -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: conjbeta 1 # scal2m -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa 1 # setm -1 # test sequential front-end -1 -2 # dimensions: m n 1 # subm -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa @@ -222,27 +194,22 @@ # --- Level-1f kernels ----------------------------------------------------- 1 # axpy2v -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotaxpyv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: conjxt conjx conjy 1 # axpyf -1 # test sequential front-end -1 # dimensions: m ?? 
# parameters: conja conjx 1 # dotxf -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjat conjx 1 # dotxaxpyf -1 # test sequential front-end -1 # dimensions: m ???? # parameters: conjat conja conjw conjx @@ -250,52 +217,42 @@ # --- Level-2 -------------------------------------------------------------- 1 # gemv -1 # test sequential front-end -1 -2 # dimensions: m n ?? # parameters: transa conjx 1 # ger -1 # test sequential front-end -1 -2 # dimensions: m n ?? # parameters: conjx conjy 1 # hemv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # her -1 # test sequential front-end -1 # dimensions: m ?? # parameters: uploc conjx 1 # her2 -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # symv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # syr -1 # test sequential front-end -1 # dimensions: m ?? # parameters: uploc conjx 1 # syr2 -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # trmv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploa transa diaga 1 # trsv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploa transa diaga @@ -303,15 +260,12 @@ # --- Level-3 micro-kernels ------------------------------------------------ 1 # gemm -1 # test sequential micro-kernel -1 # dimensions: k 1 # trsm -1 # test sequential micro-kernel ? # parameters: uploa 1 # gemmtrsm -1 # test sequential micro-kernel -1 # dimensions: k ? # parameters: uploa @@ -319,52 +273,42 @@ # --- Level-3 -------------------------------------------------------------- 1 # gemm -1 # test sequential front-end -1 -1 -1 # dimensions: m n k ?? # parameters: transa transb 1 # hemm -1 # test sequential front-end -1 -1 # dimensions: m n ???? # parameters: side uploa conja transb 1 # herk -1 # test sequential front-end -1 -1 # dimensions: m k ?? 
# parameters: uploc transa 1 # her2k -1 # test sequential front-end -1 -1 # dimensions: m k ??? # parameters: uploc transa transb 1 # symm -1 # test sequential front-end -1 -1 # dimensions: m n ???? # parameters: side uploa conja transb 1 # syrk -1 # test sequential front-end -1 -1 # dimensions: m k ?? # parameters: uploc transa 1 # syr2k -1 # test sequential front-end -1 -1 # dimensions: m k ??? # parameters: uploc transa transb 1 # trmm -1 # test sequential front-end -1 -1 # dimensions: m n ???? # parameters: side uploa transa diaga 1 # trmm3 -1 # test sequential front-end -1 -1 # dimensions: m n ????n # parameters: side uploa transa diaga transb 1 # trsm -1 # test sequential front-end -1 -1 # dimensions: m n ???? # parameters: side uploa transa diaga diff --git a/testsuite/input.operations.fast b/testsuite/input.operations.fast index 3cf2ce52f..d2a44276e 100644 --- a/testsuite/input.operations.fast +++ b/testsuite/input.operations.fast @@ -21,12 +21,7 @@ # determined by its local switch. For example, if the level-1v section # override is set to 1, and there is a 1 on the line marked "addv", # then the addv operation will be tested. Similarly, a 0 would cause -# addv to not be tested. NOTE: You may ignore the lines marked "test -# sequential front-end." These lines are for future use, to -# distinguish tests of the sequential implementation from tests of -# the multithreaded implementation. For now, BLIS does not contain -# separate APIs for multithreaded execution, even though -# multithreading is supported. So, these should be left set to 1. +# addv to not be tested. 
# # ENABLING ONLY SELECT OPERATIONS # If you would like to enable just a few (or even just one) operation @@ -105,75 +100,60 @@ # --- Utility -------------------------------------------------------------- 1 # randv -1 # test sequential front-end -1 # dimensions: m 1 # randm -1 # test sequential front-end -1 -1 # dimensions: m n # --- Level-1v ------------------------------------------------------------- 1 # addv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # amaxv -1 # test sequential front-end -1 # dimensions: m 1 # axpbyv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # axpyv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # copyv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # dotv -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotxv -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjx conjy 1 # normfv -1 # test sequential front-end -1 # dimensions: m 1 # scalv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjbeta 1 # scal2v -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # setv -1 # test sequential front-end -1 # dimensions: m 1 # subv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx 1 # xpbyv -1 # test sequential front-end -1 # dimensions: m ? # parameters: conjx @@ -181,40 +161,32 @@ # --- Level-1m ------------------------------------------------------------- 1 # addm -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa 1 # axpym -1 # test sequential front-end -1 -1 # dimensions: m n ? # parameters: transa 1 # copym -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa 1 # normfm -1 # test sequential front-end -1 -2 # dimensions: m n 1 # scalm -1 # test sequential front-end -1 -2 # dimensions: m n ? 
# parameters: conjbeta 1 # scal2m -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa 1 # setm -1 # test sequential front-end -1 -2 # dimensions: m n 1 # subm -1 # test sequential front-end -1 -2 # dimensions: m n ? # parameters: transa @@ -222,27 +194,22 @@ # --- Level-1f kernels ----------------------------------------------------- 1 # axpy2v -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjx conjy 1 # dotaxpyv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: conjxt conjx conjy 1 # axpyf -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conja conjx 1 # dotxf -1 # test sequential front-end -1 # dimensions: m ?? # parameters: conjat conjx 1 # dotxaxpyf -1 # test sequential front-end -1 # dimensions: m ???? # parameters: conjat conja conjw conjx @@ -250,52 +217,42 @@ # --- Level-2 -------------------------------------------------------------- 1 # gemv -1 # test sequential front-end -1 -2 # dimensions: m n ?? # parameters: transa conjx 1 # ger -1 # test sequential front-end -1 -2 # dimensions: m n ?? # parameters: conjx conjy 1 # hemv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # her -1 # test sequential front-end -1 # dimensions: m ?? # parameters: uploc conjx 1 # her2 -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # symv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploa conja conjx 1 # syr -1 # test sequential front-end -1 # dimensions: m ?? # parameters: uploc conjx 1 # syr2 -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploc conjx conjy 1 # trmv -1 # test sequential front-end -1 # dimensions: m ??? # parameters: uploa transa diaga 1 # trsv -1 # test sequential front-end -1 # dimensions: m ??? 
# parameters: uploa transa diaga @@ -303,15 +260,12 @@ # --- Level-3 micro-kernels ------------------------------------------------ 1 # gemm -1 # test sequential micro-kernel -1 # dimensions: k 1 # trsm -1 # test sequential micro-kernel ? # parameters: uploa 1 # gemmtrsm -1 # test sequential micro-kernel -1 # dimensions: k ? # parameters: uploa @@ -319,52 +273,42 @@ # --- Level-3 -------------------------------------------------------------- 1 # gemm -1 # test sequential front-end -1 -1 -1 # dimensions: m n k nn # parameters: transa transb 1 # hemm -1 # test sequential front-end -1 -1 # dimensions: m n ??nn # parameters: side uploa conja transb 1 # herk -1 # test sequential front-end -1 -1 # dimensions: m k ?n # parameters: uploc transa 1 # her2k -1 # test sequential front-end -1 -1 # dimensions: m k ?nn # parameters: uploc transa transb 1 # symm -1 # test sequential front-end -1 -1 # dimensions: m n ??nn # parameters: side uploa conja transb 1 # syrk -1 # test sequential front-end -1 -1 # dimensions: m k ?n # parameters: uploc transa 1 # syr2k -1 # test sequential front-end -1 -1 # dimensions: m k ?nn # parameters: uploc transa transb 1 # trmm -1 # test sequential front-end -1 -1 # dimensions: m n ??n? # parameters: side uploa transa diaga 0 # trmm3 -1 # test sequential front-end -1 -1 # dimensions: m n ??n?n # parameters: side uploa transa diaga transb 1 # trsm -1 # test sequential front-end -1 -1 # dimensions: m n ??n? # parameters: side uploa transa diaga diff --git a/testsuite/src/test_addm.c b/testsuite/src/test_addm.c index 2cca94caa..b49783231 100644 --- a/testsuite/src/test_addm.c +++ b/testsuite/src/test_addm.c @@ -104,17 +104,17 @@ void libblis_test_addm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. 
if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_addm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_addv.c b/testsuite/src/test_addv.c index 048af87a3..97afcc792 100644 --- a/testsuite/src/test_addv.c +++ b/testsuite/src/test_addv.c @@ -103,17 +103,17 @@ void libblis_test_addv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_addv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_amaxv.c b/testsuite/src/test_amaxv.c index 5befee328..7ce0ef506 100644 --- a/testsuite/src/test_amaxv.c +++ b/testsuite/src/test_amaxv.c @@ -107,17 +107,17 @@ void libblis_test_amaxv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_amaxv_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_axpbyv.c b/testsuite/src/test_axpbyv.c index 737384c0a..460909eb5 100644 --- a/testsuite/src/test_axpbyv.c +++ b/testsuite/src/test_axpbyv.c @@ -114,17 +114,17 @@ void libblis_test_axpbyv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpbyv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_axpy2v.c b/testsuite/src/test_axpy2v.c index 6319cc29d..4f1f2c8c6 100644 --- a/testsuite/src/test_axpy2v.c +++ b/testsuite/src/test_axpy2v.c @@ -114,17 +114,17 @@ void libblis_test_axpy2v { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1f_over == DISABLE_ALL ) return; + libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpy2v_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_axpyf.c b/testsuite/src/test_axpyf.c index 61397ab3d..e7c7ad69f 100644 --- a/testsuite/src/test_axpyf.c +++ b/testsuite/src/test_axpyf.c @@ -112,17 +112,17 @@ void libblis_test_axpyf { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1f_over == DISABLE_ALL ) return; + libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpyf_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_axpym.c b/testsuite/src/test_axpym.c index 04992a0de..53250106e 100644 --- a/testsuite/src/test_axpym.c +++ b/testsuite/src/test_axpym.c @@ -109,17 +109,17 @@ void libblis_test_axpym { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpym_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_axpyv.c b/testsuite/src/test_axpyv.c index e616c3350..cb3415692 100644 --- a/testsuite/src/test_axpyv.c +++ b/testsuite/src/test_axpyv.c @@ -109,17 +109,17 @@ void libblis_test_axpyv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_axpyv_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_copym.c b/testsuite/src/test_copym.c index 166e2efe8..859f34fda 100644 --- a/testsuite/src/test_copym.c +++ b/testsuite/src/test_copym.c @@ -103,17 +103,17 @@ void libblis_test_copym { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_copym_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_copyv.c b/testsuite/src/test_copyv.c index c9e77e6dc..a7f0b9aec 100644 --- a/testsuite/src/test_copyv.c +++ b/testsuite/src/test_copyv.c @@ -103,17 +103,17 @@ void libblis_test_copyv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_copyv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_dotaxpyv.c b/testsuite/src/test_dotaxpyv.c index 41a6cea89..26c1e0288 100644 --- a/testsuite/src/test_dotaxpyv.c +++ b/testsuite/src/test_dotaxpyv.c @@ -116,17 +116,17 @@ void libblis_test_dotaxpyv { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1f_over == DISABLE_ALL ) return; + libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotaxpyv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_dotv.c b/testsuite/src/test_dotv.c index f6a177a42..79368ad3e 100644 --- a/testsuite/src/test_dotv.c +++ b/testsuite/src/test_dotv.c @@ -105,17 +105,17 @@ void libblis_test_dotv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_dotxaxpyf.c b/testsuite/src/test_dotxaxpyf.c index e9160af37..85b819b79 100644 --- a/testsuite/src/test_dotxaxpyf.c +++ b/testsuite/src/test_dotxaxpyf.c @@ -122,17 +122,17 @@ void libblis_test_dotxaxpyf { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1f_over == DISABLE_ALL ) return; + libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotxaxpyf_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_dotxf.c b/testsuite/src/test_dotxf.c index ff3032a55..c6a1d0977 100644 --- a/testsuite/src/test_dotxf.c +++ b/testsuite/src/test_dotxf.c @@ -114,17 +114,17 @@ void libblis_test_dotxf { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1f_over == DISABLE_ALL ) return; + libblis_test_l1f_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotxf_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_dotxv.c b/testsuite/src/test_dotxv.c index 796574220..82d876190 100644 --- a/testsuite/src/test_dotxv.c +++ b/testsuite/src/test_dotxv.c @@ -110,17 +110,17 @@ void libblis_test_dotxv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_dotxv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_gemm.c b/testsuite/src/test_gemm.c index e692a54e4..061f0f825 100644 --- a/testsuite/src/test_gemm.c +++ b/testsuite/src/test_gemm.c @@ -116,17 +116,17 @@ void libblis_test_gemm { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_gemm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_gemm_ukr.c b/testsuite/src/test_gemm_ukr.c index c86cfcafe..df79e15a9 100644 --- a/testsuite/src/test_gemm_ukr.c +++ b/testsuite/src/test_gemm_ukr.c @@ -117,17 +117,17 @@ void libblis_test_gemm_ukr { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3ukr_over == DISABLE_ALL ) return; + libblis_test_l3ukr_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_gemm_ukr_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_gemmtrsm_ukr.c b/testsuite/src/test_gemmtrsm_ukr.c index a24fdf896..db142487f 100644 --- a/testsuite/src/test_gemmtrsm_ukr.c +++ b/testsuite/src/test_gemmtrsm_ukr.c @@ -132,17 +132,17 @@ void libblis_test_gemmtrsm_ukr { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3ukr_over == DISABLE_ALL ) return; + libblis_test_l3ukr_is_disabled( op ) ) return; // Call dependencies first. 
if ( TRUE ) libblis_test_gemmtrsm_ukr_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_gemv.c b/testsuite/src/test_gemv.c index ac2cf9b69..a7be2860d 100644 --- a/testsuite/src/test_gemv.c +++ b/testsuite/src/test_gemv.c @@ -113,17 +113,17 @@ void libblis_test_gemv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_gemv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_ger.c b/testsuite/src/test_ger.c index 35054793f..220d1dcf9 100644 --- a/testsuite/src/test_ger.c +++ b/testsuite/src/test_ger.c @@ -111,17 +111,17 @@ void libblis_test_ger { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_ger_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_hemm.c b/testsuite/src/test_hemm.c index b2265a468..535450262 100644 --- a/testsuite/src/test_hemm.c +++ b/testsuite/src/test_hemm.c @@ -119,17 +119,17 @@ void libblis_test_hemm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_hemm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_hemv.c b/testsuite/src/test_hemv.c index a4ddefda1..0cae6044d 100644 --- a/testsuite/src/test_hemv.c +++ b/testsuite/src/test_hemv.c @@ -114,17 +114,17 @@ void libblis_test_hemv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_hemv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_her.c b/testsuite/src/test_her.c index 06e258925..c0e857387 100644 --- a/testsuite/src/test_her.c +++ b/testsuite/src/test_her.c @@ -111,17 +111,17 @@ void libblis_test_her { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_her_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_her2.c b/testsuite/src/test_her2.c index c0210c0b9..827a723ce 100644 --- a/testsuite/src/test_her2.c +++ b/testsuite/src/test_her2.c @@ -113,17 +113,17 @@ void libblis_test_her2 { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_her2_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_her2k.c b/testsuite/src/test_her2k.c index d79f56698..b708559cb 100644 --- a/testsuite/src/test_her2k.c +++ b/testsuite/src/test_her2k.c @@ -117,17 +117,17 @@ void libblis_test_her2k { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_her2k_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_herk.c b/testsuite/src/test_herk.c index 1db6dc113..8b3bb74b6 100644 --- a/testsuite/src/test_herk.c +++ b/testsuite/src/test_herk.c @@ -115,17 +115,17 @@ void libblis_test_herk { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_herk_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_libblis.c b/testsuite/src/test_libblis.c index 20a69254d..419fadbcb 100644 --- a/testsuite/src/test_libblis.c +++ b/testsuite/src/test_libblis.c @@ -498,10 +498,6 @@ void libblis_test_read_op_info( test_ops_t* ops, ops->indiv_over = TRUE; } - // Read the line for the sequential front-end/micro-kernel interface. - libblis_test_read_next_line( buffer, input_stream ); - sscanf( buffer, "%d ", &(op->front_seq) ); - op->n_dims = libblis_test_get_n_dims_from_dimset( dimset ); op->dimset = dimset; @@ -569,12 +565,6 @@ void libblis_test_read_op_info( test_ops_t* ops, // Initialize the parent pointer. op->ops = ops; - - // Disable operation if requested. - if ( op->op_switch == DISABLE_ALL ) - { - op->front_seq = DISABLE; - } } @@ -975,9 +965,7 @@ void libblis_test_output_op_struct( FILE* os, test_op_t* op, char* op_str ) { dimset_t dimset = op->dimset; - libblis_test_fprintf_c( os, "test %s seq front-end? 
%d\n", op_str, op->front_seq ); - - if ( dimset == BLIS_TEST_DIMS_MNK ) + if ( dimset == BLIS_TEST_DIMS_MNK ) { libblis_test_fprintf_c( os, "%s m n k %d %d %d\n", op_str, op->dim_spec[0], op->dim_spec[1], op->dim_spec[2] ); @@ -2434,7 +2422,7 @@ int libblis_test_op_is_disabled( test_op_t* op ) // If there was at least one individual override, then an op test is // disabled if it is NOT equal to ENABLE_ONLY. If there were no // individual overrides, then an op test is disabled if it is equal - // to DISABLE_ALL. + // to DISABLE. if ( op->ops->indiv_over == TRUE ) { if ( op->op_switch != ENABLE_ONLY ) r_val = TRUE; @@ -2442,9 +2430,56 @@ int libblis_test_op_is_disabled( test_op_t* op ) } else // if ( op->ops->indiv_over == FALSE ) { - if ( op->op_switch == DISABLE_ALL ) r_val = TRUE; - else r_val = FALSE; + if ( op->op_switch == DISABLE ) r_val = TRUE; + else r_val = FALSE; } return r_val; } + +int libblis_test_op_is_done( test_op_t* op ) +{ + return op->test_done; +} + +int libblis_test_util_is_disabled( test_op_t* op ) +{ + if ( op->ops->util_over == DISABLE ) return TRUE; + else return FALSE; +} + +int libblis_test_l1v_is_disabled( test_op_t* op ) +{ + if ( op->ops->l1v_over == DISABLE ) return TRUE; + else return FALSE; +} + +int libblis_test_l1m_is_disabled( test_op_t* op ) +{ + if ( op->ops->l1m_over == DISABLE ) return TRUE; + else return FALSE; +} + +int libblis_test_l1f_is_disabled( test_op_t* op ) +{ + if ( op->ops->l1f_over == DISABLE ) return TRUE; + else return FALSE; +} + +int libblis_test_l2_is_disabled( test_op_t* op ) +{ + if ( op->ops->l2_over == DISABLE ) return TRUE; + else return FALSE; +} + +int libblis_test_l3ukr_is_disabled( test_op_t* op ) +{ + if ( op->ops->l3ukr_over == DISABLE ) return TRUE; + else return FALSE; +} + +int libblis_test_l3_is_disabled( test_op_t* op ) +{ + if ( op->ops->l3_over == DISABLE ) return TRUE; + else return FALSE; +} diff --git a/testsuite/src/test_libblis.h b/testsuite/src/test_libblis.h index 69b51e333..ac49f8da1 
100644 --- a/testsuite/src/test_libblis.h +++ b/testsuite/src/test_libblis.h @@ -86,8 +86,6 @@ #define SECONDS_TO_SLEEP 3 -#define DISABLE_ALL 0 -#define SPECIFY 1 #define DISABLE 0 #define ENABLE 1 #define ENABLE_ONLY 2 @@ -187,7 +185,9 @@ typedef struct opid_t opid; int op_switch; +#if 0 int front_seq; +#endif unsigned int n_dims; dimset_t dimset; int dim_spec[ MAX_NUM_DIMENSIONS ]; @@ -430,6 +430,14 @@ void libblis_test_parse_command_line( int argc, char** argv ); void libblis_test_check_empty_problem( obj_t* c, double* perf, double* resid ); int libblis_test_op_is_disabled( test_op_t* op ); +int libblis_test_op_is_done( test_op_t* op ); +int libblis_test_util_is_disabled( test_op_t* op ); +int libblis_test_l1v_is_disabled( test_op_t* op ); +int libblis_test_l1m_is_disabled( test_op_t* op ); +int libblis_test_l1f_is_disabled( test_op_t* op ); +int libblis_test_l2_is_disabled( test_op_t* op ); +int libblis_test_l3ukr_is_disabled( test_op_t* op ); +int libblis_test_l3_is_disabled( test_op_t* op ); // // --- Test module headers ----------------------------------------------------- diff --git a/testsuite/src/test_normfm.c b/testsuite/src/test_normfm.c index 1bee0756e..ba11f015c 100644 --- a/testsuite/src/test_normfm.c +++ b/testsuite/src/test_normfm.c @@ -102,17 +102,17 @@ void libblis_test_normfm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_normfm_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_normfv.c b/testsuite/src/test_normfv.c index c5224cd06..7fb05e29d 100644 --- a/testsuite/src/test_normfv.c +++ b/testsuite/src/test_normfv.c @@ -102,17 +102,17 @@ void libblis_test_normfv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_normfv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_randm.c b/testsuite/src/test_randm.c index 9eda02a34..8cb51e872 100644 --- a/testsuite/src/test_randm.c +++ b/testsuite/src/test_randm.c @@ -99,17 +99,17 @@ void libblis_test_randm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->util_over == DISABLE_ALL ) return; + libblis_test_util_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_randm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_randv.c b/testsuite/src/test_randv.c index 7ac693c9a..a7f10947e 100644 --- a/testsuite/src/test_randv.c +++ b/testsuite/src/test_randv.c @@ -99,17 +99,17 @@ void libblis_test_randv { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->util_over == DISABLE_ALL ) return; + libblis_test_util_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_randv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_scal2m.c b/testsuite/src/test_scal2m.c index dfbbdb64a..06a8ff0ab 100644 --- a/testsuite/src/test_scal2m.c +++ b/testsuite/src/test_scal2m.c @@ -108,17 +108,17 @@ void libblis_test_scal2m { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scal2m_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_scal2v.c b/testsuite/src/test_scal2v.c index 9ab18d317..379c2179b 100644 --- a/testsuite/src/test_scal2v.c +++ b/testsuite/src/test_scal2v.c @@ -108,17 +108,17 @@ void libblis_test_scal2v { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scal2v_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_scalm.c b/testsuite/src/test_scalm.c index 2d6f53e80..f4c73bc47 100644 --- a/testsuite/src/test_scalm.c +++ b/testsuite/src/test_scalm.c @@ -104,17 +104,17 @@ void libblis_test_scalm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scalm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_scalv.c b/testsuite/src/test_scalv.c index b019cd3c7..045ca0f2c 100644 --- a/testsuite/src/test_scalv.c +++ b/testsuite/src/test_scalv.c @@ -105,17 +105,17 @@ void libblis_test_scalv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_scalv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_setm.c b/testsuite/src/test_setm.c index 781ec4aa5..de1bd3636 100644 --- a/testsuite/src/test_setm.c +++ b/testsuite/src/test_setm.c @@ -101,17 +101,17 @@ void libblis_test_setm { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_setm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_setv.c b/testsuite/src/test_setv.c index 456aca6cb..912a4885f 100644 --- a/testsuite/src/test_setv.c +++ b/testsuite/src/test_setv.c @@ -101,17 +101,17 @@ void libblis_test_setv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_setv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_subm.c b/testsuite/src/test_subm.c index 950c1fc18..9821329fd 100644 --- a/testsuite/src/test_subm.c +++ b/testsuite/src/test_subm.c @@ -104,17 +104,17 @@ void libblis_test_subm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1m_over == DISABLE_ALL ) return; + libblis_test_l1m_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_subm_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_subv.c b/testsuite/src/test_subv.c index 0189d321f..1c1152ae5 100644 --- a/testsuite/src/test_subv.c +++ b/testsuite/src/test_subv.c @@ -104,17 +104,17 @@ void libblis_test_subv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_subv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_symm.c b/testsuite/src/test_symm.c index 0ba63d82a..04b8ffe82 100644 --- a/testsuite/src/test_symm.c +++ b/testsuite/src/test_symm.c @@ -119,17 +119,17 @@ void libblis_test_symm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_symm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_symv.c b/testsuite/src/test_symv.c index 4d5e0f386..99343cff6 100644 --- a/testsuite/src/test_symv.c +++ b/testsuite/src/test_symv.c @@ -114,17 +114,17 @@ void libblis_test_symv { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_symv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_syr.c b/testsuite/src/test_syr.c index dbc1f4d26..c8f496d1f 100644 --- a/testsuite/src/test_syr.c +++ b/testsuite/src/test_syr.c @@ -111,17 +111,17 @@ void libblis_test_syr { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_syr_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_syr2.c b/testsuite/src/test_syr2.c index 9edad1b69..9ee68db9b 100644 --- a/testsuite/src/test_syr2.c +++ b/testsuite/src/test_syr2.c @@ -113,17 +113,17 @@ void libblis_test_syr2 { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_syr2_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_syr2k.c b/testsuite/src/test_syr2k.c index fcf837575..0ff10cdc2 100644 --- a/testsuite/src/test_syr2k.c +++ b/testsuite/src/test_syr2k.c @@ -117,17 +117,17 @@ void libblis_test_syr2k { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_syr2k_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_syrk.c b/testsuite/src/test_syrk.c index 8b1ecc535..3cd5e2c48 100644 --- a/testsuite/src/test_syrk.c +++ b/testsuite/src/test_syrk.c @@ -115,17 +115,17 @@ void libblis_test_syrk { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_syrk_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_trmm.c b/testsuite/src/test_trmm.c index e13ad08a6..e10237ef0 100644 --- a/testsuite/src/test_trmm.c +++ b/testsuite/src/test_trmm.c @@ -115,17 +115,17 @@ void libblis_test_trmm { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trmm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_trmm3.c b/testsuite/src/test_trmm3.c index 162731bf6..741678c1c 100644 --- a/testsuite/src/test_trmm3.c +++ b/testsuite/src/test_trmm3.c @@ -119,17 +119,17 @@ void libblis_test_trmm3 { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trmm3_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_trmv.c b/testsuite/src/test_trmv.c index 75a3d1acf..5030ef37c 100644 --- a/testsuite/src/test_trmv.c +++ b/testsuite/src/test_trmv.c @@ -110,17 +110,17 @@ void libblis_test_trmv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trmv_deps( params, op ); // Execute the test driver for each implementation requested. 
- if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_trsm.c b/testsuite/src/test_trsm.c index d4debc516..39860fa6d 100644 --- a/testsuite/src/test_trsm.c +++ b/testsuite/src/test_trsm.c @@ -115,17 +115,17 @@ void libblis_test_trsm { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3_over == DISABLE_ALL ) return; + libblis_test_l3_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trsm_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_trsm_ukr.c b/testsuite/src/test_trsm_ukr.c index 5bbec8dd0..e4f6edb75 100644 --- a/testsuite/src/test_trsm_ukr.c +++ b/testsuite/src/test_trsm_ukr.c @@ -115,17 +115,17 @@ void libblis_test_trsm_ukr { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l3ukr_over == DISABLE_ALL ) return; + libblis_test_l3ukr_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trsm_ukr_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_trsv.c b/testsuite/src/test_trsv.c index 0872af5b9..11255d5d1 100644 --- a/testsuite/src/test_trsv.c +++ b/testsuite/src/test_trsv.c @@ -110,17 +110,17 @@ void libblis_test_trsv { // Return early if this test has already been done. 
- if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l2_over == DISABLE_ALL ) return; + libblis_test_l2_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_trsv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/testsuite/src/test_xpbyv.c b/testsuite/src/test_xpbyv.c index 34129a5fd..3578f51f8 100644 --- a/testsuite/src/test_xpbyv.c +++ b/testsuite/src/test_xpbyv.c @@ -108,17 +108,17 @@ void libblis_test_xpbyv { // Return early if this test has already been done. - if ( op->test_done == TRUE ) return; + if ( libblis_test_op_is_done( op ) ) return; // Return early if operation is disabled. if ( libblis_test_op_is_disabled( op ) || - op->ops->l1v_over == DISABLE_ALL ) return; + libblis_test_l1v_is_disabled( op ) ) return; // Call dependencies first. if ( TRUE ) libblis_test_xpbyv_deps( params, op ); // Execute the test driver for each implementation requested. - if ( op->front_seq == ENABLE ) + //if ( op->front_seq == ENABLE ) { libblis_test_op_driver( params, op, diff --git a/travis/cpuid/excavator.def b/travis/cpuid/excavator.def new file mode 100644 index 000000000..2479cdd44 --- /dev/null +++ b/travis/cpuid/excavator.def @@ -0,0 +1,78 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. 
+# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# CPU: AMD A12-8870, 4000 MHz +# +00000000 ******** => 0000000D 68747541 444D4163 69746E65 +00000001 ******** => 00660F51 00040800 7ED8320B 178BFBFF +00000002 ******** => 00000000 00000000 00000000 00000000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000005 ******** => 00000040 00000040 00000003 00000000 +00000006 ******** => 00000004 00000000 00000001 00000000 +00000007 ******** => 00000000 000001A9 00000000 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 00000000 00000000 00000000 00000000 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 00000007 00000340 000003C0 40000000 +0000000D 00000001 => 00000001 00000000 00000000 00000000 +0000000D 00000002 => 00000100 00000240 00000000 00000000 +0000000D 0000003E => 00000080 00000340 00000000 00000000 +80000000 ******** => 8000001E 68747541 444D4163 69746E65 +80000001 ******** => 00660F51 20000000 2FABBFFF 2FD3FBFF +80000002 ******** => 20444D41 204F5250 2D323141 30373838 +80000003 ******** => 2C375220 20323120 504D4F43 20455455 +80000004 ******** => 45524F43 43342053 2047382B 00202020 +80000005 ******** => FF40FF18 FF40FF30 20080140 60030140 +80000006 ******** => 64006400 64004200 04008140 00000000 +80000007 ******** => 00000000 00000005 00000400 000037D9 +80000008 ******** => 00003030 00000000 00004003 00000000 +80000009 ******** => 00000000 00000000 00000000 00000000 +8000000A ******** => 00000001 00008000 00000000 0001BCFF +8000000B ******** => 00000000 00000000 00000000 00000000 +8000000C ******** => 00000000 00000000 00000000 00000000 +8000000D ******** => 00000000 00000000 00000000 00000000 +8000000E ******** => 00000000 00000000 00000000 00000000 +8000000F ******** => 00000000 00000000 00000000 00000000 +80000010 ******** => 00000000 00000000 00000000 00000000 +80000011 ******** => 00000000 00000000 00000000 00000000 +80000012 ******** => 00000000 00000000 00000000 
00000000 +80000013 ******** => 00000000 00000000 00000000 00000000 +80000014 ******** => 00000000 00000000 00000000 00000000 +80000015 ******** => 00000000 00000000 00000000 00000000 +80000016 ******** => 00000000 00000000 00000000 00000000 +80000017 ******** => 00000000 00000000 00000000 00000000 +80000018 ******** => 00000000 00000000 00000000 00000000 +80000019 ******** => F040F018 64006400 00000000 00000000 +8000001A ******** => 00000003 00000000 00000000 00000000 +8000001B ******** => 000007FF 00000000 00000000 00000000 +8000001C ******** => 00000000 80032013 00010200 E000000F +8000001E ******** => 00000010 00000100 00000000 00000000 diff --git a/travis/cpuid/haswell.def b/travis/cpuid/haswell.def new file mode 100644 index 000000000..7bbd0c1b5 --- /dev/null +++ b/travis/cpuid/haswell.def @@ -0,0 +1,65 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CPU: Intel Xeon E5-2660 v3, 2600 MHz +# +00000000 ******** => 0000000F 756E6547 6C65746E 49656E69 +00000001 ******** => 000306F2 00200800 7FFEFBFF BFEBFBFF +00000002 ******** => 76036301 00F0B5FF 00000000 00C10000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000004 00000000 => 3C004121 01C0003F 0000003F 00000000 +00000004 00000001 => 3C004122 01C0003F 0000003F 00000000 +00000004 00000002 => 3C004143 01C0003F 000001FF 00000000 +00000004 00000003 => 3C07C163 04C0003F 00004FFF 00000006 +00000005 ******** => 00000040 00000040 00000003 00002120 +00000006 ******** => 00000075 00000002 00000009 00000000 +00000007 ******** => 00000000 000037AB 00000000 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000001 00000000 00000000 00000000 +0000000A ******** => 07300403 00000000 00000000 00000603 +0000000B 00000000 => 00000001 00000002 00000100 00000000 +0000000B 00000001 => 00000005 00000014 00000201 00000000 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 00000007 00000340 00000340 00000000 +0000000D 00000001 => 00000001 00000000 00000000 00000000 +0000000D 00000002 => 00000100 00000240 00000000 00000000 +0000000E ******** => 00000000 00000000 00000000 00000000 +0000000F 00000000 => 00000000 00000027 00000000 00000002 +0000000F 00000001 => 00000000 0000A000 00000027 00000001 +80000000 ******** => 80000008 00000000 00000000 00000000 +80000001 ******** => 
00000000 00000000 00000021 2C100000 +80000002 ******** => 65746E49 2952286C 6F655820 2952286E +80000003 ******** => 55504320 2D354520 30363632 20337620 +80000004 ******** => 2E322040 48473036 0000007A 00000000 +80000005 ******** => 00000000 00000000 00000000 00000000 +80000006 ******** => 00000000 00000000 01006040 00000000 +80000007 ******** => 00000000 00000000 00000000 00000100 +80000008 ******** => 0000302E 00000000 00000000 00000000 diff --git a/travis/cpuid/penryn.def b/travis/cpuid/penryn.def new file mode 100644 index 000000000..3f12be1af --- /dev/null +++ b/travis/cpuid/penryn.def @@ -0,0 +1,52 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CPU: Intel Xeon X5550, 2666 MHz +# +00000000 ******** => 0000000B 756E6547 6C65746E 49656E69 +00000001 ******** => 000106A2 00100800 00BCE3BD BFEBFBFF +00000002 ******** => 55035A01 00F0B2E4 00000000 09CA212C +00000003 ******** => 00000000 00000000 00000000 00000000 +00000005 ******** => 00000040 00000040 00000003 00021120 +00000006 ******** => 00000003 00000002 00000001 00000000 +00000007 ******** => 00000000 00000000 00000000 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 07300403 00000000 00000000 00000603 +80000000 ******** => 80000008 00000000 00000000 00000000 +80000001 ******** => 00000000 00000000 00000001 28100000 +80000002 ******** => 756E6547 20656E69 65746E49 2952286C +80000003 ******** => 55504320 20202020 20202020 40202020 +80000004 ******** => 30303020 20402030 37362E32 007A4847 +80000005 ******** => 00000000 00000000 00000000 00000000 +80000006 ******** => 00000000 00000000 01006040 00000000 +80000007 ******** => 00000000 00000000 00000000 00000100 +80000008 ******** => 00003028 00000000 00000000 00000000 diff --git a/travis/cpuid/piledriver.def b/travis/cpuid/piledriver.def new file mode 100644 index 000000000..06c64b1c1 --- /dev/null +++ b/travis/cpuid/piledriver.def @@ -0,0 +1,82 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. 
+# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# CPU: AMD A10-6800K, 4300 MHz +# +00000000 ******** => 0000000D 68747541 444D4163 69746E65 +00000001 ******** => 00610F31 00040800 3E98320B 178BFBFF +00000002 ******** => 00000000 00000000 00000000 00000000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000005 ******** => 00000040 00000040 00000003 00000000 +00000006 ******** => 00000000 00000000 00000001 00000000 +00000007 ******** => 00000000 00000008 00000000 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 00000000 00000000 00000000 00000000 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 00000007 00000340 000003C0 40000000 +0000000D 00000001 => 00000000 00000000 00000000 00000000 +0000000D 00000002 => 00000100 00000240 00000000 00000000 +0000000D 0000003E => 00000080 00000340 00000000 00000000 +80000000 ******** => 8000001E 68747541 444D4163 69746E65 +80000001 ******** => 00610F31 20000000 01EBBFFF 2FD3FBFF +80000002 ******** => 20444D41 2D303141 30303836 5041204B +80000003 ******** => 69772055 52206874 6F656461 6D74286E +80000004 ******** => 44482029 61724720 63696870 00202073 +80000005 ******** => FF40FF18 FF40FF30 10040140 40020140 +80000006 ******** => 64006400 64004200 08008140 00000000 +80000007 ******** => 00000000 00000000 00000000 000007D9 +80000008 ******** => 00003030 00000000 00004003 00000000 +80000009 ******** => 00000000 00000000 00000000 00000000 +8000000A ******** => 00000001 00010000 00000000 00001CFF +8000000B ******** => 00000000 00000000 00000000 00000000 +8000000C ******** => 00000000 00000000 00000000 00000000 +8000000D ******** => 00000000 00000000 00000000 00000000 +8000000E ******** => 00000000 00000000 00000000 00000000 +8000000F ******** => 00000000 00000000 00000000 00000000 +80000010 ******** => 00000000 00000000 00000000 00000000 +80000011 ******** => 00000000 00000000 00000000 00000000 +80000012 ******** => 00000000 00000000 
00000000 00000000 +80000013 ******** => 00000000 00000000 00000000 00000000 +80000014 ******** => 00000000 00000000 00000000 00000000 +80000015 ******** => 00000000 00000000 00000000 00000000 +80000016 ******** => 00000000 00000000 00000000 00000000 +80000017 ******** => 00000000 00000000 00000000 00000000 +80000018 ******** => 00000000 00000000 00000000 00000000 +80000019 ******** => F040F018 64006400 00000000 00000000 +8000001A ******** => 00000003 00000000 00000000 00000000 +8000001B ******** => 000000FF 00000000 00000000 00000000 +8000001C ******** => 00000000 80032013 00010200 8000000F +8000001D 00000001 => 00000121 00C0003F 0000003F 00000000 +8000001D 00000002 => 00004122 0040003F 000001FF 00000000 +8000001D 00000003 => 00004143 03C0003F 000007FF 00000001 +8000001E ******** => 00000010 00000100 00000000 00000000 +8FFFFFFF ******** => 00000000 00000000 00000000 00000000 diff --git a/travis/cpuid/sandybridge.def b/travis/cpuid/sandybridge.def new file mode 100644 index 000000000..7faf93b9f --- /dev/null +++ b/travis/cpuid/sandybridge.def @@ -0,0 +1,55 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CPU: Intel Xeon E3-1230 v2, 3700 MHz +# +00000000 ******** => 0000000D 756E6547 6C65746E 49656E69 +00000001 ******** => 000306A9 00100800 7FBAE3FF BFEBFBFF +00000002 ******** => 76035A01 00F0B2FF 00000000 00CA0000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000005 ******** => 00000040 00000040 00000003 00001120 +00000006 ******** => 00000077 00000002 00000009 00000000 +00000007 ******** => 00000000 00000281 00000000 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 07300403 00000000 00000000 00000603 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 00000007 00000240 00000340 00000000 +0000000D 00000001 => 00000100 00000240 00000000 00000000 +80000000 ******** => 80000008 00000000 00000000 00000000 +80000001 ******** => 00000000 00000000 00000001 28100000 +80000002 ******** => 20202020 6E492020 286C6574 58202952 +80000003 ******** => 286E6F65 43202952 45205550 32312D33 +80000004 ******** => 56203033 20402032 30332E33 007A4847 +80000005 ******** => 00000000 00000000 00000000 00000000 +80000006 ******** => 00000000 00000000 01006040 00000000 
+80000007 ******** => 00000000 00000000 00000000 00000100 +80000008 ******** => 00003024 00000000 00000000 00000000 diff --git a/travis/cpuid/skx.def b/travis/cpuid/skx.def new file mode 100644 index 000000000..adff98e3f --- /dev/null +++ b/travis/cpuid/skx.def @@ -0,0 +1,82 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# CPU: Intel Xeon Platinum 8180, 2500 MHz +# +00000000 ******** => 00000016 756E6547 6C65746E 49656E69 +00000001 ******** => 00050654 00400800 7FFEFBFF BFEBFBFF +00000002 ******** => 76036301 00F0B5FF 00000000 00C30000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000004 00000000 => 7C004121 01C0003F 0000003F 00000000 +00000004 00000001 => 7C004122 01C0003F 0000003F 00000000 +00000004 00000002 => 7C004143 03C0003F 000003FF 00000000 +00000004 00000003 => 7C0FC163 0280003F 0000DFFF 00000004 +00000005 ******** => 00000040 00000040 00000003 00002020 +00000006 ******** => 00000EF7 00000002 00000009 00000000 +00000007 ******** => 00000000 D39FFFFB 00000008 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 07300404 00000000 00000000 00000603 +0000000B 00000000 => 00000001 00000002 00000100 00000000 +0000000B 00000001 => 00000006 00000038 00000201 00000000 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 000002FF 00000A80 00000A88 00000000 +0000000D 00000001 => 0000000F 00000A00 00000100 00000000 +0000000D 00000002 => 00000100 00000240 00000000 00000000 +0000000D 00000003 => 00000040 000003C0 00000000 00000000 +0000000D 00000004 => 00000040 00000400 00000000 00000000 +0000000D 00000005 => 00000040 00000440 00000000 00000000 +0000000D 00000006 => 00000200 00000480 00000000 00000000 +0000000D 00000007 => 00000400 00000680 00000000 00000000 +0000000D 00000008 => 00000080 00000000 00000001 00000000 +0000000D 00000009 => 00000008 00000A80 00000000 00000000 +0000000E ******** => 00000000 00000000 00000000 00000000 +0000000F 00000000 => 00000000 000000DF 00000000 00000002 +0000000F 00000001 => 00000000 0001C000 000000DF 00000007 +00000010 00000000 => 00000000 0000000A 00000000 00000000 +00000010 00000001 => 0000000A 00000600 00000004 0000000F +00000011 ******** => 00000000 00000000 00000000 00000000 +00000012 00000000 => 00000000 
00000000 00000000 00000000 +00000012 00000001 => 00000000 00000000 00000000 00000000 +00000013 ******** => 00000000 00000000 00000000 00000000 +00000014 00000000 => 00000001 0000000F 00000007 00000000 +00000014 00000001 => 02490002 003F3FFF 00000000 00000000 +00000015 ******** => 00000002 000000C8 00000000 00000000 +00000016 ******** => 000009C4 00000ED8 00000064 00000000 +80000000 ******** => 80000008 00000000 00000000 00000000 +80000001 ******** => 00000000 00000000 00000121 2C100000 +80000002 ******** => 65746E49 2952286C 6F655820 2952286E +80000003 ******** => 616C5020 756E6974 3138206D 43203038 +80000004 ******** => 40205550 352E3220 7A484730 00000000 +80000005 ******** => 00000000 00000000 00000000 00000000 +80000006 ******** => 00000000 00000000 01006040 00000000 +80000007 ******** => 00000000 00000000 00000000 00000100 +80000008 ******** => 0000302E 00000000 00000000 00000000 diff --git a/travis/cpuid/skx1.def b/travis/cpuid/skx1.def new file mode 100644 index 000000000..d28b69432 --- /dev/null +++ b/travis/cpuid/skx1.def @@ -0,0 +1,82 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CPU: Intel Xeon Bronze 3104, 1700 MHz +# +00000000 ******** => 00000016 756E6547 6C65746E 49656E69 +00000001 ******** => 00050654 00100800 7FFEFBFF BFEBFBFF +00000002 ******** => 76036301 00F0B6FF 00000000 00C30000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000004 00000000 => 1C004121 01C0003F 0000003F 00000000 +00000004 00000001 => 1C004122 01C0003F 0000003F 00000000 +00000004 00000002 => 1C004143 03C0003F 000003FF 00000000 +00000004 00000003 => 1C03C163 0280003F 00002FFF 00000004 +00000005 ******** => 00000040 00000040 00000003 00002020 +00000006 ******** => 00000EF5 00000002 00000009 00000000 +00000007 ******** => 00000000 D39FFFFB 00000008 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 07300804 00000000 00000000 00000603 +0000000B 00000000 => 00000001 00000001 00000100 00000000 +0000000B 00000001 => 00000004 00000006 00000201 00000000 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 000002FF 00000340 00000A88 00000000 +0000000D 00000001 => 0000000F 00000340 00000100 00000000 +0000000D 00000002 => 00000100 00000240 00000000 00000000 +0000000D
00000003 => 00000040 000003C0 00000000 00000000 +0000000D 00000004 => 00000040 00000400 00000000 00000000 +0000000D 00000005 => 00000040 00000440 00000000 00000000 +0000000D 00000006 => 00000200 00000480 00000000 00000000 +0000000D 00000007 => 00000400 00000680 00000000 00000000 +0000000D 00000008 => 00000080 00000000 00000001 00000000 +0000000D 00000009 => 00000008 00000A80 00000000 00000000 +0000000E ******** => 00000000 00000000 00000000 00000000 +0000000F 00000000 => 00000000 0000002F 00000000 00000002 +0000000F 00000001 => 00000000 00006000 0000002F 00000007 +00000010 00000000 => 00000000 0000000A 00000000 00000000 +00000010 00000001 => 0000000A 00000600 00000004 0000000F +00000011 ******** => 00000000 00000000 00000000 00000000 +00000012 00000000 => 00000000 00000000 00000000 00000000 +00000012 00000001 => 00000000 00000000 00000000 00000000 +00000013 ******** => 00000000 00000000 00000000 00000000 +00000014 00000000 => 00000001 0000000F 00000007 00000000 +00000014 00000001 => 02490002 003F3FFF 00000000 00000000 +00000015 ******** => 00000002 00000088 00000000 00000000 +00000016 ******** => 000006A4 000006A4 00000064 00000000 +80000000 ******** => 80000008 00000000 00000000 00000000 +80000001 ******** => 00000000 00000000 00000121 2C100000 +80000002 ******** => 65746E49 2952286C 6F655820 2952286E +80000003 ******** => 6F724220 20657A6E 34303133 55504320 +80000004 ******** => 31204020 4730372E 00007A48 00000000 +80000005 ******** => 00000000 00000000 00000000 00000000 +80000006 ******** => 00000000 00000000 01006040 00000000 +80000007 ******** => 00000000 00000000 00000000 00000100 +80000008 ******** => 0000302E 00000000 00000000 00000000 diff --git a/travis/cpuid/steamroller.def b/travis/cpuid/steamroller.def new file mode 100644 index 000000000..c56d6104a --- /dev/null +++ b/travis/cpuid/steamroller.def @@ -0,0 +1,80 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. 
+# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# CPU: AMD A10-7850K, 4000 MHz +# +00000000 ******** => 0000000D 68747541 444D4163 69746E65 +00000001 ******** => 00630F01 00040800 3E98320B 178BFBFF +00000002 ******** => 00000000 00000000 00000000 00000000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000005 ******** => 00000040 00000040 00000003 00000000 +00000006 ******** => 00000000 00000000 00000001 00000000 +00000007 ******** => 00000000 00000009 00000000 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 00000000 00000000 00000000 00000000 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 00000007 00000340 000003C0 40000000 +0000000D 00000001 => 00000100 00000240 00000000 00000000 +80000000 ******** => 8000001E 68747541 444D4163 69746E65 +80000001 ******** => 00630F01 10000000 0FEBBFFF 2FD3FBFF +80000002 ******** => 20444D41 2D303141 30353837 5041204B +80000003 ******** => 69772055 52206874 6F656461 4D54286E +80000004 ******** => 37522029 61724720 63696870 00202073 +80000005 ******** => FF40FF18 FF40FF30 10040140 60030140 +80000006 ******** => 64006400 64004200 08008140 00000000 +80000007 ******** => 00000000 00000001 00000000 000027D9 +80000008 ******** => 00003030 00000000 00004003 00000000 +80000009 ******** => 00000000 00000000 00000000 00000000 +8000000A ******** => 00000001 00010000 00000000 00001CFF +8000000B ******** => 00000000 00000000 00000000 00000000 +8000000C ******** => 00000000 00000000 00000000 00000000 +8000000D ******** => 00000000 00000000 00000000 00000000 +8000000E ******** => 00000000 00000000 00000000 00000000 +8000000F ******** => 00000000 00000000 00000000 00000000 +80000010 ******** => 00000000 00000000 00000000 00000000 +80000011 ******** => 00000000 00000000 00000000 00000000 +80000012 ******** => 00000000 00000000 00000000 00000000 +80000013 ******** => 00000000 00000000 00000000 00000000 +80000014 ******** => 00000000 00000000 
00000000 00000000 +80000015 ******** => 00000000 00000000 00000000 00000000 +80000016 ******** => 00000000 00000000 00000000 00000000 +80000017 ******** => 00000000 00000000 00000000 00000000 +80000018 ******** => 00000000 00000000 00000000 00000000 +80000019 ******** => F040F018 64006400 00000000 00000000 +8000001A ******** => 00000003 00000000 00000000 00000000 +8000001B ******** => 000001FF 00000000 00000000 00000000 +8000001C ******** => 00000000 80032013 00010200 E000000F +8000001D 00000000 => 00000121 00C0003F 0000003F 00000000 +8000001D 00000001 => 00004122 0080003F 000001FF 00000000 +8000001D 00000002 => 00004143 03C0003F 000007FF 00000001 +8000001E ******** => 00000010 00000100 00000000 00000000 +8FFFFFFF ******** => 00000000 00000000 00000000 00000000 diff --git a/travis/cpuid/zen.def b/travis/cpuid/zen.def new file mode 100644 index 000000000..2c20714fb --- /dev/null +++ b/travis/cpuid/zen.def @@ -0,0 +1,83 @@ +# +# BLIS +# An object-based framework for developing high-performance BLAS-like +# libraries. +# +# Copyright (C) 2018, The University of Texas at Austin +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# - Neither the name of The University of Texas at Austin nor the names +# of its contributors may be used to endorse or promote products +# derived from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# CPU: AMD EPYC 7551P, 3000 MHz +# +00000000 ******** => 0000000D 68747541 444D4163 69746E65 +00000001 ******** => 00800F12 00400800 7ED8320B 178BFBFF +00000002 ******** => 00000000 00000000 00000000 00000000 +00000003 ******** => 00000000 00000000 00000000 00000000 +00000005 ******** => 00000040 00000040 00000003 00000011 +00000006 ******** => 00000004 00000000 00000001 00000000 +00000007 ******** => 00000000 209C01A9 00000000 00000000 +00000008 ******** => 00000000 00000000 00000000 00000000 +00000009 ******** => 00000000 00000000 00000000 00000000 +0000000A ******** => 00000000 00000000 00000000 00000000 +0000000C ******** => 00000000 00000000 00000000 00000000 +0000000D 00000000 => 00000007 00000340 00000340 00000000 +0000000D 00000001 => 0000000F 00000340 00000000 00000000 +0000000D 00000002 => 00000100 00000240 00000000 00000000 +80000000 ******** => 8000001F 68747541 444D4163 69746E65 +80000001 ******** => 00800F12 40000000 35C233FF 2FD3FBFF +80000002 ******** => 20444D41 43595045 35353720 33205031 +80000003 ******** => 6F432D32 50206572 65636F72 726F7373 +80000004 ******** => 20202020 20202020 20202020 00202020 +80000005 ******** => FF40FF40 FF40FF40 20080140 40040140 
+80000006 ******** => 36006400 56006400 02006140 0200C140
+80000007 ******** => 00000000 0000001B 00000000 00006799
+80000008 ******** => 00003030 00000007 0000603F 00000000
+80000009 ******** => 00000000 00000000 00000000 00000000
+8000000A ******** => 00000001 00008000 00000000 0001BCFF
+8000000B ******** => 00000000 00000000 00000000 00000000
+8000000C ******** => 00000000 00000000 00000000 00000000
+8000000D ******** => 00000000 00000000 00000000 00000000
+8000000E ******** => 00000000 00000000 00000000 00000000
+8000000F ******** => 00000000 00000000 00000000 00000000
+80000010 ******** => 00000000 00000000 00000000 00000000
+80000011 ******** => 00000000 00000000 00000000 00000000
+80000012 ******** => 00000000 00000000 00000000 00000000
+80000013 ******** => 00000000 00000000 00000000 00000000
+80000014 ******** => 00000000 00000000 00000000 00000000
+80000015 ******** => 00000000 00000000 00000000 00000000
+80000016 ******** => 00000000 00000000 00000000 00000000
+80000017 ******** => 00000000 00000000 00000000 00000000
+80000018 ******** => 00000000 00000000 00000000 00000000
+80000019 ******** => F040F040 00000000 00000000 00000000
+8000001A ******** => 00000003 00000000 00000000 00000000
+8000001B ******** => 000003FF 00000000 00000000 00000000
+8000001C ******** => 00000000 00000000 00000000 00000000
+8000001D 00000000 => 00004121 01C0003F 0000003F 00000000
+8000001D 00000001 => 00004122 00C0003F 000000FF 00000000
+8000001D 00000002 => 00004143 01C0003F 000003FF 00000002
+8000001D 00000003 => 0001C163 03C0003F 00001FFF 00000001
+8000001E ******** => 00000000 00000100 00000300 00000000
+8000001F ******** => 0000000F 0000016F 0000000F 00000001
+8FFFFFFF ******** => 00000000 00000000 00000000 00000000
diff --git a/travis/do_sde.sh b/travis/do_sde.sh
new file mode 100755
index 000000000..3c13d6be1
--- /dev/null
+++ b/travis/do_sde.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+#
+# Run the BLIS testsuite under the SDE emulator once per emulated CPU and
+# verify that BLIS selects the matching sub-configuration at runtime.
+#
+# Environment:
+#   DROPBOX_TOKEN  bearer token for the Dropbox download of the SDE tarball
+#   DIST_PATH      directory holding testsuite/, build/ and travis/
+#
+
+set -e
+set -x
+
+SDE_VERSION=sde-external-8.16.0-2018-01-30-lin
+SDE_TARBALL=$SDE_VERSION.tar.bz2
+SDE=$SDE_VERSION/sde64
+
+# With tracing off, the curl command line (which contains the token) is not
+# echoed by set -x.
+set +x
+: "${DROPBOX_TOKEN:?DROPBOX_TOKEN must be set to download the SDE tarball}"
+# -f makes an HTTP error fail the script here instead of saving the error
+# body as the tarball; -S keeps curl's error message visible despite -s.
+curl -fsS -X POST https://content.dropboxapi.com/2/files/download \
+    -H "Authorization: Bearer $DROPBOX_TOKEN" \
+    -H "Dropbox-API-Arg: {\"path\": \"/$SDE_TARBALL\"}" > "$SDE_TARBALL"
+set -x
+tar xvf "$SDE_TARBALL"
+
+make -j2 testsuite-bin
+cp "$DIST_PATH/testsuite/input.general.fast" input.general
+cp "$DIST_PATH/testsuite/input.operations.fast" input.operations
+
+# Print the path of the first shared object that ldd lists for
+# ./test_libblis.x on a line matching the grep pattern $1 (empty if none).
+lib_path() {
+    ldd ./test_libblis.x | grep -- "$1" | sed 's/^.*=> //' | awk 'NR == 1 { print $1 }'
+}
+
+# Narrow patterns: a bare "ld", "libc" or "libm" also matches unrelated
+# ldd lines (any path containing "ld", libcrypt, libmvec, ...).
+LD_SO=$(lib_path '/ld-')
+LIBC_SO=$(lib_path 'libc\.so')
+LIBM_SO=$(lib_path 'libm\.so')
+
+# Replace each library in place with the copy rewritten by patch-ld-so.py.
+for LIB in "$LD_SO" "$LIBC_SO" "$LIBM_SO"; do
+    # A library that ldd did not list is skipped rather than treated as fatal.
+    [ -n "$LIB" ] || continue
+    "$DIST_PATH/travis/patch-ld-so.py" "$LIB" .tmp
+    chmod a+x .tmp
+    sudo mv .tmp "$LIB"
+done
+
+for ARCH in penryn sandybridge haswell skx knl piledriver steamroller excavator zen; do
+    # knl is selected with sde64's -knl switch; every other target replays the
+    # CPUID dump in travis/cpuid/$ARCH.def.
+    if [ "$ARCH" = "knl" ]; then
+        "$SDE" -knl -- ./test_libblis.x > output.testsuite
+    else
+        "$SDE" -cpuid_in "$DIST_PATH/travis/cpuid/$ARCH.def" -- ./test_libblis.x > output.testsuite
+    fi
+    "$DIST_PATH/build/check-blistest.sh" ./output.testsuite
+    # "|| true" lets a missing line reach the diagnostic below instead of
+    # ending the script silently through set -e.
+    TMP=$(grep "active sub-configuration" output.testsuite || true)
+    CONFIG=${TMP##* }
+    if [ "$CONFIG" != "$ARCH" ]; then
+        echo "Wrong configuration chosen:"
+        echo " Expected: $ARCH"
+        echo " Got: $CONFIG"
+        exit 1
+    fi
+done
+
diff --git a/travis/do_testsuite.sh b/travis/do_testsuite.sh
new file mode 100755
index 000000000..0fa8341ec
--- /dev/null
+++ b/travis/do_testsuite.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+#
+# Run "make testblis" and "make testblas" and check each run with the matching
+# check script under $DIST_PATH/build.
+#
+
+set -e
+set -x
+
+# Exported so the test programs started by make inherit them (presumably
+# BLIS per-loop thread counts; confirm against the BLIS threading docs).
+export BLIS_IC_NT=2
+export BLIS_JC_NT=1
+export BLIS_IR_NT=1
+export BLIS_JR_NT=1
+
+make testblis
+"$DIST_PATH/build/check-blistest.sh" ./output.testsuite
+make testblas
+"$DIST_PATH/build/check-blastest.sh"
+
diff --git a/travis/patch-ld-so.py b/travis/patch-ld-so.py
new file mode 100755
index 000000000..72e580d74
--- /dev/null
+++ b/travis/patch-ld-so.py
@@ -0,0 +1,25 @@
+#!/usr/bin/env python
+
+#
+# Patch ld.so to disable runtime CPUID detection
+# Taken from https://stackoverflow.com/a/44483482
+#
+# Usage: patch-ld-so.py INFILE OUTFILE
+#
+
+import re
+import sys
+
+infile, outfile = sys.argv[1:]
+with open(infile, 'rb') as f:
+    d = f.read()
+# Match CPUID(eax=0), "xor eax,eax" followed closely by "cpuid"; only the two
+# cpuid bytes (0f a2) are replaced, by 66 90.
+# NOTE(review): without re.DOTALL '.' does not match a 0x0a byte, so a cpuid
+# with 0x0a among the bytes after the xor is left untouched. The pattern is
+# left unchanged here.
+o = re.sub(b'(\x31\xc0.{0,32})\x0f\xa2', b'\\1\x66\x90', d)
+# With the assertion disabled, an input without a match is copied unchanged.
+#assert d != o
+with open(outfile, 'wb') as f:
+    f.write(o)